diff --git a/NOTICE b/NOTICE index 64177a4eb..3e1163c07 100644 --- a/NOTICE +++ b/NOTICE @@ -1,3 +1,30 @@ AWS Crt Python Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. SPDX-License-Identifier: Apache-2.0. + +** XXHash - https://xxhash.com/ +Copyright (c) 2012-2021 Yann Collet +All rights reserved. + +BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 43d019a9e..63082cb08 100644 --- a/README.md +++ b/README.md @@ -147,3 +147,6 @@ AWS_EXTRA_LIB_DIR=C:\path\to\libs;D:\another\path python3 -m pip install . ### Windows SDK Version aws-crt-python builds against windows sdk version `10.0.17763.0` . 
This is the minimal version required for TLS 1.3 support on Windows.
 If you need a different Windows SDK version, you can set environment variable `AWS_CRT_WINDOWS_SDK_VERSION=` while building from source:
+
+### Attribution
+This library exposes a native XXHash implementation (https://github.com/Cyan4973/xxHash).
diff --git a/awscrt/checksums.py b/awscrt/checksums.py
index abade6e61..d2145109b 100644
--- a/awscrt/checksums.py
+++ b/awscrt/checksums.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0.
 
 import _awscrt
+from awscrt import NativeResource
+from typing import Union
 
 
 def crc32(input: bytes, previous_crc32: int = 0) -> int:
@@ -111,3 +113,118 @@ def combine_crc64nvme(crc64nvme_result1: int, crc64nvme_result2: int, data_lengt
         The combined CRC64-NVME checksum as if computed over the concatenated data
     """
     return _awscrt.checksums_crc64nvme_combine(crc64nvme_result1, crc64nvme_result2, data_length2)
+
+
+class XXHash(NativeResource):
+    """
+    Incremental XXHash hasher backed by the native implementation.
+
+    Create instances with :meth:`new_xxhash64`, :meth:`new_xxhash3_64` or
+    :meth:`new_xxhash3_128`, feed them with :meth:`update` and read the digest
+    with :meth:`finalize`. The ``compute_*`` static methods hash a single
+    buffer in one call without creating an instance.
+    """
+
+    def __init__(self, binding):
+        super().__init__()
+        self._binding = binding
+
+    @classmethod
+    def new_xxhash64(cls, seed: int = 0) -> 'XXHash':
+        """
+        Create a streaming XXHash64 hasher.
+
+        Args:
+            seed: Hash seed; must fit in an unsigned 64-bit integer.
+
+        Returns:
+            A new hasher ready for update() and finalize().
+        """
+        return cls(binding=_awscrt.xxhash64_new(seed))
+
+    @classmethod
+    def new_xxhash3_64(cls, seed: int = 0) -> 'XXHash':
+        """
+        Create a streaming XXHash3_64 hasher.
+
+        Args:
+            seed: Hash seed; must fit in an unsigned 64-bit integer.
+
+        Returns:
+            A new hasher ready for update() and finalize().
+        """
+        return cls(binding=_awscrt.xxhash3_64_new(seed))
+
+    @classmethod
+    def new_xxhash3_128(cls, seed: int = 0) -> 'XXHash':
+        """
+        Create a streaming XXHash3_128 hasher.
+
+        Args:
+            seed: Hash seed; must fit in an unsigned 64-bit integer.
+
+        Returns:
+            A new hasher ready for update() and finalize().
+        """
+        return cls(binding=_awscrt.xxhash3_128_new(seed))
+
+    @staticmethod
+    def compute_xxhash64(input: Union[bytes, bytearray, memoryview], seed: int = 0) -> bytes:
+        """
+        Compute the XXHash64 of `input` in one shot.
+
+        Args:
+            input: Bytes-like data to hash.
+            seed: Hash seed; must fit in an unsigned 64-bit integer.
+
+        Returns:
+            The 8-byte digest.
+        """
+        return _awscrt.xxhash64_compute(input, seed)
+
+    @staticmethod
+    def compute_xxhash3_64(input: Union[bytes, bytearray, memoryview], seed: int = 0) -> bytes:
+        """
+        Compute the XXHash3_64 of `input` in one shot.
+
+        Args:
+            input: Bytes-like data to hash.
+            seed: Hash seed; must fit in an unsigned 64-bit integer.
+
+        Returns:
+            The 8-byte digest.
+        """
+        return _awscrt.xxhash3_64_compute(input, seed)
+
+    @staticmethod
+    def compute_xxhash3_128(input: Union[bytes, bytearray, memoryview], seed: int = 0) -> bytes:
+        """
+        Compute the XXHash3_128 of `input` in one shot.
+
+        Args:
+            input: Bytes-like data to hash.
+            seed: Hash seed; must fit in an unsigned 64-bit integer.
+
+        Returns:
+            The 16-byte digest.
+        """
+        return _awscrt.xxhash3_128_compute(input, seed)
+
+    def update(self, input: Union[bytes, bytearray, memoryview]) -> None:
+        """
+        Feed more data into the hash.
+
+        Args:
+            input: Bytes-like data to append to the hashed stream.
+        """
+        _awscrt.xxhash_update(self._binding, input)
+
+    def finalize(self) -> bytes:
+        """
+        Finish hashing and return the digest.
+
+        Returns:
+            The digest of all data passed to update(): 8 bytes for the
+            64-bit variants, 16 bytes for XXHash3_128.
+        """
+        return _awscrt.xxhash_finalize(self._binding)
diff --git a/crt/aws-checksums b/crt/aws-checksums
index 270b15acc..c412c6360 160000
--- a/crt/aws-checksums
+++ b/crt/aws-checksums
@@ -1 +1 @@
-Subproject commit 270b15acc1b2125340ec1c6dda6cc3c28ef0fa44
+Subproject commit c412c636091501c2cd544d23664c8d14999e9dcc
diff --git a/source/checksums.h b/source/checksums.h
index cb6464f3e..7bd5b7441 100644
--- a/source/checksums.h
+++ b/source/checksums.h
@@ -13,4 +13,13 @@ PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args);
 PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args);
 PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args);
 
+PyObject *aws_py_xxhash64_new(PyObject *self, PyObject *args);
+PyObject *aws_py_xxhash3_64_new(PyObject *self, PyObject *args);
+PyObject *aws_py_xxhash3_128_new(PyObject *self, PyObject *args);
+PyObject *aws_py_xxhash64_compute(PyObject *self, PyObject *args);
+PyObject *aws_py_xxhash3_64_compute(PyObject *self, PyObject *args);
+PyObject *aws_py_xxhash3_128_compute(PyObject *self, PyObject
*args);
+PyObject *aws_py_xxhash_update(PyObject *self, PyObject *args);
+PyObject *aws_py_xxhash_finalize(PyObject *self, PyObject *args);
+
 #endif /* AWS_CRT_PYTHON_CHECKSUMS_H */
diff --git a/source/module.c b/source/module.c
index dd346747e..0b752e03d 100644
--- a/source/module.c
+++ b/source/module.c
@@ -863,6 +863,16 @@ static PyMethodDef s_module_methods[] = {
     AWS_PY_METHOD_DEF(checksums_crc32c_combine, METH_VARARGS),
     AWS_PY_METHOD_DEF(checksums_crc64nvme_combine, METH_VARARGS),
 
+    /* XXHash Checksum primitives */
+    AWS_PY_METHOD_DEF(xxhash64_new, METH_VARARGS),
+    AWS_PY_METHOD_DEF(xxhash3_64_new, METH_VARARGS),
+    AWS_PY_METHOD_DEF(xxhash3_128_new, METH_VARARGS),
+    AWS_PY_METHOD_DEF(xxhash64_compute, METH_VARARGS),
+    AWS_PY_METHOD_DEF(xxhash3_64_compute, METH_VARARGS),
+    AWS_PY_METHOD_DEF(xxhash3_128_compute, METH_VARARGS),
+    AWS_PY_METHOD_DEF(xxhash_update, METH_VARARGS),
+    AWS_PY_METHOD_DEF(xxhash_finalize, METH_VARARGS),
+
     /* HTTP */
     AWS_PY_METHOD_DEF(http_connection_close, METH_VARARGS),
     AWS_PY_METHOD_DEF(http_connection_is_open, METH_VARARGS),
diff --git a/source/xxhash.c b/source/xxhash.c
new file mode 100644
index 000000000..1fd36b22a
--- /dev/null
+++ b/source/xxhash.c
@@ -0,0 +1,243 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0.
+ */
+
+#include "checksums.h"
+
+#include "aws/checksums/xxhash.h"
+
+/* Digest sizes in bytes: the 64-bit variants produce 8 bytes, XXHash3_128 produces 16. */
+#define XXHASH_DIGEST_LEN_64 8
+#define XXHASH_DIGEST_LEN_128 16
+
+/* Name given to every xxhash capsule; PyCapsule_GetPointer() checks it before unwrapping. */
+static const char *s_capsule_name_xxhash = "aws_xxhash";
+
+/* Capsule destructor: releases the native hash owned by the capsule. */
+static void s_xxhash_destructor(PyObject *xxhash_capsule) {
+    struct aws_xxhash *hash = PyCapsule_GetPointer(xxhash_capsule, s_capsule_name_xxhash);
+    assert(hash);
+
+    /* assert() is compiled out under NDEBUG, so still guard the destroy call. */
+    if (hash != NULL) {
+        aws_xxhash_destroy(hash);
+    }
+}
+
+/* Convert a Python int into a 64-bit seed. Returns false with a Python exception set on failure. */
+static bool s_seed_from_py(PyObject *py_seed, uint64_t *out_seed) {
+    uint64_t seed = PyLong_AsUnsignedLongLong(py_seed);
+    if (seed == (uint64_t)-1 && PyErr_Occurred()) {
+        return false;
+    }
+
+    *out_seed = seed;
+    return true;
+}
+
+/* Parse the (seed) arguments of the "new" functions. Returns false with a Python exception set. */
+static bool s_parse_new_args(PyObject *args, uint64_t *out_seed) {
+    PyObject *py_seed = NULL;
+    if (!PyArg_ParseTuple(args, "O", &py_seed)) {
+        return false;
+    }
+
+    return s_seed_from_py(py_seed, out_seed);
+}
+
+/**
+ * Parse the (input, seed) arguments of the "compute" functions.
+ * "y*" accepts any contiguous bytes-like object; "y#" rejects mutable objects such as bytearray.
+ * On success the caller owns out_input and must release it with PyBuffer_Release().
+ * Returns false with a Python exception set on failure.
+ */
+static bool s_parse_compute_args(PyObject *args, Py_buffer *out_input, uint64_t *out_seed) {
+    PyObject *py_seed = NULL;
+    if (!PyArg_ParseTuple(args, "y*O", out_input, &py_seed)) {
+        return false;
+    }
+
+    if (!s_seed_from_py(py_seed, out_seed)) {
+        PyBuffer_Release(out_input);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Wrap a freshly created hash in a capsule that owns it.
+ * A NULL hash means native creation failed, so the last AWS error is raised instead.
+ */
+static PyObject *s_capsule_from_hash(struct aws_xxhash *hash) {
+    if (hash == NULL) {
+        return PyErr_AwsLastError();
+    }
+
+    PyObject *capsule = PyCapsule_New(hash, s_capsule_name_xxhash, s_xxhash_destructor);
+    if (capsule == NULL) {
+        /* No capsule exists to run the destructor, so free the hash here. */
+        aws_xxhash_destroy(hash);
+    }
+
+    return capsule;
+}
+
+/* Allocate the digest output buffer. Returns false with a Python exception set on failure. */
+static bool s_digest_init(struct aws_byte_buf *digest, size_t digest_len) {
+    if (aws_byte_buf_init(digest, aws_py_get_allocator(), digest_len)) {
+        PyErr_AwsLastError();
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Turn the result of a native hash call into the Python return value and release the digest buffer.
+ * Returns the digest as bytes, or NULL with a Python exception set if the native call failed.
+ */
+static PyObject *s_digest_finish(int aws_result, struct aws_byte_buf *digest) {
+    PyObject *ret = NULL;
+    if (aws_result) {
+        ret = PyErr_AwsLastError();
+    } else {
+        ret = PyBytes_FromStringAndSize((const char *)digest->buffer, (Py_ssize_t)digest->len);
+    }
+
+    aws_byte_buf_clean_up_secure(digest);
+    return ret;
+}
+
+PyObject *aws_py_xxhash64_new(PyObject *self, PyObject *args) {
+    (void)self;
+    uint64_t seed = 0;
+    if (!s_parse_new_args(args, &seed)) {
+        return NULL;
+    }
+
+    return s_capsule_from_hash(aws_xxhash64_new(aws_py_get_allocator(), seed));
+}
+
+PyObject *aws_py_xxhash3_64_new(PyObject *self, PyObject *args) {
+    (void)self;
+    uint64_t seed = 0;
+    if (!s_parse_new_args(args, &seed)) {
+        return NULL;
+    }
+
+    return s_capsule_from_hash(aws_xxhash3_64_new(aws_py_get_allocator(), seed));
+}
+
+PyObject *aws_py_xxhash3_128_new(PyObject *self, PyObject *args) {
+    (void)self;
+    uint64_t seed = 0;
+    if (!s_parse_new_args(args, &seed)) {
+        return NULL;
+    }
+
+    return s_capsule_from_hash(aws_xxhash3_128_new(aws_py_get_allocator(), seed));
+}
+
+PyObject *aws_py_xxhash64_compute(PyObject *self, PyObject *args) {
+    (void)self;
+    Py_buffer input;
+    uint64_t seed = 0;
+    if (!s_parse_compute_args(args, &input, &seed)) {
+        return NULL;
+    }
+
+    PyObject *ret = NULL;
+    struct aws_byte_buf digest;
+    if (s_digest_init(&digest, XXHASH_DIGEST_LEN_64)) {
+        struct aws_byte_cursor cursor = aws_byte_cursor_from_array(input.buf, (size_t)input.len);
+        ret = s_digest_finish(aws_xxhash64_compute(seed, cursor, &digest), &digest);
+    }
+
+    PyBuffer_Release(&input);
+    return ret;
+}
+
+PyObject *aws_py_xxhash3_64_compute(PyObject *self, PyObject *args) {
+    (void)self;
+    Py_buffer input;
+    uint64_t seed = 0;
+    if (!s_parse_compute_args(args, &input, &seed)) {
+        return NULL;
+    }
+
+    PyObject *ret = NULL;
+    struct aws_byte_buf digest;
+    if (s_digest_init(&digest, XXHASH_DIGEST_LEN_64)) {
+        struct aws_byte_cursor cursor = aws_byte_cursor_from_array(input.buf, (size_t)input.len);
+        ret = s_digest_finish(aws_xxhash3_64_compute(seed, cursor, &digest), &digest);
+    }
+
+    PyBuffer_Release(&input);
+    return ret;
+}
+
+PyObject *aws_py_xxhash3_128_compute(PyObject *self, PyObject *args) {
+    (void)self;
+    Py_buffer input;
+    uint64_t seed = 0;
+    if (!s_parse_compute_args(args, &input, &seed)) {
+        return NULL;
+    }
+
+    PyObject *ret = NULL;
+    struct aws_byte_buf digest;
+    if (s_digest_init(&digest, XXHASH_DIGEST_LEN_128)) {
+        struct aws_byte_cursor cursor = aws_byte_cursor_from_array(input.buf, (size_t)input.len);
+        ret = s_digest_finish(aws_xxhash3_128_compute(seed, cursor, &digest), &digest);
+    }
+
+    PyBuffer_Release(&input);
+    return ret;
+}
+
+PyObject *aws_py_xxhash_update(PyObject *self, PyObject *args) {
+    (void)self;
+    PyObject *xxhash_capsule = NULL;
+    Py_buffer input;
+    if (!PyArg_ParseTuple(args, "Oy*", &xxhash_capsule, &input)) {
+        return NULL;
+    }
+
+    PyObject *ret = NULL;
+    struct aws_xxhash *hash = PyCapsule_GetPointer(xxhash_capsule, s_capsule_name_xxhash);
+    if (hash != NULL) {
+        struct aws_byte_cursor cursor = aws_byte_cursor_from_array(input.buf, (size_t)input.len);
+        if (aws_xxhash_update(hash, cursor)) {
+            PyErr_AwsLastError();
+        } else {
+            ret = Py_None;
+            Py_INCREF(ret);
+        }
+    }
+
+    PyBuffer_Release(&input);
+    return ret;
+}
+
+PyObject *aws_py_xxhash_finalize(PyObject *self, PyObject *args) {
+    (void)self;
+    PyObject *xxhash_capsule = NULL;
+    if (!PyArg_ParseTuple(args, "O", &xxhash_capsule)) {
+        return NULL;
+    }
+
+    struct aws_xxhash *hash = PyCapsule_GetPointer(xxhash_capsule, s_capsule_name_xxhash);
+    if (hash == NULL) {
+        return NULL;
+    }
+
+    /* Sized for the largest (128-bit) digest; the bytes returned are digest.len long. */
+    struct aws_byte_buf digest;
+    if (!s_digest_init(&digest, XXHASH_DIGEST_LEN_128)) {
+        return NULL;
+    }
+
+    return s_digest_finish(aws_xxhash_finalize(hash, &digest), &digest);
+}
diff --git a/test/test_checksums.py b/test/test_checksums.py
index 91890ceb4..fbe53c31c 100644
--- a/test/test_checksums.py
+++ b/test/test_checksums.py
@@ -218,6 +218,52 @@ def
test_combine_invalid_inputs(self):
         # Result should be an integer
         self.assertIsInstance(result, int)
 
+    def test_xxhash64_piping(self):
+        """One-shot and streaming xxhash64 must both match the known digest."""
+        payload = b"Hello world"
+        expected = bytes.fromhex("c500b0c912b376d8")
+
+        # One-shot API.
+        one_shot = checksums.XXHash.compute_xxhash64(payload)
+        self.assertEqual(one_shot, expected)
+
+        # Streaming API fed the same payload in a single update.
+        hasher = checksums.XXHash.new_xxhash64()
+        hasher.update(payload)
+        streamed = hasher.finalize()
+        self.assertEqual(streamed, expected)
+
+    def test_xxhash3_64_piping(self):
+        """One-shot and streaming xxhash3_64 must both match the known digest."""
+        payload = b"Hello world"
+        expected = bytes.fromhex("b6acb9d84a38ff74")
+
+        # One-shot API.
+        one_shot = checksums.XXHash.compute_xxhash3_64(payload)
+        self.assertEqual(one_shot, expected)
+
+        # Streaming API fed the same payload in a single update.
+        hasher = checksums.XXHash.new_xxhash3_64()
+        hasher.update(payload)
+        streamed = hasher.finalize()
+        self.assertEqual(streamed, expected)
+
+    def test_xxhash3_128_piping(self):
+        """One-shot and streaming xxhash3_128 must both match the known digest."""
+        payload = b"Hello world"
+        # 128-bit digest, i.e. 16 bytes.
+        expected = bytes.fromhex("7351f89812f97382b91d05b31e04dd7f")
+
+        # One-shot API.
+        one_shot = checksums.XXHash.compute_xxhash3_128(payload)
+        self.assertEqual(one_shot, expected)
+
+        # Streaming API fed the same payload in a single update.
+        hasher = checksums.XXHash.new_xxhash3_128()
+        hasher.update(payload)
+        streamed = hasher.finalize()
+        self.assertEqual(streamed, expected)
+
 
 
 if __name__ == '__main__':
     unittest.main()