diff --git a/docs/changelog-fragments/664.bugfix.rst b/docs/changelog-fragments/664.bugfix.rst new file mode 100644 index 000000000..cd12b2562 --- /dev/null +++ b/docs/changelog-fragments/664.bugfix.rst @@ -0,0 +1 @@ +Improved performance of SFTP transfers by using larger transfer chunks -- by :user:`Jakuje`. diff --git a/src/pylibsshext/includes/libssh.pxd b/src/pylibsshext/includes/libssh.pxd index 3219f1cd9..673d7eef1 100644 --- a/src/pylibsshext/includes/libssh.pxd +++ b/src/pylibsshext/includes/libssh.pxd @@ -27,6 +27,10 @@ cdef extern from "libssh/libssh.h" nogil: pass ctypedef ssh_session_struct* ssh_session + cdef struct ssh_string_struct: + pass + ctypedef ssh_string_struct* ssh_string + cdef struct ssh_key_struct: pass ctypedef ssh_key_struct* ssh_key diff --git a/src/pylibsshext/includes/sftp.pxd b/src/pylibsshext/includes/sftp.pxd index 9d3db9310..94376578d 100644 --- a/src/pylibsshext/includes/sftp.pxd +++ b/src/pylibsshext/includes/sftp.pxd @@ -17,7 +17,9 @@ # from posix.types cimport mode_t -from pylibsshext.includes.libssh cimport ssh_channel, ssh_session +from libc cimport stdint + +from pylibsshext.includes.libssh cimport ssh_channel, ssh_session, ssh_string cdef extern from "libssh/sftp.h" nogil: @@ -30,6 +32,31 @@ cdef extern from "libssh/sftp.h" nogil: pass ctypedef sftp_file_struct * sftp_file + struct sftp_attributes_struct: + char *name + char *longname + stdint.uint32_t flags + stdint.uint8_t type + stdint.uint64_t size + stdint.uint32_t uid + stdint.uint32_t gid + char *owner + char *group + stdint.uint32_t permissions + stdint.uint64_t atime64 + stdint.uint32_t atime + stdint.uint32_t atime_nseconds + stdint.uint64_t createtime + stdint.uint32_t createtime_nseconds + stdint.uint64_t mtime64 + stdint.uint32_t mtime + stdint.uint32_t mtime_nseconds + ssh_string acl + stdint.uint32_t extended_count + ssh_string extended_type + ssh_string extended_data + ctypedef sftp_attributes_struct * sftp_attributes + cdef int SSH_FX_OK cdef int 
SSH_FX_EOF cdef int SSH_FX_NO_SUCH_FILE @@ -55,5 +82,8 @@ cdef extern from "libssh/sftp.h" nogil: ssize_t sftp_read(sftp_file file, const void *buf, size_t count) int sftp_get_error(sftp_session sftp) + sftp_attributes sftp_stat(sftp_session session, const char *path) + + cdef extern from "sys/stat.h" nogil: cdef int S_IRWXU diff --git a/src/pylibsshext/sftp.pyx b/src/pylibsshext/sftp.pyx index 6220ad8ae..7528eba1e 100644 --- a/src/pylibsshext/sftp.pyx +++ b/src/pylibsshext/sftp.pyx @@ -18,11 +18,15 @@ from posix.fcntl cimport O_CREAT, O_RDONLY, O_TRUNC, O_WRONLY from cpython.bytes cimport PyBytes_AS_STRING +from cpython.mem cimport PyMem_Free, PyMem_Malloc from pylibsshext.errors cimport LibsshSFTPException from pylibsshext.session cimport get_libssh_session +SFTP_MAX_CHUNK = 32_768 # 32kB + + MSG_MAP = { sftp.SSH_FX_OK: "No error", sftp.SSH_FX_EOF: "End-of-file encountered", @@ -63,7 +67,7 @@ cdef class SFTP: rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_WRONLY | O_CREAT | O_TRUNC, sftp.S_IRWXU) if rf is NULL: raise LibsshSFTPException("Opening remote file [%s] for write failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - buffer = f.read(1024) + buffer = f.read(SFTP_MAX_CHUNK) while buffer != b"": length = len(buffer) @@ -76,38 +80,54 @@ cdef class SFTP: self._get_sftp_error_str(), ) ) - buffer = f.read(1024) + buffer = f.read(SFTP_MAX_CHUNK) sftp.sftp_close(rf) def get(self, remote_file, local_file): cdef sftp.sftp_file rf - cdef char read_buffer[1024] + cdef char *read_buffer = NULL + cdef sftp.sftp_attributes attrs remote_file_b = remote_file if isinstance(remote_file_b, unicode): remote_file_b = remote_file.encode("utf-8") + attrs = sftp.sftp_stat(self._libssh_sftp_session, remote_file_b) + if attrs is NULL: + raise LibsshSFTPException("Failed to stat the remote file [%s]. 
Error: [%s]" + % (remote_file, self._get_sftp_error_str())) + file_size = attrs.size + rf = sftp.sftp_open(self._libssh_sftp_session, remote_file_b, O_RDONLY, sftp.S_IRWXU) if rf is NULL: raise LibsshSFTPException("Opening remote file [%s] for read failed with error [%s]" % (remote_file, self._get_sftp_error_str())) - with open(local_file, 'wb') as f: - while True: - file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * 1024) - if file_data == 0: - break - elif file_data < 0: - sftp.sftp_close(rf) - raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" - % (remote_file, self._get_sftp_error_str())) - - bytes_written = f.write(read_buffer[:file_data]) - if bytes_written and file_data != bytes_written: - sftp.sftp_close(rf) - raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]" - " does not match number of bytes [%s] written to local file [%s]" - " due to error [%s]" - % (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str())) + try: + with open(local_file, 'wb') as f: + buffer_size = min(SFTP_MAX_CHUNK, file_size) + read_buffer = PyMem_Malloc(buffer_size) + if read_buffer is NULL: + raise LibsshSFTPException("Memory allocation error") + + while True: + file_data = sftp.sftp_read(rf, read_buffer, sizeof(char) * buffer_size) + if file_data == 0: + break + elif file_data < 0: + sftp.sftp_close(rf) + raise LibsshSFTPException("Reading data from remote file [%s] failed with error [%s]" + % (remote_file, self._get_sftp_error_str())) + + bytes_written = f.write(read_buffer[:file_data]) + if bytes_written and file_data != bytes_written: + sftp.sftp_close(rf) + raise LibsshSFTPException("Number of bytes [%s] read from remote file [%s]" + " does not match number of bytes [%s] written to local file [%s]" + " due to error [%s]" + % (file_data, remote_file, bytes_written, local_file, self._get_sftp_error_str())) + finally: + if read_buffer is not NULL: + PyMem_Free(read_buffer) 
sftp.sftp_close(rf) def close(self): diff --git a/tests/unit/sftp_test.py b/tests/unit/sftp_test.py index ce1a28ffe..59e08700f 100644 --- a/tests/unit/sftp_test.py +++ b/tests/unit/sftp_test.py @@ -8,6 +8,8 @@ import pytest +from pylibsshext.sftp import SFTP_MAX_CHUNK + @pytest.fixture def sftp_session(ssh_client_session): @@ -21,7 +23,7 @@ def sftp_session(ssh_client_session): @pytest.fixture( - params=(32, 1024 + 1), + params=(32, SFTP_MAX_CHUNK + 1), ids=('small-payload', 'large-payload'), ) def transmit_payload(request: pytest.FixtureRequest) -> bytes: @@ -29,8 +31,9 @@ def transmit_payload(request: pytest.FixtureRequest) -> bytes: The choice 32 is arbitrary small value. - The choice 1024 + 1 is meant to be 1B larger than the chunk size used in - :file:`sftp.pyx` to make sure we excercise at least two rounds of reading/writing. + The choice SFTP_MAX_CHUNK + 1 (32kB + 1B) is meant to be 1B larger than the chunk + size used in :file:`sftp.pyx` to make sure we exercise at least two rounds of + reading/writing. """ payload_len = request.param random_bytes = [ord(random.choice(string.printable)) for _ in range(payload_len)]