From 4b5efb5c4c16213b18be534bd5b65cf7802fe0d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rub=C3=A9n=20Rubio=20Barrera?=
Date: Sat, 21 Feb 2026 11:42:22 +0100
Subject: [PATCH] Add doc example of uploading large files to S3

---
 guides/LargeFilesToS3.rst | 245 ++++++++++++++++++++++++++++++++++++++
 guides/index.rst          |   1 +
 2 files changed, 246 insertions(+)
 create mode 100644 guides/LargeFilesToS3.rst

diff --git a/guides/LargeFilesToS3.rst b/guides/LargeFilesToS3.rst
new file mode 100644
index 0000000..2330b85
--- /dev/null
+++ b/guides/LargeFilesToS3.rst
@@ -0,0 +1,245 @@
+Compressing large files to S3-compatible storage
+=================================================
+
+S3-compatible storage services usually limit a single upload request to 5 GiB. When generating larger ZIPs, the
+solution is to use a `multipart upload`_.
+
+.. _multipart upload: https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html
+
+We can implement a `PSR-7 stream <https://www.php-fig.org/psr/psr-7/>`_ that buffers ZipStream's output and uploads
+it to S3 in chunks.
+
+MultipartUploadBufferStream example
+-----------------------------------
+
+.. code-block:: php
+
+    <?php
+
+    declare(strict_types=1);
+
+    use Aws\S3\S3Client;
+    use Psr\Http\Message\StreamInterface;
+
+    class MultipartUploadBufferStream implements StreamInterface
+    {
+        // S3 requires every part except the last to be at least 5 MiB.
+        private const PART_SIZE = 5 * 1024 * 1024;
+
+        private string $buffer = '';
+        private int $bufferSize = 0;
+        private int $partNumber = 1;
+        private string $uploadId;
+        private array $parts;
+
+        public function __construct(
+            private readonly string $destinationFileName,
+            private readonly string $bucket,
+            private readonly S3Client $client,
+        ) {
+            $result = $this->client->createMultipartUpload(
+                [
+                    'Bucket' => $bucket,
+                    'Key' => $destinationFileName,
+                    'StorageClass' => 'REDUCED_REDUNDANCY',
+                ]
+            );
+
+            $this->uploadId = $result['UploadId'];
+
+            $this->parts['Parts'] = [];
+        }
+
+        public function write($string): int
+        {
+            $chunkSize = strlen($string);
+
+            $this->buffer .= $string;
+            $this->bufferSize += $chunkSize;
+
+            if ($this->bufferSize >= self::PART_SIZE) {
+                $this->uploadPart();
+            }
+
+            return $chunkSize;
+        }
+
+        public function close(): void
+        {
+            // Upload the remaining buffered closing bytes of the ZIP.
+            if ($this->bufferSize > 0) {
+                $this->uploadPart();
+            }
+
+            $this->client->completeMultipartUpload([
+                'Bucket' => $this->bucket,
+                'Key' => $this->destinationFileName,
+                'UploadId' => $this->uploadId,
+                'MultipartUpload' => $this->parts,
+            ]);
+
+            $this->buffer = '';
+            $this->bufferSize = 0;
+        }
+
+        private function uploadPart(): void
+        {
+            $result = $this->client->uploadPart([
+                'Bucket' => $this->bucket,
+                'Key' => $this->destinationFileName,
+                'UploadId' => $this->uploadId,
+                'PartNumber' => $this->partNumber,
+                'Body' => $this->buffer,
+            ]);
+
+            $this->buffer = '';
+            $this->bufferSize = 0;
+
+            $this->parts['Parts'][$this->partNumber] = [
+                'PartNumber' => $this->partNumber,
+                'ETag' => $result['ETag'],
+            ];
+
+            $this->partNumber++;
+
+            $result = null;
+            gc_collect_cycles(); // To avoid memory leaks. @see https://github.com/aws/aws-sdk-php/issues/1273
+        }
+
+        public function __toString(): string
+        {
+            return $this->getContents();
+        }
+
+        public function getContents(): string
+        {
+            $buffer = $this->buffer;
+            $this->buffer = '';
+            $this->bufferSize = 0;
+
+            return $buffer;
+        }
+
+        public function detach(): null
+        {
+            $this->close();
+
+            return null;
+        }
+
+        public function getSize(): int
+        {
+            return $this->bufferSize;
+        }
+
+        public function isReadable(): bool
+        {
+            return true;
+        }
+
+        public function isWritable(): bool
+        {
+            return true;
+        }
+
+        public function isSeekable(): bool
+        {
+            return false;
+        }
+
+        public function rewind(): void
+        {
+            $this->seek(0);
+        }
+
+        public function seek($offset, $whence = SEEK_SET): void
+        {
+            throw new \RuntimeException('Cannot seek a BufferStream');
+        }
+
+        public function eof(): bool
+        {
+            return $this->bufferSize === 0;
+        }
+
+        public function tell(): int
+        {
+            throw new \RuntimeException('Cannot determine the position of a BufferStream');
+        }
+
+        public function read($length): string
+        {
+            $currentLength = $this->bufferSize;
+
+            if ($length >= $currentLength) {
+                // No need to slice the buffer because we don't have enough data.
+                $result = $this->buffer;
+                $this->buffer = '';
+                $this->bufferSize = 0;
+            } else {
+                // Slice up the result to provide a subset of the buffer.
+                $result = substr($this->buffer, 0, $length);
+                $this->buffer = substr($this->buffer, $length);
+                $this->bufferSize -= $length;
+            }
+
+            return $result;
+        }
+
+        public function getMetadata($key = null)
+        {
+            return $key ? null : [];
+        }
+    }
+
+Then we can use it with ZipStream to compress the files and upload the parts to the storage.
+
+ZipStream usage
+---------------
+
+.. code-block:: php
+
+    <?php
+
+    declare(strict_types=1);
+
+    use Aws\Credentials\CredentialProvider;
+    use Aws\S3\S3Client;
+    use ZipStream\CompressionMethod;
+    use ZipStream\ZipStream;
+
+    $bucket = 'your-bucket';
+
+    $client = new S3Client([
+        'region' => 'your-region',
+        'version' => 'latest',
+        'credentials' => CredentialProvider::defaultProvider(),
+    ]);
+
+    $bufferStream = new MultipartUploadBufferStream(
+        'destination-file.zip',
+        $bucket,
+        $client,
+    );
+
+    $zip = new ZipStream(
+        outputStream: $bufferStream,
+        defaultCompressionMethod: CompressionMethod::STORE,
+        defaultEnableZeroHeader: true,
+        sendHttpHeaders: false,
+    );
+
+    $zip->addFile(
+        fileName: 'big-file-1.txt',
+        data: 'File1 data',
+    );
+    $zip->addFile(
+        fileName: 'big-file-2.txt',
+        data: 'File2 data',
+    );
+
+    $zip->finish();
+    $bufferStream->close(); // Needed after $zip->finish() to upload the last remaining bytes to S3
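+
+The inline ``data:`` strings above keep the example short, but for genuinely large inputs you would stream the
+contents from disk rather than holding them in memory, e.g. with ``addFileFromStream()``. A minimal sketch; the
+``/tmp/big-file-1.bin`` path is a placeholder, and ``$zip`` is the instance from the example above:
+
+.. code-block:: php
+
+    <?php
+
+    // Stream a large on-disk file into the archive without loading
+    // it into memory. The path below is only a placeholder.
+    $handle = fopen('/tmp/big-file-1.bin', 'rb');
+
+    $zip->addFileFromStream(
+        fileName: 'big-file-1.bin',
+        stream: $handle,
+    );
+
+    fclose($handle);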
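+
+One operational caveat not covered by the class above: if anything throws between ``createMultipartUpload`` and
+``completeMultipartUpload``, the incomplete upload and its already-uploaded parts linger in the bucket and keep
+accruing storage costs until aborted. A sketch of the cleanup using the SDK's ``abortMultipartUpload`` operation,
+assuming the stream class is extended with a ``getUploadId()`` getter for its private ``$uploadId`` (not shown
+above):
+
+.. code-block:: php
+
+    <?php
+
+    try {
+        $zip->finish();
+        $bufferStream->close();
+    } catch (\Throwable $e) {
+        // Abort the multipart upload so the orphaned parts are deleted.
+        // getUploadId() is a hypothetical accessor, not part of the class above.
+        $client->abortMultipartUpload([
+            'Bucket' => $bucket,
+            'Key' => 'destination-file.zip',
+            'UploadId' => $bufferStream->getUploadId(),
+        ]);
+
+        throw $e;
+    }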
+
+You can read more about the logic behind this implementation in the `discussion`_.
+
+.. _discussion: https://github.com/maennchen/ZipStream-PHP/discussions/402

diff --git a/guides/index.rst b/guides/index.rst
index 70211c0..8d429a4 100644
--- a/guides/index.rst
+++ b/guides/index.rst
@@ -16,6 +16,7 @@ user, which is much faster. It can work with S3 buckets or any PSR7 Stream.
    Nginx
    Varnish
    ContentLength
+   LargeFilesToS3
 
 Installation
 ---------------