2 changes: 2 additions & 0 deletions gems/aws-sdk-s3/CHANGELOG.md
@@ -1,6 +1,8 @@
Unreleased Changes
------------------

* Feature - Add support for uploading and downloading directories via `DirectoryUploader` and `DirectoryDownloader`.

1.199.0 (2025-09-08)
------------------

3 changes: 3 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb
@@ -6,6 +6,7 @@ module S3
autoload :BucketRegionCache, 'aws-sdk-s3/bucket_region_cache'
autoload :Encryption, 'aws-sdk-s3/encryption'
autoload :EncryptionV2, 'aws-sdk-s3/encryption_v2'
autoload :DefaultExecutor, 'aws-sdk-s3/default_executor'
autoload :FilePart, 'aws-sdk-s3/file_part'
autoload :FileUploader, 'aws-sdk-s3/file_uploader'
autoload :FileDownloader, 'aws-sdk-s3/file_downloader'
@@ -18,6 +19,8 @@ module S3
autoload :ObjectMultipartCopier, 'aws-sdk-s3/object_multipart_copier'
autoload :PresignedPost, 'aws-sdk-s3/presigned_post'
autoload :Presigner, 'aws-sdk-s3/presigner'
autoload :DirectoryUploader, 'aws-sdk-s3/directory_uploader'
autoload :DirectoryDownloader, 'aws-sdk-s3/directory_downloader'
autoload :TransferManager, 'aws-sdk-s3/transfer_manager'

# s3 express session auth
58 changes: 58 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/default_executor.rb
@@ -0,0 +1,58 @@
# frozen_string_literal: true

module Aws
module S3
# @api private
class DefaultExecutor
def initialize(options = {})
@queue = Queue.new
@max_threads = options[:max_threads] || 10
@pool = []
@running = true
monitor_pool
end

# Queues a job for asynchronous execution; the arguments given
# here are yielded to the block when a worker picks the job up.
def post(*args, &block)
raise 'Executor is not running' unless @running

@queue << [args, block]
end

# Signals every worker to exit once the queue drains, then
# blocks until in-flight jobs have finished.
def shutdown
@running = false
@max_threads.times { @queue << :shutdown }
@pool.each(&:join)
@pool.clear
true
end

def running?
@running
end

private

# Reaps dead workers and lazily grows the pool: a new worker is
# spawned whenever queued jobs outnumber live workers, up to the
# max_threads cap.
def monitor_pool
Thread.new do
while @running
@pool.select!(&:alive?)

@pool << spawn_worker if @queue.size > @pool.size && @pool.size < @max_threads
sleep(0.01)
end
end
end

# Workers process jobs until they pop the :shutdown sentinel.
def spawn_worker
Thread.new do
while (job = @queue.pop)
break if job == :shutdown

args, block = job
block.call(*args)
end
end
end
end
end
end
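
For reference, a minimal usage sketch of the executor above (not part of this diff; the job count and `max_threads` value are illustrative). `post` passes its arguments through to the block, and the results queue is used to wait for completion before shutting down:

executor = Aws::S3::DefaultExecutor.new(max_threads: 4)
results = Queue.new
10.times { |i| executor.post(i) { |n| results << n * n } } # args to post are yielded to the block
squares = Array.new(10) { results.pop } # popping blocks until every job has run
executor.shutdown # signals and joins the worker threads
puts squares.sort.inspect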
105 changes: 105 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/directory_downloader.rb
@@ -0,0 +1,105 @@
# frozen_string_literal: true

require 'fileutils'

module Aws
module S3
# Raised when DirectoryDownloader fails to download objects from S3 bucket
class DirectoryDownloadError < StandardError
def initialize(message, errors = [])
@errors = errors
super(message)
end

# @return [Array<StandardError>] The list of errors encountered when downloading objects
attr_reader :errors
end

# @api private
class DirectoryDownloader
def initialize(options = {})
@client = options[:client] || Client.new
@executor = options[:executor]
end

attr_reader :client, :executor

def download(destination, bucket:, **options)
if File.exist?(destination)
raise ArgumentError, 'invalid destination, expected a directory' unless File.directory?(destination)
else
FileUtils.mkdir_p(destination)
end

download_opts = options.dup
@destination = destination
@bucket = bucket
@recursive = download_opts.delete(:recursive) || false
@s3_prefix = download_opts.delete(:s3_prefix)
@s3_delimiter = download_opts.delete(:s3_delimiter) || '/'
@failure_policy = download_opts.delete(:failure_policy) || :abort

downloader = FileDownloader.new(client: client, executor: @executor)
@download_queue = SizedQueue.new(100)
@abort_download = false
@errors = []

Thread.new do
stream_keys
@download_queue << :done
end

download_attempts = 0
completion_queue = Queue.new
while (queue_key = @download_queue.shift) != :done
break if @abort_download

download_attempts += 1
@executor.post(queue_key) do |k|
normalized_key = normalize_key(k)
full_path = File.join(@destination, normalized_key)
dir_path = File.dirname(full_path)
FileUtils.mkdir_p(dir_path) unless dir_path == @destination || Dir.exist?(dir_path)

downloader.download(full_path, download_opts.merge(bucket: @bucket, key: k))
rescue StandardError => e
@errors << e
@abort_download = true if @failure_policy == :abort
ensure
completion_queue << :done
end
end

download_attempts.times { completion_queue.pop }

if @abort_download
msg = "failed to download directory: attempted to download #{download_attempts} objects " \
"but failed to download #{@errors.count} objects."
raise DirectoryDownloadError.new(msg, @errors)
else
{
downloaded: download_attempts - @errors.count,
errors: @errors.count
}
end
end

def normalize_key(key)
key = key.delete_prefix(@s3_prefix) if @s3_prefix
return key.tr('/', @s3_delimiter) if @s3_delimiter != '/'
return key if File::SEPARATOR == '/'

key.tr('/', File::SEPARATOR)
end

def stream_keys(continuation_token: nil)
resp = @client.list_objects_v2(bucket: @bucket, prefix: @s3_prefix, continuation_token: continuation_token)
resp.contents.each do |o|
break if @abort_download
next if o.key.end_with?('/')

@download_queue << o.key
end
stream_keys(continuation_token: resp.next_continuation_token) if resp.next_continuation_token
end
end
end
end
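
As a review aid, a sketch of how the downloader is expected to be wired up with the executor (the bucket name, prefix, and local path are placeholders, and the `TransferManager` integration is out of scope for this file). `download` blocks until every queued object has been attempted, so the executor can be shut down immediately afterwards:

executor = Aws::S3::DefaultExecutor.new(max_threads: 8)
downloader = Aws::S3::DirectoryDownloader.new(client: Aws::S3::Client.new, executor: executor)
stats = downloader.download('/tmp/logs', bucket: 'example-bucket', s3_prefix: 'logs/')
puts "downloaded #{stats[:downloaded]} objects, #{stats[:errors]} failures"
executor.shutdown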
131 changes: 131 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb
@@ -0,0 +1,131 @@
# frozen_string_literal: true

require 'find'
require 'set'

module Aws
module S3
# Raised when DirectoryUploader fails to upload files to S3 bucket
class DirectoryUploadError < StandardError
def initialize(message, errors = [])
@errors = errors
super(message)
end

# @return [Array<StandardError>] The list of errors encountered when uploading objects
attr_reader :errors
end

# @api private
class DirectoryUploader
def initialize(options = {})
@client = options[:client] || Client.new
@executor = options[:executor]
end

# @return [Client]
attr_reader :client

def upload(source, bucket:, **options)
raise ArgumentError, 'Invalid directory' unless Dir.exist?(source)

upload_opts = options.dup
@source = source
@bucket = bucket
@recursive = upload_opts.delete(:recursive) || false
@follow_symlinks = upload_opts.delete(:follow_symlinks) || false
@s3_prefix = upload_opts.delete(:s3_prefix)
@s3_delimiter = upload_opts.delete(:s3_delimiter) || '/'
@filter_callback = upload_opts.delete(:filter_callback)
@failure_policy = upload_opts.delete(:failure_policy) || :abort

uploader = FileUploader.new(
multipart_threshold: upload_opts.delete(:multipart_threshold),
client: @client,
executor: @executor
)
@upload_queue = SizedQueue.new(100)
@errors = []
@abort_upload = false

Thread.new do
if @recursive
stream_recursive_files
else
stream_direct_files
end
@upload_queue << :done
end

upload_attempts = 0
completion_queue = Queue.new
while (queue_file = @upload_queue.shift) != :done
break if @abort_upload

upload_attempts += 1
@executor.post(queue_file) do |f|
path = File.join(@source, f)
# TODO: key to consider s3_prefix and custom delimiter
uploader.upload(path, upload_opts.merge(bucket: @bucket, key: f))
rescue StandardError => e
@errors << e
@abort_upload = true if @failure_policy == :abort
ensure
completion_queue << :done
end
end
upload_attempts.times { completion_queue.pop }

if @abort_upload
msg = "failed to upload directory: attempted to upload #{upload_attempts} files " \
"but failed to upload #{@errors.count} files."
raise DirectoryUploadError.new(msg, @errors)
else
{
uploaded: upload_attempts - @errors.count,
errors: @errors.count
}
end
end

private

# TODO: need to optimize & handle failures
def stream_recursive_files
visited = Set.new
Find.find(@source) do |p|
break if @abort_upload

if !@follow_symlinks && File.symlink?(p)
Find.prune
next
end

absolute_path = File.realpath(p)
if visited.include?(absolute_path)
Find.prune
next
end

visited << absolute_path

# TODO: if non-default s3_delimiter is used, validate here and fail
@upload_queue << p if File.file?(p)
end
end

# TODO: need to optimize & handle failures
def stream_direct_files
Dir.each_child(@source) do |entry|
break if @abort_upload

path = File.join(@source, entry)
next if !@follow_symlinks && File.symlink?(path)

# TODO: if non-default s3_delimiter is used, validate here and fail
@upload_queue << entry if File.file?(path)
end
end
end
end
end
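
And the matching uploader sketch, under the same assumptions (placeholder bucket and paths; `failure_policy: :ignore` collects errors and keeps uploading instead of aborting, per the rescue branch above):

executor = Aws::S3::DefaultExecutor.new(max_threads: 8)
uploader = Aws::S3::DirectoryUploader.new(client: Aws::S3::Client.new, executor: executor)
stats = uploader.upload('/tmp/reports', bucket: 'example-bucket', recursive: true, failure_policy: :ignore)
puts "uploaded #{stats[:uploaded]} files, #{stats[:errors]} failures"
executor.shutdown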