Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 45 additions & 7 deletions awscli/customizations/s3/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@

from awscli.compat import ensure_text_type, queue
from awscli.customizations.s3.subscribers import OnDoneFilteredSubscriber
from awscli.customizations.s3.utils import WarningResult, human_readable_size
from awscli.customizations.s3.utils import (
WarningResult,
human_readable_size,
)
from awscli.customizations.utils import uni_print

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -54,6 +57,7 @@ def _create_new_result_cls(name, extra_fields=None, base_cls=BaseResult):
FailureResult = _create_new_result_cls('FailureResult', ['exception'])

DryRunResult = _create_new_result_cls('DryRunResult')
SkipFileResult = _create_new_result_cls('SkipFileResult')

ErrorResult = namedtuple('ErrorResult', ['exception'])

Expand Down Expand Up @@ -123,6 +127,19 @@ def _on_failure(self, future, e):
if isinstance(e, FatalError):
error_result_cls = ErrorResult
self._result_queue.put(error_result_cls(exception=e))
elif self._is_precondition_failed(e):
LOGGER.debug(
"Warning: Skipping file %s as it already exists on %s",
self._src,
self._dest,
)
self._result_queue.put(
SkipFileResult(
transfer_type=self._transfer_type,
src=self._src,
dest=self._dest,
)
)
else:
self._result_queue.put(
FailureResult(
Expand All @@ -133,6 +150,14 @@ def _on_failure(self, future, e):
)
)

def _is_precondition_failed(self, exception):
"""Check if this is a PreconditionFailed error"""
return (
hasattr(exception, 'response')
and exception.response.get('Error', {}).get('Code')
== 'PreconditionFailed'
)


class BaseResultHandler:
"""Base handler class to be called in the ResultProcessor"""
Expand All @@ -150,6 +175,7 @@ def __init__(self):
self.files_transferred = 0
self.files_failed = 0
self.files_warned = 0
self.files_skipped = 0
self.errors = 0
self.expected_bytes_transferred = 0
self.expected_files_transferred = 0
Expand All @@ -167,6 +193,7 @@ def __init__(self):
SuccessResult: self._record_success_result,
FailureResult: self._record_failure_result,
WarningResult: self._record_warning_result,
SkipFileResult: self._record_skipped_file_result,
ErrorResult: self._record_error_result,
CtrlCResult: self._record_error_result,
FinalTotalSubmissionsResult: self._record_final_expected_files,
Expand Down Expand Up @@ -282,6 +309,9 @@ def _record_failure_result(self, result, **kwargs):
self.files_failed += 1
self.files_transferred += 1

def _record_skipped_file_result(self, result, **kwargs):
self.files_skipped += 1

def _record_warning_result(self, **kwargs):
self.files_warned += 1

Expand Down Expand Up @@ -362,6 +392,7 @@ def __init__(
SuccessResult: self._print_success,
FailureResult: self._print_failure,
WarningResult: self._print_warning,
SkipFileResult: self._print_skip,
ErrorResult: self._print_error,
CtrlCResult: self._print_ctrl_c,
DryRunResult: self._print_dry_run,
Expand All @@ -380,6 +411,10 @@ def _print_noop(self, **kwargs):
# If the result does not have a handler, then do nothing with it.
pass

def _print_skip(self, **kwargs):
# Don't reset progress length since this result printer doesn't print a newline
self._redisplay_progress(reset_progress_length=False)

def _print_dry_run(self, result, **kwargs):
statement = self.DRY_RUN_FORMAT.format(
transfer_type=result.transfer_type,
Expand Down Expand Up @@ -432,16 +467,19 @@ def _get_transfer_location(self, result):
src=result.src, dest=result.dest
)

def _redisplay_progress(self):
def _redisplay_progress(self, reset_progress_length=True):
# Reset to zero because done statements are printed with new lines
# meaning there are no carriage returns to take into account when
# printing the next line.
self._progress_length = 0
if reset_progress_length:
self._progress_length = 0
self._add_progress_if_needed()

def _add_progress_if_needed(self):
if self._has_remaining_progress():
self._print_progress()
else:
self._clear_progress_if_no_more_expected_transfers(ending_char='\r')

def _should_print_progress_now(self):
"""Check to see if should print progres based on frequency.
Expand All @@ -467,7 +505,7 @@ def _print_progress(self, **kwargs):
remaining_files = self._get_expected_total(
str(
self._result_recorder.expected_files_transferred
- self._result_recorder.files_transferred
- (self._result_recorder.files_transferred + self._result_recorder.files_skipped)
)
)

Expand Down Expand Up @@ -535,7 +573,7 @@ def _adjust_statement_padding(self, print_statement, ending_char='\n'):
def _has_remaining_progress(self):
if not self._result_recorder.expected_totals_are_final():
return True
actual = self._result_recorder.files_transferred
actual = self._result_recorder.files_transferred + self._result_recorder.files_skipped
expected = self._result_recorder.expected_files_transferred
return actual != expected

Expand All @@ -545,9 +583,9 @@ def _print_to_out_file(self, statement):
def _print_to_error_file(self, statement):
uni_print(statement, self._error_file)

def _clear_progress_if_no_more_expected_transfers(self, **kwargs):
def _clear_progress_if_no_more_expected_transfers(self, ending_char='\n', **kwargs):
if self._progress_length and not self._has_remaining_progress():
uni_print(self._adjust_statement_padding(''), self._out_file)
uni_print(self._adjust_statement_padding('', ending_char=ending_char), self._out_file)


class NoProgressResultPrinter(ResultPrinter):
Expand Down
36 changes: 34 additions & 2 deletions awscli/customizations/s3/s3handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import logging
import os

from botocore.exceptions import ClientError
from s3transfer.manager import TransferManager

from awscli.compat import get_binary_stdin
Expand Down Expand Up @@ -441,7 +442,36 @@ def _get_fileout(self, fileinfo):
return fileinfo.dest

def _get_warning_handlers(self):
return [self._warn_glacier, self._warn_parent_reference]
return [
self._warn_glacier,
self._warn_parent_reference,
self._warn_if_file_exists_with_no_overwrite,
]

def _warn_if_file_exists_with_no_overwrite(self, fileinfo):
"""
Warning handler to skip downloads when no-overwrite is set and local file exists.

This method prevents overwriting existing local files during S3 download operations
when the --no-overwrite flag is specified. It checks if the destination file already
exists on the local filesystem and skips the download if found.

:type fileinfo: FileInfo
:param fileinfo: The FileInfo object containing transfer details

:rtype: bool
:returns: True if the file should be skipped (exists and no-overwrite is set),
False if the download should proceed
"""
if not self._cli_params.get('no_overwrite'):
return False
fileout = self._get_fileout(fileinfo)
if os.path.exists(fileout):
LOGGER.debug(
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
)
return True
return False

def _format_src_dest(self, fileinfo):
src = self._format_s3_path(fileinfo.src)
Expand Down Expand Up @@ -485,7 +515,9 @@ def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
)

def _get_warning_handlers(self):
return [self._warn_glacier]
return [
self._warn_glacier,
]

def _format_src_dest(self, fileinfo):
src = self._format_s3_path(fileinfo.src)
Expand Down
53 changes: 49 additions & 4 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,15 @@
),
}

NO_OVERWRITE = {
'name': 'no-overwrite',
'action': 'store_true',
'help_text': (
"This flag prevents overwriting of files at the destination. With this flag, "
"only files not present at the destination will be transferred."
),
}

TRANSFER_ARGS = [
DRYRUN,
QUIET,
Expand Down Expand Up @@ -1081,7 +1090,14 @@ class CpCommand(S3TransferCommand):
}
]
+ TRANSFER_ARGS
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE]
+ [
METADATA,
COPY_PROPS,
METADATA_DIRECTIVE,
EXPECTED_SIZE,
RECURSIVE,
NO_OVERWRITE,
]
)


Expand All @@ -1105,6 +1121,7 @@ class MvCommand(S3TransferCommand):
METADATA_DIRECTIVE,
RECURSIVE,
VALIDATE_SAME_S3_PATHS,
NO_OVERWRITE,
]
)

Expand Down Expand Up @@ -1150,7 +1167,7 @@ class SyncCommand(S3TransferCommand):
}
]
+ TRANSFER_ARGS
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE]
+ [METADATA, COPY_PROPS, METADATA_DIRECTIVE, NO_OVERWRITE]
)


Expand Down Expand Up @@ -1321,7 +1338,6 @@ def choose_sync_strategies(self):
sync_type = override_sync_strategy.sync_type
sync_type += '_sync_strategy'
sync_strategies[sync_type] = override_sync_strategy

return sync_strategies

def run(self):
Expand Down Expand Up @@ -1408,7 +1424,8 @@ def run(self):
self._client, self._source_client, self.parameters
)

s3_transfer_handler = S3TransferHandlerFactory(self.parameters)(
params = self._get_s3_handler_params()
s3_transfer_handler = S3TransferHandlerFactory(params)(
self._transfer_manager, result_queue
)

Expand Down Expand Up @@ -1517,6 +1534,16 @@ def _map_sse_c_params(self, request_parameters, paths_type):
},
)

def _get_s3_handler_params(self):
"""
Removing no-overwrite params from sync since file to
be synced are already separated out using sync strategy
"""
params = self.parameters.copy()
if self.cmd == 'sync':
params.pop('no_overwrite', None)
return params


# TODO: This class is fairly quirky in the sense that it is both a builder
# and a data object. In the future we should make the following refactorings
Expand Down Expand Up @@ -1580,6 +1607,7 @@ def add_paths(self, paths):
elif len(paths) == 1:
self.parameters['dest'] = paths[0]
self._validate_streaming_paths()
self._validate_no_overwrite_for_download_streaming()
self._validate_path_args()
self._validate_sse_c_args()
self._validate_not_s3_express_bucket_for_sync()
Expand Down Expand Up @@ -1831,3 +1859,20 @@ def _validate_sse_c_copy_source_for_paths(self):
'--sse-c-copy-source is only supported for '
'copy operations.'
)

def _validate_no_overwrite_for_download_streaming(self):
"""
Validates that no-overwrite parameter is not used with streaming downloads.

Raises:
ParamValidationError: If no-overwrite is specified with a streaming download.
"""
if (
self.parameters['is_stream']
and self.parameters.get('no_overwrite')
and self.parameters['dest'] == '-'
):
raise ParamValidationError(
"--no-overwrite parameter is not supported for "
"streaming downloads"
)
34 changes: 34 additions & 0 deletions awscli/customizations/s3/syncstrategy/nooverwrite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import logging

from awscli.customizations.s3.subcommands import NO_OVERWRITE
from awscli.customizations.s3.syncstrategy.base import BaseSync

LOG = logging.getLogger(__name__)


class NoOverwriteSync(BaseSync):
"""Sync strategy that prevents overwriting of existing files at the destination.
This strategy is used only for files that exist at both source and destination
(file_at_src_and_dest_sync_strategy). It always returns False to prevent any
overwriting of existing files, regardless of size or modification time differences.
"""

ARGUMENT = NO_OVERWRITE

def determine_should_sync(self, src_file, dest_file):
LOG.debug(
f"warning: skipping {src_file.src} -> {src_file.dest}, file exists at destination"
)
return False
4 changes: 4 additions & 0 deletions awscli/customizations/s3/syncstrategy/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from awscli.customizations.s3.syncstrategy.exacttimestamps import (
ExactTimestampsSync,
)
from awscli.customizations.s3.syncstrategy.nooverwrite import NoOverwriteSync
from awscli.customizations.s3.syncstrategy.sizeonly import SizeOnlySync


Expand Down Expand Up @@ -48,4 +49,7 @@ def register_sync_strategies(command_table, session, **kwargs):
# Register the delete sync strategy.
register_sync_strategy(session, DeleteSync, 'file_not_at_src')

# Register the noOverwrite sync strategy
register_sync_strategy(session, NoOverwriteSync, 'file_at_src_and_dest')

# Register additional sync strategies here...
Loading