Skip to content

Commit ef03551

Browse files
authored
[v2] Perform copy with no overwrite using IfNoneMatch (#9910)
This commit rolls back changes made to force multipart copies if no overwrite is specified. This includes checking for 0-length files. It also removes the CopyObject argument block list, since IfNoneMatch is now allowed and was the only argument in the list. It also removes the tests for 0 length files and moves some tests to the TestNonMultipartCopy class, since CopyObject accepts IfNoneMatch and no longer needs to force multipart copying.
1 parent 66d24d8 commit ef03551

File tree

6 files changed

+96
-388
lines changed

6 files changed

+96
-388
lines changed

awscli/customizations/s3/s3handler.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -517,43 +517,8 @@ def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
517517
def _get_warning_handlers(self):
518518
return [
519519
self._warn_glacier,
520-
self._warn_if_zero_byte_file_exists_with_no_overwrite,
521520
]
522521

523-
def _warn_if_zero_byte_file_exists_with_no_overwrite(self, fileinfo):
524-
"""
525-
Warning handler to skip zero-byte files when no_overwrite is set and file exists.
526-
527-
This method handles the transfer of zero-byte objects when the no-overwrite parameter is specified.
528-
To prevent overwrite, it uses head_object to verify if the object exists at the destination:
529-
If the object is present at destination: skip the file (return True)
530-
If the object is not present at destination: allow transfer (return False)
531-
532-
:type fileinfo: FileInfo
533-
:param fileinfo: The FileInfo object containing transfer details
534-
535-
:rtype: bool
536-
:return: True if file should be skipped, False if transfer should proceed
537-
"""
538-
if not self._cli_params.get('no_overwrite') or (
539-
getattr(fileinfo, 'size') and fileinfo.size > 0
540-
):
541-
return False
542-
543-
bucket, key = find_bucket_key(fileinfo.dest)
544-
client = fileinfo.source_client
545-
try:
546-
client.head_object(Bucket=bucket, Key=key)
547-
LOGGER.debug(
548-
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
549-
)
550-
return True
551-
except ClientError as e:
552-
if e.response['Error']['Code'] == '404':
553-
return False
554-
else:
555-
raise
556-
557522
def _format_src_dest(self, fileinfo):
558523
src = self._format_s3_path(fileinfo.src)
559524
dest = self._format_s3_path(fileinfo.dest)

awscli/s3transfer/copies.py

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,6 @@ class CopySubmissionTask(SubmissionTask):
8181
'IfNoneMatch',
8282
]
8383

84-
COPY_OBJECT_ARGS_BLOCKLIST = [
85-
'IfNoneMatch',
86-
]
8784

8885
def _submit(
8986
self, client, config, osutil, request_executor, transfer_future
@@ -141,24 +138,9 @@ def _submit(
141138
# during a multipart copy.
142139
transfer_future.meta.provide_object_etag(response.get('ETag'))
143140

144-
# Check for ifNoneMatch is enabled and file has content
145-
# Special handling for 0-byte files: Since multipart copy works with object size
146-
# and divides the object into smaller chunks, there's an edge case when the object
147-
# size is zero. This would result in 0 parts being calculated, and the
148-
# CompleteMultipartUpload operation throws a MalformedXML error when transferring
149-
# 0 parts because the XML does not validate against the published schema.
150-
# Therefore, 0-byte files are always handled via single copy request regardless
151-
# of the multipart threshold setting.
152-
should_overwrite = (
153-
call_args.extra_args.get("IfNoneMatch")
154-
and transfer_future.meta.size != 0
155-
)
156-
# If it is less than threshold and ifNoneMatch is not in parameters
157-
# do a regular copy else do multipart copy.
158-
if (
159-
transfer_future.meta.size < config.multipart_threshold
160-
and not should_overwrite
161-
):
141+
# If it is greater than threshold do a multipart copy, otherwise
142+
# do a regular copy object.
143+
if transfer_future.meta.size < config.multipart_threshold:
162144
self._submit_copy_request(
163145
client, config, osutil, request_executor, transfer_future
164146
)
@@ -175,13 +157,6 @@ def _submit_copy_request(
175157
# Get the needed progress callbacks for the task
176158
progress_callbacks = get_callbacks(transfer_future, 'progress')
177159

178-
# Submit the request of a single copy and make sure it
179-
# does not include any blocked arguments.
180-
copy_object_extra_args = {
181-
param: val
182-
for param, val in call_args.extra_args.items()
183-
if param not in self.COPY_OBJECT_ARGS_BLOCKLIST
184-
}
185160
self._transfer_coordinator.submit(
186161
request_executor,
187162
CopyObjectTask(
@@ -191,7 +166,7 @@ def _submit_copy_request(
191166
"copy_source": call_args.copy_source,
192167
"bucket": call_args.bucket,
193168
"key": call_args.key,
194-
"extra_args": copy_object_extra_args,
169+
"extra_args": call_args.extra_args,
195170
"callbacks": progress_callbacks,
196171
"size": transfer_future.meta.size,
197172
},

tests/functional/s3/test_cp_command.py

Lines changed: 11 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -387,90 +387,32 @@ def test_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
387387
def test_no_overwrite_flag_on_copy_when_small_object_does_not_exist_on_target(
388388
self,
389389
):
390-
cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key1.txt --no-overwrite'
391-
# Set up responses for multipart copy (since no-overwrite always uses multipart)
390+
cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key.txt --no-overwrite'
392391
self.parsed_responses = [
393-
self.head_object_response(), # HeadObject to get source metadata
394-
self.create_mpu_response('foo'), # CreateMultipartUpload response
395-
self.upload_part_copy_response(), # UploadPartCopy response
396-
{}, # CompleteMultipartUpload response
392+
self.head_object_response(ContentLength=5),
393+
self.copy_object_response(),
397394
]
398395
self.run_cmd(cmdline, expected_rc=0)
399-
# Verify all multipart operations were called
400-
self.assertEqual(len(self.operations_called), 4)
401-
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
402-
self.assertEqual(
403-
self.operations_called[1][0].name, 'CreateMultipartUpload'
404-
)
405-
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
406396
self.assertEqual(
407-
self.operations_called[3][0].name, 'CompleteMultipartUpload'
397+
len(self.operations_called), 2, self.operations_called
408398
)
409-
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
410-
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
399+
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
400+
self.assertEqual(self.operations_called[1][0].name, 'CopyObject')
401+
self.assertEqual(self.operations_called[1][1]['IfNoneMatch'], '*')
411402

412403
def test_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
413404
self,
414405
):
415406
cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key.txt --no-overwrite'
416-
# Set up responses for multipart copy (since no-overwrite always uses multipart)
417407
self.parsed_responses = [
418-
self.head_object_response(), # HeadObject to get source metadata
419-
self.create_mpu_response('foo'), # CreateMultipartUpload response
420-
self.upload_part_copy_response(), # UploadPartCopy response
421-
self.precondition_failed_error_response(), # CompleteMultipartUpload
422-
{}, # AbortMultipartUpload response
423-
]
424-
self.run_cmd(cmdline, expected_rc=0)
425-
# Verify all multipart operations were called
426-
self.assertEqual(len(self.operations_called), 5)
427-
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
428-
self.assertEqual(
429-
self.operations_called[1][0].name, 'CreateMultipartUpload'
430-
)
431-
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
432-
self.assertEqual(
433-
self.operations_called[3][0].name, 'CompleteMultipartUpload'
434-
)
435-
self.assertEqual(
436-
self.operations_called[4][0].name, 'AbortMultipartUpload'
437-
)
438-
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
439-
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
440-
441-
def test_no_overwrite_flag_on_copy_when_zero_size_object_exists_at_destination(
442-
self,
443-
):
444-
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file.txt --no-overwrite'
445-
self.parsed_responses = [
446-
self.head_object_response(
447-
ContentLength=0
448-
), # Source object (zero size)
449-
self.head_object_response(), # Checking the object at destination
408+
self.head_object_response(ContentLength=5),
409+
self.precondition_failed_error_response(),
450410
]
451411
self.run_cmd(cmdline, expected_rc=0)
452412
self.assertEqual(len(self.operations_called), 2)
453413
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
454-
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
455-
456-
def test_no_overwrite_flag_on_copy_when_zero_size_object_not_exists_at_destination(
457-
self,
458-
):
459-
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file1.txt --no-overwrite'
460-
self.parsed_responses = [
461-
self.head_object_response(
462-
ContentLength=0
463-
), # Source object (zero size)
464-
{
465-
'Error': {'Code': '404', 'Message': 'Not Found'}
466-
}, # At destination object does not exists
467-
self.copy_object_response(), # Copy Request when object does not exists
468-
]
469-
self.run_cmd(cmdline, expected_rc=0)
470-
self.assertEqual(len(self.operations_called), 3)
471-
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
472-
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
473-
self.assertEqual(self.operations_called[2][0].name, 'CopyObject')
414+
self.assertEqual(self.operations_called[1][0].name, 'CopyObject')
415+
self.assertEqual(self.operations_called[1][1]['IfNoneMatch'], '*')
474416

475417
def test_no_overwrite_flag_on_copy_when_large_object_exists_on_target(
476418
self,

tests/functional/s3/test_mv_command.py

Lines changed: 13 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -423,26 +423,21 @@ def test_mv_no_overwrite_flag_on_copy_when_small_object_does_not_exist_on_target
423423
# Set up responses for multipart copy (since no-overwrite always uses multipart)
424424
self.parsed_responses = [
425425
self.head_object_response(), # HeadObject to get source metadata
426-
self.create_mpu_response('foo'), # CreateMultipartUpload response
427-
self.upload_part_copy_response(), # UploadPartCopy response
428-
{}, # CompleteMultipartUpload response
429-
self.delete_object_response(), # DeleteObject (for move operation)
426+
self.copy_object_response(),
427+
self.delete_object_response(),
430428
]
431429
self.run_cmd(cmdline, expected_rc=0)
432430
# Verify all multipart copy operations were called
433-
self.assertEqual(len(self.operations_called), 5)
434-
self.assertEqual(len(self.operations_called), 5)
431+
self.assertEqual(len(self.operations_called), 3)
435432
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
436433
self.assertEqual(
437-
self.operations_called[1][0].name, 'CreateMultipartUpload'
434+
self.operations_called[1][0].name, 'CopyObject'
438435
)
439-
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
436+
self.assertEqual(self.operations_called[1][1]['IfNoneMatch'], '*')
437+
440438
self.assertEqual(
441-
self.operations_called[3][0].name, 'CompleteMultipartUpload'
439+
self.operations_called[2][0].name, 'DeleteObject'
442440
)
443-
self.assertEqual(self.operations_called[4][0].name, 'DeleteObject')
444-
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
445-
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
446441

447442
def test_mv_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
448443
self,
@@ -451,65 +446,19 @@ def test_mv_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
451446
# Set up responses for multipart copy (since no-overwrite always uses multipart)
452447
self.parsed_responses = [
453448
self.head_object_response(), # HeadObject to get source metadata
454-
self.create_mpu_response('foo'), # CreateMultipartUpload response
455-
self.upload_part_copy_response(), # UploadPartCopy response
456-
self.precondition_failed_error_response(), # CompleteMultipartUpload response
457-
{}, # AbortMultipart
449+
self.precondition_failed_error_response(), # CopyObject response
458450
]
459451
self.run_cmd(cmdline, expected_rc=0)
460452
# Set up the response to simulate a PreconditionFailed error
461453
self.http_response.status_code = 412
462-
# Verify all multipart copy operations were called
463-
self.assertEqual(len(self.operations_called), 5)
454+
# Verify all copy operations were called
455+
self.assertEqual(len(self.operations_called), 2)
464456
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
465457
self.assertEqual(
466-
self.operations_called[1][0].name, 'CreateMultipartUpload'
467-
)
468-
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
469-
self.assertEqual(
470-
self.operations_called[3][0].name, 'CompleteMultipartUpload'
471-
)
472-
self.assertEqual(
473-
self.operations_called[4][0].name, 'AbortMultipartUpload'
458+
self.operations_called[1][0].name, 'CopyObject'
474459
)
475-
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
476-
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
477-
478-
def test_no_overwrite_flag_on_copy_when_zero_size_object_exists_at_destination(
479-
self,
480-
):
481-
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file.txt --no-overwrite'
482-
self.parsed_responses = [
483-
self.head_object_response(
484-
ContentLength=0
485-
), # Source object (zero size)
486-
self.head_object_response(), # Checking the object at destination
487-
]
488-
self.run_cmd(cmdline, expected_rc=0)
489-
self.assertEqual(len(self.operations_called), 2)
490-
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
491-
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
492-
493-
def test_no_overwrite_flag_on_copy_when_zero_size_object_not_exists_at_destination(
494-
self,
495-
):
496-
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file1.txt --no-overwrite'
497-
self.parsed_responses = [
498-
self.head_object_response(
499-
ContentLength=0
500-
), # Source object (zero size)
501-
{
502-
'Error': {'Code': '404', 'Message': 'Not Found'}
503-
}, # At destination object does not exists
504-
self.copy_object_response(), # Copy Request when object does not exists
505-
self.delete_object_response(), # Delete Request for move object
506-
]
507-
self.run_cmd(cmdline, expected_rc=0)
508-
self.assertEqual(len(self.operations_called), 4)
509-
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
510-
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
511-
self.assertEqual(self.operations_called[2][0].name, 'CopyObject')
512-
self.assertEqual(self.operations_called[3][0].name, 'DeleteObject')
460+
# Verify the IfNoneMatch condition was set in the CopyObject request
461+
self.assertEqual(self.operations_called[1][1]['IfNoneMatch'], '*')
513462

514463
def test_mv_no_overwrite_flag_when_large_object_exists_on_target(self):
515464
cmdline = f'{self.prefix} s3://bucket1/key1.txt s3://bucket/key1.txt --no-overwrite'

0 commit comments

Comments
 (0)