diff --git a/tecken/settings.py b/tecken/settings.py
index 9229cfda2..da57c7623 100644
--- a/tecken/settings.py
+++ b/tecken/settings.py
@@ -714,11 +714,12 @@ def dict_parser(val):
 
 COMPRESS_EXTENSIONS = _config(
     "COMPRESS_EXTENSIONS",
-    default="sym",
+    default=".sym,debuginfo,executable",
     parser=ListOf(str),
     doc=(
-        "During upload, for each file in the archive, if the extension "
-        "matches this list, the file gets gzip compressed before uploading."
+        "During upload, for each file in the archive, if the lowercased file "
+        "name ends with a string from this list, the file gets gzip "
+        "compressed before uploading."
     ),
 )
 
@@ -807,7 +808,7 @@ def dict_parser(val):
 
 DOWNLOAD_FILE_EXTENSIONS_ALLOWED = _config(
     "DOWNLOAD_FILE_EXTENSIONS_ALLOWED",
-    default=".sym,.dl_,.ex_,.pd_,.dbg.gz,.tar.bz2",
+    default=".sym,.dl_,.ex_,.pd_,.dbg.gz,.tar.bz2,executable,debuginfo",
     parser=ListOf(str),
     doc=(
         "A list of file extensions that if a file is NOT one of these extensions "
diff --git a/tecken/tests/sample.zip b/tecken/tests/sample.zip
index 221623a7e..6e6e75ad3 100644
Binary files a/tecken/tests/sample.zip and b/tecken/tests/sample.zip differ
diff --git a/tecken/tests/test_upload.py b/tecken/tests/test_upload.py
index ef6ad4213..173e9ac32 100644
--- a/tecken/tests/test_upload.py
+++ b/tecken/tests/test_upload.py
@@ -65,7 +65,7 @@ def test_dump_and_extract(tmpdir):
         file_listings = dump_and_extract(str(tmpdir), f, ZIP_FILE)
     # That .zip file has multiple files in it so it's hard to rely
     # on the order.
-    assert len(file_listings) == 3
+    assert len(file_listings) == 10
     for file_listing in file_listings:
         assert file_listing.path
         assert os.path.isfile(file_listing.path)
@@ -89,7 +89,7 @@ def test_dump_and_extract_duplicate_name_same_size(tmpdir):
 
 
 def test_should_compressed_key(settings):
-    settings.COMPRESS_EXTENSIONS = ["bar"]
+    settings.COMPRESS_EXTENSIONS = [".bar"]
     assert should_compressed_key("foo.bar")
     assert should_compressed_key("foo.BAR")
     assert not should_compressed_key("foo.exe")
@@ -124,11 +124,26 @@ def mock_api_call(self, operation_name, api_params):
             # yep, bucket exists
             return {}
 
-        if operation_name == "HeadObject" and api_params["Key"] == (
-            "v0/flag/deadbeef/flag.jpeg"
+        # For some files, return the correct size so that they don't need to be re-uploaded.
+        # Generated with: find . -type f -exec stat -f "\"%N\": %z," {} ';'
+        # (plus some manual tweaking + removal)
+        files_with_correct_sizes = {
+            "v0/flag/deadbeef/flag.jpeg": 69183,
+            "v0/buildid/06c989e85fe320a137f27850399fc12ad889ddc9/executable": 22656,
+            "v0/buildid/06c989e85fe320a137f27850399fc12ad889ddc9/debuginfo": 162944,
+            "v0/libfakeopenh264.so/E889C906E35FA12037F27850399FC12A0/libfakeopenh264.so.sym": 12241,
+            "v0/libnssckbi.so/DFDDFE2B9D3AF892658B766CC9230B320/libnssckbi.so.sym": 161968,
+            "v0/uuid/4C862AB7-AB9A-3B6F-9CAB-3FE44CB6FB97/executable": 18724,
+            "v0/uuid/4C862AB7-AB9A-3B6F-9CAB-3FE44CB6FB97/debuginfo.dSYM.tar": 153600,
+            "v0/libfakeopenh264.dylib/4C862AB7AB9A3B6F9CAB3FE44CB6FB970/libfakeopenh264.dylib.sym": 5201,
+        }
+
+        if (
+            operation_name == "HeadObject"
+            and api_params["Key"] in files_with_correct_sizes
         ):
             # correct size, no need to upload
-            return {"ContentLength": 69183}
+            return {"ContentLength": files_with_correct_sizes[api_params["Key"]]}
 
         if operation_name == "HeadObject" and api_params["Key"] == (
             "v0/xpcshell.dbg/A7D6F1BB18CD4CB48/xpcshell.sym"
diff --git a/tecken/upload/utils.py b/tecken/upload/utils.py
index 683c53113..899a3eea5 100644
--- a/tecken/upload/utils.py
+++ b/tecken/upload/utils.py
@@ -140,8 +140,7 @@ def key_existing(client, bucket, key):
 def should_compressed_key(key_name):
     """Return true if, based on this key name, the content should be gzip
     compressed."""
-    key_extension = os.path.splitext(key_name)[1].lower()[1:]
-    return key_extension in settings.COMPRESS_EXTENSIONS
+    return key_name.lower().endswith(tuple(settings.COMPRESS_EXTENSIONS))
 
 
 def get_key_content_type(key_name):
diff --git a/tecken/upload/views.py b/tecken/upload/views.py
index 488409ea0..e66efcc0c 100644
--- a/tecken/upload/views.py
+++ b/tecken/upload/views.py
@@ -48,7 +48,7 @@ class NoPossibleBucketName(Exception):
     matched to one you can use."""
 
 
-_not_hex_characters = re.compile(r"[^a-f0-9]", re.I)
+_not_uuid_characters = re.compile(r"[^a-f0-9\-]", re.I)
 
 # This list of filenames is used to validate a zip and also when iterating
 # over the extracted zip.
@@ -67,10 +67,12 @@ def check_symbols_archive_file_listing(file_listings):
                 f"'{snippet}' which is not allowed"
             )
         # Now check that the filename is matching according to these rules:
-        # 1. Either /<name1>/hex/<name2>,
+        # 1. Either /<name1>/<id>/<name2>,
         # 2. Or, /<name>-symbols.txt
         # Anything else should be considered and unrecognized file pattern
         # and thus rejected.
+        # For <id>, we accept both pure-hex IDs as well as UUIDs with dashes,
+        # such as 05E23BCB-EFB0-330B-809B-1EEAC8884B86.
         split = file_listing.name.split("/")
         if split[-1] in _ignorable_filenames:
             continue
@@ -80,8 +82,8 @@ def check_symbols_archive_file_listing(file_listings):
             # when it'd become a key.
             if invalid_key_name_characters(split[0] + split[2]):
                 return f"Invalid character in filename {file_listing.name!r}"
-            # Check that the middle part is only hex characters.
-            if not _not_hex_characters.findall(split[1]):
+            # Check that the middle part is only uuid characters, i.e. hex and dashes.
+            if not _not_uuid_characters.findall(split[1]):
                 continue
         elif len(split) == 1:
             if file_listing.name.lower().endswith("-symbols.txt"):
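For context, a minimal standalone sketch (not part of the patch) contrasting the old and new behavior. The `_old`/`_new` suffixes and the module-level constants are illustrative stand-ins for the settings and helpers changed above; the sample object keys and the debug ID are taken from the test fixture in this diff.

import os
import re

# Mirrors the new COMPRESS_EXTENSIONS default from settings.py.
COMPRESS_EXTENSIONS = [".sym", "debuginfo", "executable"]


def should_compressed_key_old(key_name):
    # Old check: compare the bare file extension (dot stripped) against the list.
    key_extension = os.path.splitext(key_name)[1].lower()[1:]
    return key_extension in COMPRESS_EXTENSIONS


def should_compressed_key_new(key_name):
    # New check: suffix match on the lowercased key name.
    return key_name.lower().endswith(tuple(COMPRESS_EXTENSIONS))


# Keys without a dot extension, like the "executable" and "debuginfo" artifacts,
# are only picked up by the suffix-based check.
key = "v0/buildid/06c989e85fe320a137f27850399fc12ad889ddc9/executable"
assert not should_compressed_key_old(key)
assert should_compressed_key_new(key)
assert should_compressed_key_new("v0/xpcshell.dbg/A7D6F1BB18CD4CB48/xpcshell.sym")

# The widened regex accepts dashed (UUID-style) debug IDs that the old one rejected.
_not_hex_characters = re.compile(r"[^a-f0-9]", re.I)
_not_uuid_characters = re.compile(r"[^a-f0-9\-]", re.I)
debug_id = "4C862AB7-AB9A-3B6F-9CAB-3FE44CB6FB97"
assert _not_hex_characters.findall(debug_id)       # dashes flagged as invalid
assert not _not_uuid_characters.findall(debug_id)  # no invalid characters left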