@@ -187,6 +187,10 @@ def read_from(cls, fo: IO[bytes]) -> Tuple["LocalFileHeader", bytes]:
187187 int .from_bytes (data [n : n + 8 ], "little" )
188188 for n in range (0 , len (data ), 8 )
189189 ]
190+ # If a non-zip64-aware compressor produced this with a file
191+ # whose uncompressed length was exactly UINT32_MAX, we
192+ # don't go down this code path because it won't include the
193+ # extra.
190194 if inst .usize == UINT32_MAX :
191195 inst .usize = sizes .pop (0 )
192196 if inst .csize == UINT32_MAX :
@@ -223,6 +227,7 @@ def dump(self) -> Tuple[bytes, int]:
223227 fn = self .filename .encode ("utf-8" )
224228 flags |= FLAG_FILENAME_UTF8
225229
230+ # This modifies the extra of the original, but is idempotent.
226231 usize = self .usize
227232 csize = self .csize
228233 min_ver = self .version_needed
@@ -311,6 +316,14 @@ def from_lfh_and_relative_offset(
311316 filename = lfh .filename , # TODO ordering
312317 )
313318
def replace_extra(self, num: int, value: bytes) -> None:
    """
    Set the extra field with id ``num`` to ``value``.

    Every existing entry carrying that id is dropped and a single
    ``(num, value)`` entry is placed at the end; all other entries keep
    their relative order.  ``self.parsed_extra`` is rebound to a new list.
    """
    # Entries that are not being replaced survive in their original order.
    kept: List[Tuple[int, bytes]] = [
        (tag, payload) for tag, payload in self.parsed_extra if tag != num
    ]
    self.parsed_extra = kept + [(num, value)]
326+
314327 # TODO not happy with the name
315328 def dump (self ) -> bytes :
316329 flags = self .flags
@@ -322,36 +335,136 @@ def dump(self) -> bytes:
322335 except UnicodeEncodeError :
323336 fn = self .filename .encode ("utf-8" )
324337 flags |= FLAG_FILENAME_UTF8
325- # TODO dump these too, they're important
326- extra = b""
327- comment = b""
338+
339+ # This modifies the extra of the original, but is idempotent.
340+ usize = self .usize
341+ csize = self .csize
342+ relative_offset_of_lfh = self .relative_offset_of_lfh
343+ min_ver = self .version_needed
344+ if (
345+ self .usize >= UINT32_MAX
346+ or self .csize >= UINT32_MAX
347+ or self .relative_offset_of_lfh >= UINT32_MAX
348+ ):
349+ zip64_extra = struct .pack (
350+ "<QQQ" , self .usize , self .csize , self .relative_offset_of_lfh
351+ )
352+ usize = UINT32_MAX
353+ csize = UINT32_MAX
354+ relative_offset_of_lfh = UINT32_MAX
355+ self .replace_extra (1 , zip64_extra )
356+ min_ver = max (self .version_needed , ZIP64_VERSION )
357+ extra = b"" .join (
358+ struct .pack ("<HH" , i [0 ], len (i [1 ])) + i [1 ] for i in self .parsed_extra
359+ )
360+ extra_length = len (extra )
361+
362+ comment = (self .file_comment or "" ).encode ("utf-8" )
363+ comment_length = len (comment )
364+
328365 return (
329366 struct .pack (
330367 CENTRAL_DIRECTORY_FORMAT ,
331368 self .signature ,
332369 self .version_made_by ,
333- self . version_needed ,
370+ min_ver ,
334371 flags ,
335372 self .method ,
336373 self .mtime ,
337374 self .mdate ,
338375 self .crc32 ,
339- self . csize ,
340- self . usize ,
376+ csize ,
377+ usize ,
341378 # TODO always recalculates filename length, I guess?
342379 len (fn ),
343- 0 , # TODO extra_length
344- 0 , # TODO comment_length
380+ extra_length ,
381+ comment_length ,
345382 self .disk_start ,
346383 self .internal_attributes ,
347384 self .external_attributes ,
348- self . relative_offset_of_lfh ,
385+ relative_offset_of_lfh ,
349386 )
350387 + fn
351388 + extra
352389 + comment
353390 )
354391
@classmethod
def read_from(cls, fo: IO[bytes]) -> Tuple["CentralDirectoryHeader", bytes]:
    """
    Parse one central directory header from ``fo``.

    This isn't currently necessary or part of the public api when streaming.

    Only used for testing...

    Returns a tuple of the parsed instance and the raw bytes that were
    consumed (fixed-size header, filename, and extra data when present).

    Raises ValueError if the signature is wrong or the extra data does not
    end exactly on a record boundary, and NotImplementedError if the data
    descriptor flag is set.
    """
    buf = _readn(fo, struct.calcsize(CENTRAL_DIRECTORY_FORMAT))
    args = struct.unpack(CENTRAL_DIRECTORY_FORMAT, buf)
    inst = cls(*args)

    if inst.signature != CENTRAL_DIRECTORY_SIGNATURE:
        raise ValueError("Invalid signature %0x" % (inst.signature,))

    filename_data = _readn(fo, inst.filename_length)
    buf += filename_data

    if inst.flags & FLAG_FILENAME_UTF8:
        inst.filename = filename_data.decode("utf-8")  # can raise
    else:
        inst.filename = filename_data.decode("cp437")

    if inst.flags & FLAG_DATA_DESCRIPTOR:
        # I am not a fan of the complexity and additional validation
        # required to support this flag; although Python's zipfile.py can
        # generate such files, I don't see the usefulness and would like to
        # guarantee that files output by this library will not contain them.
        raise NotImplementedError("Data descriptor")

    # NOTE(review): dump() appends the encoded file comment after the extra
    # data, but nothing here consumes it -- confirm whether callers that
    # read several headers back to back need it skipped.
    if inst.extra_length:
        extra: List[Tuple[int, bytes]] = []
        extra_data = _readn(fo, inst.extra_length)

        i = 0
        # Only start a record when a full 4-byte (id, size) header remains,
        # so `_slicen` never has to raise for 1-3 stray bytes.  That case is
        # reported directly below the loop to make it clear that it's
        # leftover data at the _end_ rather than something completely
        # malformed.  The bound is inclusive so that a final record with an
        # empty payload (exactly 4 bytes left) is still parsed.
        while i <= len(extra_data) - 4:
            extra_id, data_size = struct.unpack(
                "<HH",
                _slicen(extra_data, i, 4),
            )
            i += 4
            data = _slicen(extra_data, i, data_size)
            i += data_size
            extra.append((extra_id, data))

            if extra_id == 1:  # zip64 entry
                sizes = [
                    int.from_bytes(data[n : n + 8], "little")
                    for n in range(0, len(data), 8)
                ]
                # If a non-zip64-aware compressor produced this with a file
                # whose uncompressed length was exactly UINT32_MAX, we
                # don't go down this code path because it won't include the
                # extra.
                if inst.usize == UINT32_MAX:
                    inst.usize = sizes.pop(0)
                if inst.csize == UINT32_MAX:
                    inst.csize = sizes.pop(0)
                if inst.relative_offset_of_lfh == UINT32_MAX:
                    inst.relative_offset_of_lfh = sizes.pop(0)
                # Can we be strict here?
                # if len(sizes) != 0:
                #     raise ValueError("Extra zip64 extra in CDH")
        if i != len(extra_data):
            raise ValueError("Extra length")
        inst.parsed_extra = tuple(extra)
        buf += extra_data

    return inst, buf
467+
355468
356469ZIP64_EOCD_FORMAT = "<LQHHLLQQQQ"
357470ZIP64_EOCD_SIGNATURE = 0x06064B50
0 commit comments