Skip to content

Commit 49ab68a

Browse files
committed
Fix BadGzipFile #42
1 parent 73751ae commit 49ab68a

File tree

3 files changed

+40
-24
lines changed

3 files changed

+40
-24
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ packages = ["pywaybackup"]
77

88
[project]
99
name = "pywaybackup"
10-
version = "4.1.0"
10+
version = "4.1.1"
1111
description = "Query and download archive.org as simple as possible."
1212
authors = [
1313
{ name = "bitdruid", email = "bitdruid@outlook.com" }

pywaybackup/archive_download.py

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import threading
55
import time
66
import urllib.parse
7+
from gzip import BadGzipFile
78
from http import HTTPStatus
89
from importlib.metadata import version
910
from socket import timeout
@@ -174,16 +175,20 @@ def _download_loop(self, worker: Worker):
174175
download_attempt += 1 # try again 2x with same connection
175176
vb.write(
176177
verbose=True,
177-
content=f"\n-----> Worker: {worker.id} \
178-
- Attempt: [{worker.attempt}/{retry_max_attempt}] \
179-
Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}] \
180-
- {e.__class__.__name__} - requesting again in 50 seconds...",
178+
content=(
179+
f"\n-----> Worker: {worker.id}"
180+
f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
181+
f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
182+
f" - {e.__class__.__name__} - requesting again in 50 seconds..."
183+
),
181184
)
182185
vb.write(
183186
verbose=False,
184-
content=f"Worker: {worker.id} \
185-
- Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total} \
186-
- requesting again in 50 seconds...",
187+
content=(
188+
f"Worker: {worker.id}"
189+
f" - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total}"
190+
f" - requesting again in 50 seconds..."
191+
),
187192
)
188193
time.sleep(50)
189194
continue
@@ -193,26 +198,32 @@ def _download_loop(self, worker: Worker):
193198
download_attempt = download_max_attempt # try again 1x with new connection
194199
vb.write(
195200
verbose=True,
196-
content=f"\n-----> Worker: {worker.id} \
197-
- Attempt: [{worker.attempt}/{retry_max_attempt}] \
198-
Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}] \
199-
- {e.__class__.__name__} - renewing connection in 15 seconds...",
201+
content=(
202+
f"\n-----> Worker: {worker.id}"
203+
f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
204+
f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
205+
f" - {e.__class__.__name__} - renewing connection in 15 seconds..."
206+
),
200207
)
201208
vb.write(
202209
verbose=False,
203-
content=f"Worker: {worker.id} \
204-
- Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total} \
205-
- renewing connection in 15 seconds...",
210+
content=(
211+
f"Worker: {worker.id}"
212+
f" - Snapshot {worker.snapshot.counter}/{self.sc._snapshot_total}"
213+
f" - renewing connection in 15 seconds..."
214+
),
206215
)
207216
time.sleep(15)
208217
worker.refresh_connection()
209218
continue
210219
else:
211220
ex.exception(
212-
message=f"\n-----> Worker: {worker.id} \
213-
- Attempt: [{worker.attempt}/{retry_max_attempt}] \
214-
Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}] \
215-
- EXCEPTION - {e}",
221+
message=(
222+
f"\n-----> Worker: {worker.id}"
223+
f" - Attempt: [{worker.attempt}/{retry_max_attempt}]"
224+
f" Snapshot ID: [{worker.snapshot.counter}/{self.sc._snapshot_total}]"
225+
f" - EXCEPTION - {e}"
226+
),
216227
e=e,
217228
)
218229
worker.attempt = retry_max_attempt
@@ -279,7 +290,11 @@ def _download(self, worker: Worker):
279290
if not os.path.isfile(context.output_file):
280291
with open(context.output_file, "wb") as file:
281292
if context.response.getheader("Content-Encoding") == "gzip":
282-
context.response_data = gzip.decompress(context.response_data)
293+
try:
294+
context.response_data = gzip.decompress(context.response_data)
295+
except BadGzipFile:
296+
vb.write(verbose=None, content=f"Worker: {worker.id} - GZIP DECOMPRESS SKIPPED - {context.snapshot_url}")
297+
pass
283298
file.write(context.response_data)
284299

285300
# check if file is downloaded

pywaybackup/db.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
)
1818
from sqlalchemy.ext.declarative import declarative_base
1919
from sqlalchemy.orm import sessionmaker
20+
from typing import Optional # python 3.8
2021

2122
Base = declarative_base()
2223

@@ -143,31 +144,31 @@ def write_progress(self, done: int, total: int):
143144
)
144145
self.session.commit()
145146

146-
def get_progress(self) -> str | None:
147+
def get_progress(self) -> Optional[str]:
147148
"""
148149
str or None: Progress string (e.g., '5 / 10') or None if not found.
149150
"""
150151
return self.session.execute(
151152
select(waybackup_job.query_progress).where(waybackup_job.query_identifier == self.query_identifier)
152153
).scalar_one_or_none()
153154

154-
def get_insert_complete(self) -> int | None:
155+
def get_insert_complete(self) -> Optional[int]:
155156
"""
156157
int or None: 1 if complete, 0 if not, or None if not found.
157158
"""
158159
return self.session.execute(
159160
select(waybackup_job.insert_complete).where(waybackup_job.query_identifier == self.query_identifier)
160161
).scalar_one_or_none()
161162

162-
def get_index_complete(self) -> int | None:
163+
def get_index_complete(self) -> Optional[int]:
163164
"""
164165
int or None: 1 if complete, 0 if not, or None if not found.
165166
"""
166167
return self.session.execute(
167168
select(waybackup_job.index_complete).where(waybackup_job.query_identifier == self.query_identifier)
168169
).scalar_one_or_none()
169170

170-
def get_filter_complete(self) -> int | None:
171+
def get_filter_complete(self) -> Optional[int]:
171172
"""
172173
int or None: 1 if complete, 0 if not, or None if not found.
173174
"""

0 commit comments

Comments
 (0)