Skip to content

Commit 994385e

Browse files
committed
Add --start-idx=<n> option to start download directly from item <n>
1 parent ee89550 commit 994385e

File tree

2 files changed, with 47 additions and 38 deletions.

AUTHORS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ Patches and Suggestions
 -----------------------
 
 - VM Brasseur
+- Russ Magee <rmagee@gmail.com>

internetarchive/cli/ia_download.py

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
-R, --retries=<retries> Set number of retries to <retries> [default: 5].
3434
-I, --itemlist=<file> Download items from a specified file. Itemlists should
3535
be a plain text file with one identifier per line.
36+
-n, --start-idx=<n> Start immediately at item <n>
3637
-S, --search=<query> Download items returned from a specified search query.
3738
-P, --search-parameters=<key:value>... Download items returned from a specified search query.
3839
-g, --glob=<pattern> Only download files whose filename matches the
@@ -110,6 +111,7 @@ def main(argv, session: ArchiveSession) -> None:
110111
'--download-history': Use(bool),
111112
'--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
112113
'--source': list,
114+
'--start-idx': Use(lambda x: x[0]),
113115
'--exclude-source': list,
114116
'--timeout': Or([], And(Use(lambda t: ast.literal_eval(t[0])), Or(int, float),
115117
error=timeout_msg))
@@ -128,6 +130,8 @@ def main(argv, session: ArchiveSession) -> None:
128130
print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr)
129131
sys.exit(1)
130132

133+
start_idx = int(args['--start-idx'])-1
134+
131135
retries = int(args['--retries'])
132136
ids: list[File | str] | Search | TextIO
133137

@@ -176,44 +180,48 @@ def main(argv, session: ArchiveSession) -> None:
176180
else:
177181
item_index = None
178182

179-
try:
180-
item = session.get_item(identifier)
181-
except Exception as exc:
182-
print(f'{identifier}: failed to retrieve item metadata - errors', file=sys.stderr)
183-
raise
184-
if 'You are attempting to make an HTTPS' in str(exc):
185-
print(f'\n{exc}', file=sys.stderr)
186-
sys.exit(1)
187-
else:
188-
continue
189-
190-
# Otherwise, download the entire item.
191-
ignore_history_dir = True if not args['--download-history'] else False
192-
_errors = item.download(
193-
files=files,
194-
formats=args['--format'],
195-
glob_pattern=args['--glob'],
196-
exclude_pattern=args['--exclude'],
197-
dry_run=args['--dry-run'],
198-
verbose=not args['--quiet'],
199-
ignore_existing=args['--ignore-existing'],
200-
checksum=args['--checksum'],
201-
destdir=args['--destdir'],
202-
no_directory=args['--no-directories'],
203-
retries=retries,
204-
item_index=item_index,
205-
ignore_errors=True,
206-
on_the_fly=args['--on-the-fly'],
207-
no_change_timestamp=args['--no-change-timestamp'],
208-
params=args['--parameters'],
209-
ignore_history_dir=ignore_history_dir,
210-
source=args['--source'],
211-
exclude_source=args['--exclude-source'],
212-
stdout=args['--stdout'],
213-
timeout=args['--timeout'],
214-
)
215-
if _errors:
216-
errors.append(_errors)
183+
if start_idx != None and i < start_idx:
184+
pass
185+
else:
186+
try:
187+
item = session.get_item(identifier)
188+
except Exception as exc:
189+
print(f'{identifier}: failed to retrieve item metadata - errors', file=sys.stderr)
190+
raise
191+
if 'You are attempting to make an HTTPS' in str(exc):
192+
print(f'\n{exc}', file=sys.stderr)
193+
sys.exit(1)
194+
else:
195+
continue
196+
197+
# Otherwise, download the entire item.
198+
ignore_history_dir = True if not args['--download-history'] else False
199+
_errors = item.download(
200+
files=files,
201+
formats=args['--format'],
202+
glob_pattern=args['--glob'],
203+
exclude_pattern=args['--exclude'],
204+
dry_run=args['--dry-run'],
205+
verbose=not args['--quiet'],
206+
ignore_existing=args['--ignore-existing'],
207+
checksum=args['--checksum'],
208+
destdir=args['--destdir'],
209+
no_directory=args['--no-directories'],
210+
retries=retries,
211+
item_index=item_index,
212+
ignore_errors=True,
213+
on_the_fly=args['--on-the-fly'],
214+
no_change_timestamp=args['--no-change-timestamp'],
215+
params=args['--parameters'],
216+
ignore_history_dir=ignore_history_dir,
217+
source=args['--source'],
218+
exclude_source=args['--exclude-source'],
219+
stdout=args['--stdout'],
220+
timeout=args['--timeout'],
221+
)
222+
if _errors:
223+
errors.append(_errors)
224+
##endif (start_idx)
217225
if errors:
218226
# TODO: add option for a summary/report.
219227
sys.exit(1)

0 commit comments

Comments
 (0)