diff --git a/.gitignore b/.gitignore index fe524a4..e22335b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ MANIFEST .DS_Store build/ dist/ +*.egg-info diff --git a/setup.py b/setup.py index d5786b5..e32a604 100755 --- a/setup.py +++ b/setup.py @@ -16,4 +16,6 @@ author="Ludvig Ericson", author_email="ludvig@lericson.se", description="Simple, quick Amazon AWS S3 interface", long_description=long_description, - packages=["simples3"]) + packages=["simples3"], + entry_points=dict(console_scripts=[ + 'simples3-rm=simples3.commandline:rm_tool'])) diff --git a/simples3/__init__.py b/simples3/__init__.py index 9194426..c0ff4a8 100644 --- a/simples3/__init__.py +++ b/simples3/__init__.py @@ -123,6 +123,6 @@ __version__ = "1.1.0" -from .bucket import S3File, S3Bucket, S3Error, KeyNotFound -S3File, S3Bucket, S3Error, KeyNotFound # pyflakes -__all__ = "S3File", "S3Bucket", "S3Error" +from .bucket import S3File, S3Item, S3Bucket, S3Error, KeyNotFound +S3File, S3Item, S3Bucket, S3Error, KeyNotFound # pyflakes +__all__ = "S3File", "S3Item", "S3Bucket", "S3Error" diff --git a/simples3/bucket.py b/simples3/bucket.py index 8870488..ee65952 100644 --- a/simples3/bucket.py +++ b/simples3/bucket.py @@ -10,6 +10,7 @@ import datetime import warnings from xml.etree import cElementTree as ElementTree +from collections import namedtuple from contextlib import contextmanager from urllib import quote_plus from base64 import b64encode @@ -164,6 +165,9 @@ def __init__(self, value, **kwds): def put_into(self, bucket, key): return bucket.put(key, **self.kwds) +# A holder for tuples yielded by S3Bucket.listdir function. +S3Item = namedtuple('S3Item', 'key modified etag size') + class S3Listing(object): """Representation of a single pageful of S3 bucket listing data.""" @@ -200,7 +204,7 @@ def _el2item(self, el): modify = _iso8601_dt(get("LastModified")) etag = get("ETag") size = int(get("Size")) - return (key, modify, etag, size) + return S3Item._make((key, modify, etag, size)) class S3Bucket(object): default_encoding = "utf-8" @@ -372,7 +376,8 @@ def _get_listing(self, args): def listdir(self, prefix=None, marker=None, limit=None, delimiter=None): """List bucket contents. - Yields tuples of (key, modified, etag, size). + Yields tuples of (key, modified, etag, size). For convenience, + such tuples are actually instances of named tuple S3Item. *prefix*, if given, predicates `key.startswith(prefix)`. *marker*, if given, predicates `key > marker`, lexicographically. diff --git a/simples3/commandline.py b/simples3/commandline.py new file mode 100644 index 0000000..aabb848 --- /dev/null +++ b/simples3/commandline.py @@ -0,0 +1,54 @@ +from __future__ import absolute_import, print_function + +import os +from itertools import imap, izip_longest, repeat, takewhile +from operator import attrgetter + +from .bucket import S3Bucket + +__all__ = 'BatchDeleter', 'rm_tool' + + +class BatchDeleter(object): + def __init__(self, bucket, prefix, dry_run=True): + self.bucket = bucket + self.prefix = prefix + self.dry_run = dry_run + + def check_key(self, item): + if item: + assert item.key.startswith(self.prefix), ("%(key)s doesn't have prefix %(prefix)s" + % dict(key=item.key, prefix=self.prefix)) + return True + + def __call__(self, items): + victims = tuple(takewhile(self.check_key, items)) + if victims and not self.dry_run: + self.bucket.delete(*map(attrgetter('key'), victims)) + return victims + +def list_matching_items(bucket, prefix): + return takewhile(lambda i: i.key.startswith(prefix), bucket.listdir(prefix)) + +def grouper(n, iterable, fillvalue=None): + "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" + return izip_longest(fillvalue=fillvalue, *repeat(iter(iterable), n)) + +def rm_tool(): + bucket = S3Bucket('what.bucket.you.want.to.clean.up.today.eh', + access_key=os.environ['AWS_ACCESS_KEY_ID'], + secret_key=os.environ['AWS_SECRET_ACCESS_KEY']) + prefix = '' + dry_run = True + + #from multiprocessing import Pool + #pool = Pool(12) + #mapper = pool.imap_unordered + mapper = imap + + n = 0 + for deleted in mapper(BatchDeleter(bucket, prefix, dry_run), + grouper(1000, list_matching_items(bucket, prefix))): + n += len(deleted) + print("Would delete" if dry_run else "Deleted", len(deleted), "keys.") + print("Keys processed so far:", n)