Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Changelog
3.0.0b134 (unreleased)
----------------------

- Change all references from md5 to sha256
- Fix mass copy/paste functionality


Expand Down
54 changes: 54 additions & 0 deletions castle/cms/_scripts/md5_to_sha256.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse
import hashlib
import logging

from castle.cms.files import aws
from castle.cms.services.google.youtube import get_youtube_service
import plone.api as api
from zope.component.hooks import setSite


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Key prefix used when building the sha256-based S3 object paths in this script.
RESOURCES_KEY_PREFIX = 'archiveresources/'

# Command-line options are parsed at import time, before any function runs.
parser = argparse.ArgumentParser(
description='...')
# Id of the site object looked up in the __main__ section; defaults to 'Castle'.
parser.add_argument('--site-id', dest='site_id', default='Castle')
# parse_known_args() tolerates unrecognised arguments; the leftovers are discarded.
# NOTE(review): presumably needed because the script runner passes its own
# arguments on the same command line -- confirm.
args, _ = parser.parse_known_args()


def update_s3_resources():
    """Re-key archived S3 resources from their old key to a sha256-based key.

    For every object stored under ``RESOURCES_KEY_PREFIX`` the object body is
    hashed with sha256 and the object is copied to
    ``<prefix><h[0]>/<h[1]>/<h[2]>/<h>``.  The old key is deleted only after
    the copy has succeeded.  Objects whose sha256 key already exists are left
    untouched.

    Raises:
        botocore.exceptions.ClientError: if S3 rejects a lookup, copy or
            delete for any reason other than "destination key not found".
    """
    # Imported here because the module's import block does not bring
    # botocore into scope.
    from botocore.exceptions import ClientError

    bucket_name = api.portal.get_registry_record(
        'castle.aws_s3_bucket_name')
    _, bucket = aws.get_bucket(s3_bucket=bucket_name)

    # Only touch archive resources; other objects in the same bucket must not
    # be moved or deleted by this migration.
    for object_summary in bucket.objects.filter(Prefix=RESOURCES_KEY_PREFIX):
        md5_key = object_summary.key

        # Hash the object's bytes in chunks so large objects are never held
        # in memory as a whole.
        body = bucket.Object(md5_key).get()['Body']
        digest = hashlib.sha256()
        for chunk in iter(lambda: body.read(1024 * 1024), b''):
            digest.update(chunk)
        sha256 = digest.hexdigest()

        # NOTE(review): keys written with an extension suffix would lose that
        # suffix here -- confirm whether such keys exist before running.
        sha256_content_path = '{0}{1}/{2}/{3}/{4}'.format(
            RESOURCES_KEY_PREFIX, sha256[0], sha256[1], sha256[2], sha256
        )
        if md5_key == sha256_content_path:
            # Already stored under its sha256 key.
            continue

        new_obj = bucket.Object(sha256_content_path)
        try:
            # load() issues a HEAD request and raises ClientError when the
            # key does not exist.
            new_obj.load()
        except ClientError as err:
            code = err.response.get('Error', {}).get('Code')
            if code not in ('404', 'NoSuchKey', 'NotFound'):
                # Permission or connectivity problem, not a missing key.
                raise
        else:
            logger.info(
                'sha256 key %s already exists for %s, skipping',
                sha256_content_path, md5_key)
            continue

        # copy() returns None and raises on failure, so reaching the delete
        # below means the copy succeeded.
        new_obj.copy({
            'Bucket': bucket.name,
            'Key': md5_key
        })
        try:
            bucket.Object(md5_key).delete()
        except ClientError:
            logger.error(
                'Unable to delete md5 hashed key %s. '
                'This key should not be used for security purposes',
                md5_key)
            raise

if __name__ == '__main__':
    # `app` is not defined in this file (hence the noqa).
    # NOTE(review): presumably injected by the Zope script runner -- confirm.
    site = app[args.site_id]  # noqa
    # Make the selected site the active component site before touching it.
    setSite(site)
    # update_s3_resources() takes no parameters; passing `site` raised TypeError.
    update_s3_resources()
10 changes: 5 additions & 5 deletions castle/cms/_scripts/templates/watch-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
times = {}


def md5(fname):
hash_md5 = hashlib.md5()
def sha256(fname):
hash_sha256 = hashlib.sha256()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
hash_sha256.update(chunk)
return hash_sha256.hexdigest()


def check(directory):
Expand All @@ -29,7 +29,7 @@ def check(directory):
if check(filepath):
changed = True
else:
hashed = md5(filepath)
hashed = sha256(filepath)
current_hash = times.get(filepath, None)

times[filepath] = hashed
Expand Down
6 changes: 3 additions & 3 deletions castle/cms/archival.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def __init__(self, site, UrlOpener=SubrequestUrlOpener):
except AttributeError:
self.archives = self.site._archives = OOBTree()
self.path_to_uid = self.site._archives_path_to_uid = OOBTree()
self.resources = {} # path -> md5 dict
self.resources = {} # path -> sha256 dict
self.errors = [] # so we can ignore

try:
Expand Down Expand Up @@ -424,10 +424,10 @@ def move_resource(self, url, keep_ext=False, use_vhm=True):
fidata = fidata.replace(sub_url, new_url)

# upload to amazon and get url!
md5 = hashlib.md5(fidata).hexdigest()
sha256 = hashlib.sha256(fidata).hexdigest()

content_path = '{0}{1}/{2}/{3}/{4}'.format(
RESOURCES_KEY_PREFIX, md5[0], md5[1], md5[2], md5
RESOURCES_KEY_PREFIX, sha256[0], sha256[1], sha256[2], sha256
)
if keep_ext and '.' in url:
ext = url.split('.')[-1]
Expand Down
16 changes: 8 additions & 8 deletions castle/cms/browser/content/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,12 @@ def _clean_tmp(self, info):

def detect_duplicate(self, info):
dup_detector = duplicates.DuplicateDetector()
md5_hash = commands.md5(info['tmp_file'])
obj = dup_detector.get_object(md5_hash)
sha256_hash = commands.sha256(info['tmp_file'])
obj = dup_detector.get_object(sha256_hash)
if obj is not None:
# found, use existing file...
raise duplicates.DuplicateException(obj)
return dup_detector, md5_hash
return dup_detector, sha256_hash

def handle_auto_folder_creation(self, folder, type_):
# we only auto publish built-in repositories, otherwise, leave it be
Expand Down Expand Up @@ -366,18 +366,18 @@ def create_file_content(self, info):
original_location == location
folder = utils.recursive_create_path(self.context, location)

md5_hash = dup_detector = None
sha256_hash = dup_detector = None

if original_location == location:
# first off, check first that it wasn't already uploaded...
# we are only checking images for now...
if type_ == 'Image' and commands.md5:
dup_detector, md5_hash = self.detect_duplicate(info)
if type_ == 'Image' and commands.sha256:
dup_detector, sha256_hash = self.detect_duplicate(info)
self.handle_auto_folder_creation(folder, type_)

obj = self.create_object(folder, type_, info)
if type_ == 'Image' and md5_hash and dup_detector:
dup_detector.register(obj, md5_hash)
if type_ == 'Image' and sha256_hash and dup_detector:
dup_detector.register(obj, sha256_hash)
return obj

def create_object(self, folder, type_, info):
Expand Down
2 changes: 2 additions & 0 deletions castle/cms/browser/security/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def __call__(self):
for key in keyring:
if key is None:
continue
# this code matches the hashing code in rocket chat and cannot
# be updated to sha256
value = hmac.new(key, user + salt, sha).hexdigest()
if _is_equal(value, token):
return json.dumps({
Expand Down
35 changes: 35 additions & 0 deletions castle/cms/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,41 @@ def __call__(self, filepath):
logger.warn('gs not installed. Some metadata might remain in PDF files.')


class SHA256SubProcess(BaseSubProcess):
    """
    To get sha256 hash of files on the filesystem so
    large files do not need to be loaded into
    memory to be checked
    """
    if os.name == 'nt':
        bin_name = 'sha256sum.exe'
    else:
        bin_name = 'sha256sum'

    # A sha256 hex digest is always 64 hexadecimal characters.
    _DIGEST_LENGTH = 64
    _HEX_DIGITS = frozenset('0123456789abcdefABCDEF')

    def __call__(self, filepath):
        """Return the sha256 hex digest of ``filepath``.

        Two output layouts are understood:

        * ``<digest>  <filename>`` (digest first)
        * ``SHA256 (<filename>) = <digest>`` (digest after the last ``=``)

        Returns ``None`` when the command produced no output.
        """
        cmd = [self.binary, filepath]
        output = (self._run_command(cmd) or '').strip()
        if not output:
            logger.error(
                'sha256 command returned no output for %s. castle.cms '
                'will not be able to detect sha256 of this file.', filepath)
            return None

        line = output.splitlines()[0]
        # GNU sha256sum prefixes the line with a backslash when it had to
        # escape characters in the filename.
        tokens = line.lstrip('\\').split(None, 1)
        first_token = tokens[0] if tokens else ''
        if self._is_digest(first_token):
            return first_token
        if '=' in line:
            # Use the last '=' so a filename containing '=' cannot be
            # mistaken for the digest.
            return line.rsplit('=', 1)[1].strip()
        return first_token

    def _is_digest(self, token):
        """Return True when ``token`` looks like a sha256 hex digest."""
        return (len(token) == self._DIGEST_LENGTH and
                self._HEX_DIGITS.issuperset(token))


# Module-level sha256 helper shared by importers of this module.
# It is set to None when constructing SHA256SubProcess raises IOError, so
# callers must check for None before calling it.
# NOTE(review): presumably the IOError means the binary was not found -- confirm
# against BaseSubProcess.
try:
sha256 = SHA256SubProcess()
except IOError:
sha256 = None


# Keep the md5 classes for any residual objects whose stored hashes were computed with md5.
# An object that uses an md5 hash should be marked with the attribute useforsecurity = False.
class MD5SubProcess(BaseSubProcess):
"""
To get md5 hash of files on the filesystem so
Expand Down
10 changes: 5 additions & 5 deletions castle/cms/files/duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ def __init__(self):
self.annotations = IAnnotations(self.site)
self.data = self.annotations.get(DATA_KEY)

def register(self, obj, hash):
def register(self, obj, hashed):
if self.data is None:
self.annotations[DATA_KEY] = OOBTree()
self.data = self.annotations[DATA_KEY]
self.data[hash] = IUUID(obj)
self.data[hashed] = IUUID(obj)

def get_object(self, hash):
def get_object(self, hashed):
if self.data is None:
return None
if hash not in self.data:
if hashed not in self.data:
return None
uuid = self.annotations[DATA_KEY][hash]
uuid = self.annotations[DATA_KEY][hashed]
return uuidToObject(uuid)


Expand Down
34 changes: 22 additions & 12 deletions castle/cms/media/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from shutil import copyfile, rmtree
from tempfile import mkdtemp

from castle.cms.commands import avconv, md5
from castle.cms.commands import avconv, sha256, md5
from castle.cms.files import aws
from castle.cms.services.google import youtube
from collective.celery.utils import getCelery
Expand Down Expand Up @@ -34,14 +34,22 @@ def process(context):
return

# by default, assume all non-mp4 videos need to be converted
# but in reality, all videos need converting, even mp4.
# md5 is only what makes this possible
convert_it = video.contentType.split('/')[-1] != 'mp4'
if md5 is not None:
old_hash = getattr(context, '_file_hash', None)
current_hash = md5(bfilepath)
if old_hash is None or old_hash != current_hash:
convert_it = True
old_hash = getattr(context, '_file_hash', None)
if old_hash is None:
# video has not been converted
context.__setattr__('useforsecurity', True)
if context.useforsecurity is None:
# video has been converted but hashed with md5
context.__setattr__('useforsecurity', False)
if context.useforsecurity is False:
if md5 is not None:
current_hash = md5(bfilepath)
else:
if sha256 is not None:
current_hash = sha256(bfilepath)
if old_hash is None or old_hash != current_hash:
convert_it = True

if context.image and not convert_it:
# already an mp4 and already has a screen grab
Expand All @@ -51,7 +59,8 @@ def process(context):
try:
youtube.upload(context, bfilepath, filename=video.filename)
# saving hash tells us we do not need to convert anymore...
context._file_hash = md5(bfilepath)
context.__setattr__('useforsecurity', True)
context._file_hash = sha256(bfilepath)
convert_it = False
except Exception:
logger.error('Error uploading youtube video', exc_info=True)
Expand All @@ -68,11 +77,12 @@ def process(context):
logger.info('Could not convert video', exc_info=True)
if (os.path.exists(output_filepath) and
os.path.getsize(output_filepath) > 0):
if md5 is not None:
if sha256 is not None:
try:
context._file_hash = md5(output_filepath)
context.__setattr__('useforsecurity', True)
context._file_hash = sha256(output_filepath)
except Exception:
logger.info('Could not get md5', exc_info=True)
logger.info('Could not get sha256', exc_info=True)
if not getCelery().conf.task_always_eager:
context._p_jar.sync()
fi = open(output_filepath)
Expand Down
20 changes: 12 additions & 8 deletions castle/cms/tasks/files.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

import transaction
from castle.cms.commands import md5
from castle.cms.commands import md5, sha256
from castle.cms.files import aws
from castle.cms.media import video
from castle.cms.services.google import youtube
Expand Down Expand Up @@ -87,13 +87,17 @@ def youtube_video_edited(obj):

# we can only edit if file has NOT changed
# otherwise, it will be reuploaded and original deleted
if md5 is not None:
old_hash = getattr(obj, '_file_hash', None)
if old_hash is not None:
current_hash = md5(bfilepath)
if old_hash != current_hash:
# dive out, we don't want to edit
return
old_hash = getattr(obj, '_file_hash', None)
if old_hash is not None:
if obj.useforsecurity is True:
if sha256 is not None:
current_hash = sha256(bfilepath)
if obj.useforsecurity is False:
if md5 is not None:
current_hash = md5(bfilepath)
if old_hash != current_hash:
# dive out, we don't want to edit
return

youtube.edit(obj)

Expand Down
33 changes: 33 additions & 0 deletions castle/cms/tests/test_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,39 @@ def test_upload(self):
self.assertEquals(fileOb.file.data, 'X' * 1024 * 5)
return fileOb

def test_upload_image(self):
    """Upload an image in five 1024-character chunks and check the stored data."""
    chunk_size = 1024
    chunk_count = 5
    payload = 'X' * chunk_size

    # First chunk: the response carries the id that later chunks send back.
    self.request.form.update({
        'action': 'chunk-upload',
        'chunk': '1',
        'chunkSize': chunk_size,
        'totalSize': chunk_size * chunk_count,
        'image': BytesIO(payload),
        'name': 'foobar.jpg'
    })
    creator = content.Creator(self.portal, self.request)
    data = json.loads(creator())
    self.assertTrue('id' in data)
    self.assertTrue(data['success'])
    self.request.form.update({
        'id': data['id']
    })

    # Remaining chunks, numbered 2 through 5.
    for chunk_number in range(2, chunk_count + 1):
        self.request.form.update({
            'action': 'chunk-upload',
            'chunk': str(chunk_number),
            'image': BytesIO(payload)
        })
        creator = content.Creator(self.portal, self.request)
        data = json.loads(creator())
        self.assertTrue(data['success'])

    # The response to the final chunk reports the finished upload.
    self.assertTrue(data['valid'])
    self.assertTrue('url' in data)
    uploaded = api.content.get(path='/image-repository/foobar.jpg')
    self.assertEquals(uploaded.image.data, payload * chunk_count)
    return uploaded

def test_update_upload(self):
fileOb = self.test_upload()
api.content.transition(obj=fileOb, to_state='published')
Expand Down