From 4df5eb449dc7d73e40116d7a8f9e1c6bd1e497e2 Mon Sep 17 00:00:00 2001 From: TrellixVulnTeam Date: Sun, 16 Oct 2022 14:15:10 +0000 Subject: [PATCH] Adding tarfile member sanitization to extractall() --- .../trj_correlated_motions_transformer.py | 21 +++++++++- .../transformers/trj_hbond_transformer.py | 21 +++++++++- .../transformers/trj_mmgbsa_transformer.py | 21 +++++++++- .../transformers/trj_nonbonded_transformer.py | 42 ++++++++++++++++++- .../transformers/trj_rms_transformer.py | 21 +++++++++- .../transformers/trj_torsion_transformer.py | 21 +++++++++- .../trj_water_contacts_transformer.py | 21 +++++++++- transformers/utils/preprocess.py | 21 +++++++++- 8 files changed, 180 insertions(+), 9 deletions(-) diff --git a/transformers/transformers/trj_correlated_motions_transformer.py b/transformers/transformers/trj_correlated_motions_transformer.py index 5df798e..3c0e95f 100644 --- a/transformers/transformers/trj_correlated_motions_transformer.py +++ b/transformers/transformers/trj_correlated_motions_transformer.py @@ -394,7 +394,26 @@ def process(structure_dict): trj_dir = trjtar elif tarfile.is_tarfile(trjtar): with tarfile.open(name=trjtar, mode='r:gz') as tfile: - tfile.extractall() + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tfile) logger.info('extracting frameset') trj_dir = tfile.getnames()[0] else: diff --git a/transformers/transformers/trj_hbond_transformer.py b/transformers/transformers/trj_hbond_transformer.py index d708a2c..0ed434f 100644 --- a/transformers/transformers/trj_hbond_transformer.py +++ b/transformers/transformers/trj_hbond_transformer.py @@ -479,7 +479,26 @@ def _process(structure_dict): trj_dir = trjtar elif tarfile.is_tarfile(trjtar): with tarfile.open(name=trjtar, mode='r:gz') as tfile: - tfile.extractall() + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tfile) logger.info('extracting frameset') trj_dir = tfile.getnames()[0] else: diff --git a/transformers/transformers/trj_mmgbsa_transformer.py b/transformers/transformers/trj_mmgbsa_transformer.py index 11fd87f..cabec2a 100644 --- a/transformers/transformers/trj_mmgbsa_transformer.py +++ b/transformers/transformers/trj_mmgbsa_transformer.py @@ -169,7 +169,26 @@ def _process(structure_dict): trj_dir = trjtar elif tarfile.is_tarfile(trjtar): with tarfile.open(name=trjtar, mode='r:gz') as tfile: - tfile.extractall() + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tfile) logger.info('extracting frameset') trj_dir = tfile.getnames()[0] else: diff --git a/transformers/transformers/trj_nonbonded_transformer.py b/transformers/transformers/trj_nonbonded_transformer.py index 21696e1..ac208bf 100644 --- a/transformers/transformers/trj_nonbonded_transformer.py +++ b/transformers/transformers/trj_nonbonded_transformer.py @@ -652,7 +652,26 @@ def _process(structure_dict): trj_dir = trjtar elif tarfile.is_tarfile(trjtar): with tarfile.open(name=trjtar, mode='r:gz') as tfile: - tfile.extractall() + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tfile) logger.info('extracting frameset') trj_dir = tfile.getnames()[0] else: @@ -687,7 +706,26 @@ def _process(structure_dict): with tarfile.open(nonbonded_raw, 'r:bz2') as tar: if not os.path.isdir('./tmp'): os.mkdir('./tmp') - tar.extractall(path='./tmp') + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path="./tmp") members = tar.getmembers() with tarfile.open(outfile_raw, 'w:bz2') as tar: for member in members: diff --git a/transformers/transformers/trj_rms_transformer.py b/transformers/transformers/trj_rms_transformer.py index d3028a2..b1c3bcf 100644 --- a/transformers/transformers/trj_rms_transformer.py +++ b/transformers/transformers/trj_rms_transformer.py @@ -262,7 +262,26 @@ def _process(structure_dict): trj_dir = trjtar elif tarfile.is_tarfile(trjtar): with tarfile.open(name=trjtar, mode='r:gz') as tfile: - tfile.extractall() + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tfile) logger.info('extracting frameset') trj_dir = tfile.getnames()[0] else: diff --git a/transformers/transformers/trj_torsion_transformer.py b/transformers/transformers/trj_torsion_transformer.py index 83be81d..83f6794 100755 --- a/transformers/transformers/trj_torsion_transformer.py +++ b/transformers/transformers/trj_torsion_transformer.py @@ -288,7 +288,26 @@ def _process(structure_dict): trj_dir = trjtar elif tarfile.is_tarfile(trjtar): with tarfile.open(name=trjtar, mode='r:gz') as tfile: - tfile.extractall() + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tfile) trj_dir = tfile.getnames()[0] else: raise RuntimeError('trjtar is neither a directory nor a tarfile') diff --git a/transformers/transformers/trj_water_contacts_transformer.py b/transformers/transformers/trj_water_contacts_transformer.py index e5ca106..078bc32 100644 --- a/transformers/transformers/trj_water_contacts_transformer.py +++ b/transformers/transformers/trj_water_contacts_transformer.py @@ -216,7 +216,26 @@ def _process(structure_dict): trj_dir = trjtar elif tarfile.is_tarfile(trjtar): with tarfile.open(name=trjtar, mode='r:gz') as tfile: - tfile.extractall() + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tfile) logger.info('extracting frameset') trj_dir = tfile.getnames()[0] else: diff --git a/transformers/utils/preprocess.py b/transformers/utils/preprocess.py index eef639b..bab7d33 100644 --- a/transformers/utils/preprocess.py +++ b/transformers/utils/preprocess.py @@ -526,7 +526,26 @@ def preprocess_torsion(api, dataset, data_type=None, no_ligand=False): trj_torsion = get_trj_torsion(api, stid, dir=tempdir) with tarfile.open(trj_torsion, 'r:gz') as tar: - tar.extractall(path=tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=tempdir) torsion_ids = pd.read_csv('torsion_ids.csv', sep=',', index_col=0) for tid in torsion_ids.index: