-"""Module contains classes for input/output processing."""
-
-import os
-import json
-import numpy as np
-
-from .utilities import decipher_atom_key, unit_cell_to_lattice_array
-
-
-class _CorruptedPDBFile(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _CorruptedXYZFile(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _FileAlreadyExists(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _NotADictionary(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _FileTypeError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-[docs]class Input(object):
- """Class used to load and process input files."""
-
- def __init__(self):
- self._load_funcs = {
- '.xyz': self._read_xyz,
- '.pdb': self._read_pdb,
- '.mol': self._read_mol,
- }
-
-[docs] def load_file(self, filepath):
- """
- This function opens any type of a readable file and decompose
- the file object into a list, for each line, of lists containing
- splitted line strings using space as a spacer.
-
- Parameters
- ----------
- filepath : :class:`str`
- The full path or a relative path to any type of file.
-
- Returns
- -------
- :class:`dict`
- Returns a dictionary containing the molecular information
- extracted from the input files. This information will
- vary with file type and information stored in it.
- The data is sorted into lists that contain one feature
- for example key atom_id: [atom_id_1, atom_id_2]
- Over the process of analysis this dictionary will be updated
- with new data.
- """
- self.file_path = filepath
- _, self.file_type = os.path.splitext(filepath)
- _, self.file_name = os.path.split(filepath)
- with open(filepath) as ffile:
- self.file_content = ffile.readlines()
-
- return (self._load_funcs[self.file_type]())
-
-[docs] def load_rdkit_mol(self, mol):
- """
- Return molecular data from :class:`rdkit.Chem.rdchem.Mol` object.
-
- Parameters
- ----------
- mol : :class:`rdkit.Chem.rdchem.Mol`
- A molecule object from RDKit.
-
- Returns
- -------
- :class:`dict`
- A dictionary with ``elements`` and ``coordinates`` as keys
- containing molecular data extracted from
- :class:`rdkit.Chem.rdchem.Mol` object.
-
- """
- self.system = {
- 'elements': np.empty(
- mol.GetNumAtoms(), dtype=str),
- 'coordinates': np.empty((mol.GetNumAtoms(), 3))
- }
- for atom in mol.GetAtoms():
- atom_id = atom.GetIdx()
- atom_sym = atom.GetSymbol()
- x, y, z = mol.GetConformer().GetAtomPosition(atom_id)
- self.system['elements'][atom_id] = atom_sym
- self.system['coordinates'][atom_id] = x, y, z
- return self.system
-
- def _read_xyz(self):
- """"""
- try:
- self.system = dict()
- self.file_remarks = self.file_content[1]
- self.system['elements'] = np.array(
- [i.split()[0] for i in self.file_content[2:]])
- self.system['coordinates'] = np.array(
- [[float(j[0]), float(j[1]), float(j[2])]
- for j in [i.split()[1:] for i in self.file_content[2:]]])
- return self.system
- except IndexError:
- raise _CorruptedXYZFile(
- "The XYZ file is corrupted in some way. For example, an empty "
- "line at the end etc. or it is a trajectory. If the latter is "
- "the case, please use `trajectory` module, otherwise fix it.")
-
- def _read_pdb(self):
- """"""
- if sum([i.count('END ') for i in self.file_content]) > 1:
- raise _CorruptedPDBFile(
- "Multiple 'END' statements were found in this PDB file."
- "If this is a trajectory, use a trajectory module, "
- "Otherwise, fix it.")
- self.system = dict()
- self.system['remarks'] = [
- i for i in self.file_content if i[:6] == 'REMARK'
- ]
- self.system['unit_cell'] = np.array([
- float(x)
- for i in self.file_content for x in
- [i[6:15], i[15:24], i[24:33], i[33:40], i[40:47], i[47:54]]
- if i[:6] == 'CRYST1'
- ])
- if self.system['unit_cell'].any():
- self.system['lattice'] = unit_cell_to_lattice_array(self.system[
- 'unit_cell'])
- self.system['atom_ids'] = np.array(
- [
- i[12:16].strip() for i in self.file_content
- if i[:6] == 'HETATM' or i[:6] == 'ATOM '
- ],
- dtype='<U8')
- self.system['elements'] = np.array(
- [
- i[76:78].strip() for i in self.file_content
- if i[:6] == 'HETATM' or i[:6] == 'ATOM '
- ],
- dtype='<U8')
- self.system['coordinates'] = np.array(
- [[float(i[30:38]), float(i[38:46]), float(i[46:54])]
- for i in self.file_content
- if i[:6] == 'HETATM' or i[:6] == 'ATOM '])
- return self.system
-
- def _read_mol(self):
- """-V3000"""
- self.system = dict()
- if self.file_content[2] != '\n':
- self.system['remarks'] = self.file_content[2]
- file_body = [i.split() for i in self.file_content]
- elements = []
- coordinates = []
- atom_data = False
- for line in file_body:
- if len(line) > 2:
- if line[2] == 'END' and line[3] == 'ATOM':
- atom_data = False
- if atom_data is True:
- elements.append(line[3])
- coordinates.append(line[4:7])
- if line[2] == 'BEGIN' and line[3] == 'ATOM':
- atom_data = True
- self.system['elements'] = np.array(elements)
- self.system['coordinates'] = np.array(coordinates, dtype=float)
- return self.system
-
-
-[docs]class Output(object):
- """Class used to process and save output files."""
-
- def __init__(self):
- self.cwd = os.getcwd()
- self._save_funcs = {
- 'xyz': self._save_xyz,
- 'pdb': self._save_pdb,
- }
-
-[docs] def dump2json(self, obj, filepath, override=False, **kwargs):
- """
- Dump a dictionary into a JSON dictionary.
-
- Uses the json.dump() function.
-
- Parameters
- ----------
- obj : :class:`dict`
- A dictionary to be dumpped as JSON file.
-
- filepath : :class:`str`
- The filepath for the dumped file.
-
- override : :class:`bool`
- If True, any file in the filepath will be override. (default=False)
- """
- # We make sure that the object passed by the user is a dictionary.
- if isinstance(obj, dict):
- pass
- else:
- raise _NotADictionary(
- "This function only accepts dictionaries as input")
- # We check if the filepath has a json extenstion, if not we add it.
- if str(filepath[-4:]) == 'json':
- pass
- else:
- filepath = ".".join((str(filepath), "json"))
- # First we check if the file already exists. If yes and the override
- # keyword is False (default), we will raise an exception. Otherwise
- # the file will be overwritten.
- if override is False:
- if os.path.isfile(filepath):
- raise _FileAlreadyExists(
- "The file {0} already exists. Use a different filepath, "
- "or set the 'override' kwarg to True.".format(filepath))
- # We dump the object to the json file. Additional kwargs can be passed.
- with open(filepath, 'w+') as json_file:
- json.dump(obj, json_file, **kwargs)
-
-[docs] def dump2file(self, obj, filepath, override=False, **kwargs):
- """
- Dump a dictionary into a file. (Extensions: XYZ or PDB)
-
- Parameters
- ----------
- obj : :class:`dict`
- A dictionary containing molecular information.
-
- filepath : :class:`str`
- The filepath for the dumped file.
-
- override : :class:`bool`
- If True, any file in the filepath will be override. (default=False)
- """
- # First we check if the file already exists. If yes and the override
- # keyword is False (default), we will raise an exception. Otherwise
- # the file will be overwritten.
- if override is False:
- if os.path.isfile(filepath):
- raise _FileAlreadyExists(
- "The file {0} already exists. Use a different filepath, "
- "or set the 'override' kwarg to True.".format(filepath))
- if str(filepath[-3:]) not in self._save_funcs.keys():
- raise _FileTypeError(
- "The {0} file extension is "
- "not supported for dumping a MolecularSystem or a Molecule. "
- "Please use XYZ or PDB.".format(str(filepath[-3:])))
- self._save_funcs[str(filepath[-3:])](obj, filepath, **kwargs)
-
- def _save_xyz(self, system, filepath, **kwargs):
- """"""
- # Initial settings.
- settings = {
- 'elements': 'elements',
- 'coordinates': 'coordinates',
- 'remark': " ",
- 'decipher': False,
- 'forcefield': None,
- }
- settings.update(kwargs)
- # Extract neccessary data.
- elements = system['elements']
- coordinates = system['coordinates']
- if settings['decipher'] is True:
- elements = np.array([
- decipher_atom_key(
- key, forcefield=settings['forcefield']) for key in elements
- ])
- string = '{0:0d}\n{1}\n'.format(len(elements), str(settings['remark']))
- for i, j in zip(elements, coordinates):
- string += '{0} {1:.2f} {2:.2f} {3:.2f}\n'.format(i, *j)
- with open(filepath, 'w+') as file_:
- file_.write(string)
-
- def _save_pdb(self, system, filepath, **kwargs):
- """"""
- settings = {
- 'atom_ids': 'atom_ids',
- 'elements': 'elements',
- 'coordinates': 'coordinates',
- 'cryst': 'unit_cell',
- 'connect': None,
- 'remarks': None,
- 'space_group': None,
- 'resName': "MOL",
- 'chainID': 'A',
- 'resSeq': 1,
- 'decipher': False,
- 'forcefield': None,
- }
- settings.update(kwargs)
- # We create initial string that we will gradually extend while we
- # process the data and in the end it will be written into a pdb file.
- string = "REMARK File generated using pyWINDOW."
- # Number of items (atoms) in the provided system.
- len_ = system[settings['atom_ids']].shape[0]
- # We process the remarks, if any, given by the user (optional).
- if isinstance(settings['remarks'], (list, tuple)):
- # If a list or tuple of remarks each is written at a new line
- # with the REMARK prefix not to have to long remark line.
- for remark in settings['remarks']:
- string = "\n".join([string, 'REMARK {0}'.format(remark)])
- else:
- # Otherwise if it's a single string or an int/float we just write
- # it under single remark line, otherwise nothing happens.
- if isinstance(settings['remarks'], (str, int, float)):
- remark = settings['remarks']
- string = "\n".join([string, 'REMARK {0}'.format(remark)])
- # If there is a unit cell (crystal data) provided we need to add it.
- if settings['cryst'] in system.keys():
- if system[settings['cryst']].any():
- cryst_line = "CRYST1"
- cryst = system[settings['cryst']]
- # The user have to provide the crystal data as a list/array
- # of six items containing unit cell edges lengths a, b and c
- # in x, y and z directions and three angles, or it can be.
- # Other options are not allowed for simplicity. It can convert
- # from the lattice array using function from utilities.
- for i in cryst[:3]:
- cryst_line = "".join([cryst_line, "{0:9.3f}".format(i)])
- for i in cryst[3:]:
- cryst_line = "".join([cryst_line, "{0:7.2f}".format(i)])
- # This is kind of messy, by default the data written in PDB
- # file should be P1 symmetry group therefore containing all
- # atom coordinates and not considering symmetry operations.
- # But, user can still define a space group if he wishes to.
- if settings['space_group'] is not None:
- space_group = settings['space_group']
- else:
- space_group = "{0}".format("P1")
- cryst_line = " ".join([cryst_line, space_group])
- # We add the unit cell parameters to the main string.
- string = "\n".join([string, cryst_line])
- # For the sake of code readability we extract interesting data from the
- # system. Atom_ids are the atom ids written at the third column of a
- # PDB file and the user has here the freedom to use the forcefield
- # assigned ones. However, they have to specify it directly using the
- # atom_ids key. Otherwise, the 'elements' array from system object
- # will be used, that is also used for elements in the last column of
- # a PDB file. Other parameters like residue name (resName), chain id
- # (chainID) and residue sequence (resSeq) can be controlled by
- # appropriate parameter keyword passed to this function, Otherwise
- # the default values from settings dictionary are used.
- atom_ids = system[settings['atom_ids']]
- elements = system[settings['elements']]
- # If the 'elements' array of the system need deciphering atom keys this
- # is done if the user sets decipher to True. They can also provided
- # forcefield, otherwise it's None which equals to DLF.
- if settings['decipher'] is True:
- elements = np.array([
- decipher_atom_key(
- key, forcefield=settings['forcefield']) for key in elements
- ])
- coordinates = system[settings['coordinates']]
- for i in range(len_):
- atom_line = "ATOM {0:5d}".format(i + 1)
- atom_id = "{0:4}".format(atom_ids[i].center(4))
- resName = "{0:3}".format(settings['resName'])
- chainID = settings['chainID']
- atom_line = " ".join([atom_line, atom_id, resName, chainID])
- resSeq = str(settings['resSeq']).rjust(4)
- atom_line = "".join([atom_line, resSeq])
- coor = "{0:8.3f}{1:8.3f}{2:8.3f}".format(
- coordinates[i][0],
- coordinates[i][1],
- coordinates[i][2], )
- atom_line = " ".join([atom_line, coor])
- big_space = "{0}".format(" ".center(22))
- element = "{0:2} ".format(elements[i].rjust(2))
- atom_line = "".join([atom_line, big_space, element])
- string = "\n".join([string, atom_line])
- # The connectivity part is to be written after a function calculating
- # connectivity is finished
- # "Everything that has a beginning has an end" by Neo. :)
- string = "\n".join([string, 'END'])
- # Check if .pdb extension is missing from filepath.
- if filepath[-4:].lower() != '.pdb':
- filepath = ".".join((filepath, 'pdb'))
- # Write the string to a a PDB file.
- with open(filepath, 'w+') as file:
- file.write(string)
-
-"""
-Defines :class:`MolecularSystem` and :class:`Molecule` classes.
-
-This module is the most important part of the ``pywindow`` package, as it is
-at the frontfront of the interaction with the user. The two main classes
-defined here: :class:`MolecularSystem` and :class:`Molecule` are used to
-store and analyse single molecules or assemblies of single molecules.
-
-The :class:`MolecularSystem` is used as a first step to the analysis. It allows
-to load data, to refine it (rebuild molecules in a periodic system, decipher
-force field atom ids) and to extract single molecules for analysis as
-:class:`Molecule` instances.
-
-To get started see :class:`MolecularSystem`.
-
-To get started with the analysis of Molecular Dynamic trajectories go to
-:mod:`pywindow.trajectory`.
-
-"""
-
-import os
-import numpy as np
-from copy import deepcopy
-from scipy.spatial import ConvexHull
-
-from .io_tools import Input, Output
-from .utilities import (discrete_molecules,
- decipher_atom_key,
- molecular_weight,
- center_of_mass,
- max_dim,
- pore_diameter,
- opt_pore_diameter,
- sphere_volume,
- find_windows,
- shift_com,
- create_supercell,
- is_inside_polyhedron,
- find_average_diameter,
- calculate_pore_shape,
- circumcircle,
- to_list,
- align_principal_ax,
- get_inertia_tensor,
- get_gyration_tensor,
- calc_asphericity,
- calc_acylidricity,
- calc_relative_shape_anisotropy,
- find_windows_new,
- calculate_window_diameter,
- get_window_com,
- window_shape)
-
-
-class _MolecularSystemError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _NotAModularSystem(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _Shape:
- """
- Class containing shape descriptors.
-
- This class allows other classes, such as :class:`Pore` and
- :class:`Molecule`, inherit shape descriptors (applicable to any set of
- points in a 3D Cartesian space, let it be a shape of an intrinsic pore or
- shape of a molecule) such as asphericity, acylidricity and relative shape
- anisotropy. This class should not be used by itself.
-
- """
-
- @property
- def _asphericity(self):
- """
- Return asphericity of a shape.
-
- The asphericity of a shape is weighted by the mass assigned to each
- coordinate (associated with the element). In case if `elements` is
- `None`, mass of each element = 1 and this returns a non-weighted value.
-
- Returns
- -------
- :class:`float`
- The asphericity of a shape.
-
- """
- return calc_asphericity(self.elements, self.coordinates)
-
- @property
- def _acylidricity(self):
- """
- Return acylidricity of a shape.
-
- The acylidricity of a shape is weighted by the mass assigned to each
- coordinate (associated with the element). In case if `elements` is
- `None`, mass of each element = 1 and this returns a non-weighted value.
-
- Returns
- -------
- :class:`float`
- The acylidricity of a shape.
-
- """
- return calc_acylidricity(self.elements, self.coordinates)
-
- @property
- def _relative_shape_anisotropy(self):
- """
- Return relative shape anisotropy of a shape.
-
- The relative shape anisotropy of a shape is weighted by the mass
- assigned to each coordinate (associated with the element). In case if
- `elements` is `None`, mass of each element = 1 and this returns a
- non-weighted value.
-
- Returns
- -------
- :class:`float`
- The relative shape anisotropy of a shape.
-
- """
- return calc_relative_shape_anisotropy(
- self.elements, self.coordinates
- )
-
- @property
- def inertia_tensor(self):
- """
- Return inertia tensor of a shape.
-
- The inertia tensor of a shape is weighted by the mass assigned to each
- coordinate (associated with the element). In case if `elements` is
- `None`, mass of each element = 1 and this returns a non-weighted value.
-
- Returns
- -------
- :class:`numpy.array`
- The inertia tensor of a shape.
-
- """
- return get_inertia_tensor(
- self.elements, self.coordinates
- )
-
- @property
- def gyration_tensor(self):
- """
- Return gyration tensor of a shape.
-
- The gyration tensor of a shape is weighted by the mass assigned to each
- coordinate (associated with the element). In case if `elements` is
- `None`, mass of each element = 1 and this returns a non-weighted value.
-
- Returns
- -------
- :class:`numpy.array`
- The gyration tensor of a shape.
-
- """
- return get_gyration_tensor(self.elements, self.coordinates)
-
-
-class _Pore(_Shape):
- """Under development."""
-
- def __init__(self, elements, coordinates, shape=False, **kwargs):
- self._elements, self._coordinates = elements, coordinates
- self.diameter, self.closest_atom = pore_diameter(
- elements, coordinates, **kwargs)
- self.spherical_volume = sphere_volume(self.diameter / 2)
- if 'com' in kwargs.keys():
- self.centre_coordinates = kwargs['com']
- else:
- self.centre_coordinates = center_of_mass(elements, coordinates)
- self.optimised = False
-
- def optimise(self, **kwargs):
- (self.diameter, self.closest_atom,
- self.centre_coordinates) = opt_pore_diameter(
- self._elements,
- self._coordinates,
- com=self.centre_coordinates,
- **kwargs)
- self.spherical_volume = sphere_volume(self.diameter / 2)
- self.optimised = True
-
- def get_shape(self):
- super().__init__(calculate_pore_shape(self._coordinates))
-
- def reset(self):
- self.__init__(self._elements, self._coordinates)
-
-
-class _Window:
- """Under development."""
-
- def __init__(self, window, key, elements, coordinates, com_adjust):
- self.raw_data = window
- self.index = key
- self.mol_coordinates = coordinates
- self.mol_elements = elements
- self.com_correction = com_adjust
- self.shape = None
- self.convexhull = None
-
- def calculate_diameter(self, **kwargs):
- diameter = calculate_window_diameter(
- self.raw_data, self.mol_elements, self.mol_coordinates, **kwargs
- )
- return diameter
-
- def calculate_centre_of_mass(self, **kwargs):
- com = get_window_com(
- self.raw_data, self.mol_elements, self.mol_coordinates,
- self.com_correction, **kwargs
- )
- return com
-
- def get_shape(self, **kwargs):
- self.shape = window_shape(
- self.raw_data, self.mol_elements, self.mol_coordinates
- )
- return self.shape
-
- def get_convexhull(self):
- hull = ConvexHull(self.shape)
- verticesx = np.append(
- self.shape[hull.vertices, 0], self.shape[hull.vertices, 0][0]
- )
- verticesy = np.append(
- self.shape[hull.vertices, 1], self.shape[hull.vertices, 1][0]
- )
- self.convexhull = verticesx, verticesy
- return self.convexhull
-
-
-[docs]class Molecule(_Shape):
- """
- Container for a single molecule.
-
- This class is meant for the analysis of single molecules, molecular pores
- especially. The object passed to this class should therefore be a finite
- and interconnected individuum.
-
- This class should not be initialised directly, but result from
- :func:`MolecularSystem.system_to_molecule()` or
- :func:`MolecularSystem.make_modular()`.
-
- Methods in :class:`Molecule` allow to calculate:
-
- 1. The maximum diameter of a molecule.
-
- 2. The average diameter of a molecule.
-
- 3. The intrinsic void diameter of a molecule.
-
- 4. The intrinsic void volume of a molecule.
-
- 5. The optimised intrinsic void diameter of a molecule.
-
- 6. The optimised intrinsic void volume of a molecule.
-
- 7. The circular diameter of a window of a molecule.
-
- Attributes
- ----------
- mol : :class:`dict`
- The :attr:`Molecular.System.system` dictionary passed to the
- :class:`Molecule` which is esentially a container of the information
- that compose a molecular entity, such as the coordinates and
- atom ids and/or elements.
-
- no_of_atoms : :class:`int`
- The number of atoms in the molecule.
-
- elements : :class:`numpy.array`
- An array containing the elements, as strings, composing the molecule.
-
- atom_ids : :class:`numpy.array` (conditional)
- If the :attr:`Molecule.mol` contains 'atom_ids' keyword, the force
- field ids of the elements.
-
- coordinates : :class:`numpy.array`
- The x, y and z atomic Cartesian coordinates of all elements.
-
- parent_system : :class:`str`
- The :attr:`name` of :class:`MolecularSystem` passed to
- :class:`Molecule`.
-
- molecule_id : :class:`any`
- The molecule id passed when initialising :class:`Molecule`.
-
- properties : :class:`dict`
- A dictionary that is populated by the output of
- :class:`Molecule` methods.
-
- """
-
- def __init__(self, mol, system_name, mol_id):
- self._Output = Output()
- self.mol = mol
- self.no_of_atoms = len(mol['elements'])
- self.elements = mol['elements']
- if 'atom_ids' in mol.keys():
- self.atom_ids = mol['atom_ids']
- self.coordinates = mol['coordinates']
- self.parent_system = system_name
- self.molecule_id = mol_id
- self.properties = {'no_of_atoms': self.no_of_atoms}
- self._windows = None
-
- @classmethod
- def _load_rdkit_mol(cls, mol, system_name='rdkit', mol_id=0):
- """
- Create a :class:`Molecule` from :class:`rdkit.Chem.rdchem.Mol`.
-
- To be used only by expert users.
-
- Parameters
- ----------
- mol : :class:`rdkit.Chem.rdchem.Mol`
- An RDKit molecule object.
-
- Returns
- -------
- :class:`pywindow.molecular.Molecule`
- :class:`Molecule`
-
- """
- return cls(Input().load_rdkit_mol(mol), system_name, mol_id)
-
-[docs] def full_analysis(self, ncpus=1, **kwargs):
- """
- Perform a full structural analysis of a molecule.
-
- This invokes other methods:
-
- 1. :attr:`molecular_weight()`
-
- 2. :attr:`calculate_centre_of_mass()`
-
- 3. :attr:`calculate_maximum_diameter()`
-
- 4. :attr:`calculate_average_diameter()`
-
- 5. :attr:`calculate_pore_diameter()`
-
- 6. :attr:`calculate_pore_volume()`
-
- 7. :attr:`calculate_pore_diameter_opt()`
-
- 8. :attr:`calculate_pore_volume_opt()`
-
- 9. :attr:`calculate_pore_diameter_opt()`
-
- 10. :attr:`calculate_windows()`
-
- Parameters
- ----------
- ncpus : :class:`int`
- Number of CPUs used for the parallelised parts of
- :func:`pywindow.utilities.find_windows()`. (default=1=serial)
-
- Returns
- -------
- :attr:`Molecule.properties`
- The updated :attr:`Molecule.properties` with returns of all
- used methods.
-
- """
- self.molecular_weight()
- self.calculate_centre_of_mass()
- self.calculate_maximum_diameter()
- self.calculate_average_diameter()
- self.calculate_pore_diameter()
- self.calculate_pore_volume()
- self.calculate_pore_diameter_opt(**kwargs)
- self.calculate_pore_volume_opt(**kwargs)
- self.calculate_windows(ncpus=ncpus, **kwargs)
- # self._circumcircle(**kwargs)
- return self.properties
-
- def _align_to_principal_axes(self, align_molsys=False):
- if align_molsys:
- self.coordinates[0] = align_principal_ax_all(
- self.elements, self.coordinates
- )
- else:
- self.coordinates[0] = align_principal_ax(
- self.elements, self.coordinates
- )
- self.aligned_to_principal_axes = True
-
- def _get_pore(self):
- return Pore(self.elements, self.coordinates)
-
- def _get_shape(self, **kwargs):
- super().__init__(self.coordinates, elements=self.elements)
-
- def _get_windows(self, **kwargs):
- windows = find_windows_new(self.elements, self.coordinates, **kwargs)
- if windows:
- self.windows = [
- Window(np.array(windows[0][window]), window, windows[1],
- windows[2], windows[3])
- for window in windows[0] if window != -1
- ]
- return self.windows
- else:
- return None
-
-[docs] def calculate_centre_of_mass(self):
- """
- Return the xyz coordinates of the centre of mass of a molecule.
-
- Returns
- -------
- :class:`numpy.array`
- The centre of mass of the molecule.
-
- """
- self.centre_of_mass = center_of_mass(self.elements, self.coordinates)
- self.properties['centre_of_mass'] = self.centre_of_mass
- return self.centre_of_mass
-
-[docs] def calculate_maximum_diameter(self):
- """
- Return the maximum diamension of a molecule.
-
- Returns
- -------
- :class:`float`
- The maximum dimension of the molecule.
-
- """
- self.maxd_atom_1, self.maxd_atom_2, self.maximum_diameter = max_dim(
- self.elements, self.coordinates)
- self.properties['maximum_diameter'] = {
- 'diameter': self.maximum_diameter,
- 'atom_1': int(self.maxd_atom_1),
- 'atom_2': int(self.maxd_atom_2),
- }
- return self.maximum_diameter
-
-[docs] def calculate_average_diameter(self, **kwargs):
- """
- Return the average diamension of a molecule.
-
- Returns
- -------
- :class:`float`
- The average dimension of the molecule.
-
- """
- self.average_diameter = find_average_diameter(
- self.elements, self.coordinates, **kwargs)
- return self.average_diameter
-
-[docs] def calculate_pore_diameter(self):
- """
- Return the intrinsic pore diameter.
-
- Returns
- -------
- :class:`float`
- The intrinsic pore diameter.
-
- """
- self.pore_diameter, self.pore_closest_atom = pore_diameter(
- self.elements, self.coordinates)
- self.properties['pore_diameter'] = {
- 'diameter': self.pore_diameter,
- 'atom': int(self.pore_closest_atom),
- }
- return self.pore_diameter
-
-[docs] def calculate_pore_volume(self):
- """
- Return the intrinsic pore volume.
-
- Returns
- -------
- :class:`float`
- The intrinsic pore volume.
-
- """
- self.pore_volume = sphere_volume(self.calculate_pore_diameter() / 2)
- self.properties['pore_volume'] = self.pore_volume
- return self.pore_volume
-
-[docs] def calculate_pore_diameter_opt(self, **kwargs):
- """
- Return the intrinsic pore diameter (for the optimised pore centre).
-
- Similarly to :func:`calculate_pore_diameter` this method returns the
- the intrinsic pore diameter, however, first a better approximation
- of the pore centre is found with optimisation.
-
- Returns
- -------
- :class:`float`
- The intrinsic pore diameter.
-
- """
- (self.pore_diameter_opt, self.pore_opt_closest_atom,
- self.pore_opt_COM) = opt_pore_diameter(self.elements,
- self.coordinates, **kwargs)
- self.properties['pore_diameter_opt'] = {
- 'diameter': self.pore_diameter_opt,
- 'atom_1': int(self.pore_opt_closest_atom),
- 'centre_of_mass': self.pore_opt_COM,
- }
- return self.pore_diameter_opt
-
-[docs] def calculate_pore_volume_opt(self, **kwargs):
- """
- Return the intrinsic pore volume (for the optimised pore centre).
-
- Similarly to :func:`calculate_pore_volume` this method returns the
- the volume intrinsic pore diameter, however, for the
- :func:`calculate_pore_diameter_opt` returned value.
-
- Returns
- -------
- :class:`float`
- The intrinsic pore volume.
-
- """
- self.pore_volume_opt = sphere_volume(
- self.calculate_pore_diameter_opt(**kwargs) / 2)
- self.properties['pore_volume_opt'] = self.pore_volume_opt
- return self.pore_volume_opt
-
- def _calculate_pore_shape(self, filepath='shape.xyz', **kwargs):
- shape = calculate_pore_shape(self.elements, self.coordinates, **kwargs)
- shape_obj = {'elements': shape[0], 'coordinates': shape[1]}
- Output()._save_xyz(shape_obj, filepath)
- return 1
-
-[docs] def calculate_windows(self, **kwargs):
- """
- Return the diameters of all windows in a molecule.
-
- This function first finds and then measures the diameters of all the
- window in the molecule.
-
- Returns
- -------
- :class:`numpy.array`
- An array of windows' diameters.
-
- :class:`NoneType`
- If no windows were found.
-
- """
- windows = find_windows(self.elements, self.coordinates, **kwargs)
- if windows:
- self.properties.update(
- {
- 'windows': {
- 'diameters': windows[0], 'centre_of_mass': windows[1],
- }
- }
- )
- return windows[0]
- else:
- self.properties.update(
- {'windows': {'diameters': None, 'centre_of_mass': None, }}
- )
- return None
-
-[docs] def shift_to_origin(self, **kwargs):
- """
- Shift a molecule to Origin.
-
- This function takes the molecule's coordinates and adjust them so that
- the centre of mass of the molecule coincides with the origin of the
- coordinate system.
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- self.coordinates = shift_com(self.elements, self.coordinates, **kwargs)
- self._update()
-
-[docs] def molecular_weight(self):
- """
- Return the molecular weight of a molecule.
-
- Returns
- -------
- :class:`float`
- The molecular weight of the molecule.
-
- """
- self.MW = molecular_weight(self.elements)
- return self.MW
-
-[docs] def dump_properties_json(self, filepath=None, molecular=False, **kwargs):
- """
- Dump content of :attr:`Molecule.properties` to a JSON dictionary.
-
- Parameters
- ----------
- filepath : :class:`str`
- The filepath for the dumped file. If :class:`None`, the file is
- dumped localy with :attr:`molecule_id` as filename.
- (defualt=None)
-
- molecular : :class:`bool`
- If False, dump only the content of :attr:`Molecule.properties`,
- if True, dump all the information about :class:`Molecule`.
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- # We pass a copy of the properties dictionary.
- dict_obj = deepcopy(self.properties)
- # If molecular data is also required we update the dictionary.
- if molecular is True:
- dict_obj.update(self.mol)
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = "_".join(
- (str(self.parent_system), str(self.molecule_id))
- )
- filepath = '/'.join((os.getcwd(), filepath))
- # Dump the dictionary to json file.
- self._Output.dump2json(dict_obj, filepath, default=to_list, **kwargs)
-
-[docs] def dump_molecule(self, filepath=None, include_coms=False, **kwargs):
- """
- Dump a :class:`Molecule` to a file (PDB or XYZ).
-
- Kwargs are passed to :func:`pywindow.io_tools.Output.dump2file()`.
-
- For validation purposes an overlay of window centres and COMs can also
- be dumped as:
-
- He - for the centre of mass
-
- Ne - for the centre of the optimised cavity
-
- Ar - for the centres of each found window
-
- Parameters
- ----------
- filepath : :class:`str`
- The filepath for the dumped file. If :class:`None`, the file is
- dumped localy with :attr:`molecule_id` as filename.
- (defualt=None)
-
- include_coms : :class:`bool`
- If True, dump also with an overlay of window centres and COMs.
- (default=False)
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = "_".join(
- (str(self.parent_system), str(self.molecule_id)))
- filepath = '/'.join((os.getcwd(), filepath))
- filepath = '.'.join((filepath, 'pdb'))
- # Check if there is an 'atom_ids' keyword in the self.mol dict.
- # Otherwise pass to the dump2file atom_ids='elements'.
- if 'atom_ids' not in self.mol.keys():
- atom_ids = 'elements'
- else:
- atom_ids = 'atom_ids'
- # Dump molecule into a file.
- # If coms are to be included additional steps are required.
- # First deepcopy the molecule
- if include_coms is True:
- mmol = deepcopy(self.mol)
- # add centre of mass (centre of not optimised pore) as 'He'.
- mmol['elements'] = np.concatenate(
- (mmol['elements'], np.array(['He'])))
- if 'atom_ids' not in self.mol.keys():
- pass
- else:
- mmol['atom_ids'] = np.concatenate(
- (mmol['atom_ids'], np.array(['He'])))
- mmol['coordinates'] = np.concatenate(
- (mmol['coordinates'],
- np.array([self.properties['centre_of_mass']])))
- # add centre of pore optimised as 'Ne'.
- mmol['elements'] = np.concatenate(
- (mmol['elements'], np.array(['Ne'])))
- if 'atom_ids' not in self.mol.keys():
- pass
- else:
- mmol['atom_ids'] = np.concatenate(
- (mmol['atom_ids'], np.array(['Ne'])))
- mmol['coordinates'] = np.concatenate(
- (mmol['coordinates'], np.array(
- [self.properties['pore_diameter_opt']['centre_of_mass']])))
- # add centre of windows as 'Ar'.
- if self.properties['windows']['centre_of_mass'] is not None:
- range_ = range(
- len(self.properties['windows']['centre_of_mass']))
- for com in range_:
- mmol['elements'] = np.concatenate(
- (mmol['elements'], np.array(['Ar'])))
- if 'atom_ids' not in self.mol.keys():
- pass
- else:
- mmol['atom_ids'] = np.concatenate(
- (mmol['atom_ids'],
- np.array(['Ar{0}'.format(com + 1)])))
- mmol['coordinates'] = np.concatenate(
- (mmol['coordinates'], np.array([
- self.properties['windows']['centre_of_mass'][com]
- ])))
- self._Output.dump2file(mmol, filepath, atom_ids=atom_ids, **kwargs)
-
- else:
- self._Output.dump2file(
- self.mol, filepath, atom_ids=atom_ids, **kwargs)
-
- def _update(self):
- self.mol['coordinates'] = self.coordinates
- self.calculate_centre_of_mass()
- self.calculate_pore_diameter_opt()
-
- def _circumcircle(self, **kwargs):
- windows = circumcircle(self.coordinates, kwargs['atom_sets'])
- if 'output' in kwargs:
- if kwargs['output'] == 'windows':
- self.properties['circumcircle'] = {'diameter': windows, }
- else:
- if windows is not None:
- self.properties['circumcircle'] = {
- 'diameter': windows[0],
- 'centre_of_mass': windows[1],
- }
- else:
- self.properties['circumcircle'] = {
- 'diameter': None,
- 'centre_of_mass': None,
- }
- return windows
-
-
-[docs]class MolecularSystem:
- """
- Container for the molecular system.
-
- To load input and initialise :class:`MolecularSystem`, one of the
- :class:`MolecularSystem` classmethods (:func:`load_file()`,
- :func:`load_rdkit_mol()` or :func:`load_system()`) should be used.
- :class:`MolecularSystem` **should not be initialised by itself.**
-
- Examples
- --------
- 1. Using file as an input:
-
- .. code-block:: python
-
- pywindow.molecular.MolecularSystem.load_file(`filepath`)
-
- 2. Using RDKit molecule object as an input:
-
- .. code-block:: python
-
- pywindow.molecular.MolecularSystem.load_rdkit_mol(rdkit.Chem.rdchem.Mol)
-
- 3. Using a dictionary (or another :attr:`MoleculeSystem.system`) as input:
-
- .. code-block:: python
-
- pywindow.molecular.MolecularSystem.load_system({...})
-
- Attributes
- ----------
- system_id : :class:`str` or :class:`int`
- The input filename or user defined.
-
- system : :class:`dict`
- A dictionary containing all the information extracted from input.
-
- molecules : :class:`list`
- A list containing all the returned :class:`Molecule` s after using
- :func:`make_modular()`.
-
- """
-
- def __init__(self):
- self._Input = Input()
- self._Output = Output()
- self.system_id = 0
-
-[docs] @classmethod
- def load_file(cls, filepath):
- """
- Create a :class:`MolecularSystem` from an input file.
-
- Recognized input file formats: XYZ, PDB and MOL (V3000).
-
- Parameters
- ----------
- filepath : :class:`str`
- The input's filepath.
-
- Returns
- -------
- :class:`pywindow.molecular.MolecularSystem`
- :class:`MolecularSystem`
-
- """
- obj = cls()
- obj.system = obj._Input.load_file(filepath)
- obj.filename = os.path.basename(filepath)
- obj.system_id = obj.filename.split(".")[0]
- obj.name, ext = os.path.splitext(obj.filename)
- return obj
-
-[docs] @classmethod
- def load_rdkit_mol(cls, mol):
- """
- Create a :class:`MolecularSystem` from :class:`rdkit.Chem.rdchem.Mol`.
-
- Parameters
- ----------
- mol : :class:`rdkit.Chem.rdchem.Mol`
- An RDKit molecule object.
-
- Returns
- -------
- :class:`pywindow.molecular.MolecularSystem`
- :class:`MolecularSystem`
-
- """
- obj = cls()
- obj.system = obj._Input.load_rdkit_mol(mol)
- return obj
-
-[docs] @classmethod
- def load_system(cls, dict_, system_id='system'):
- """
- Create a :class:`MolecularSystem` from a python :class:`dict`.
-
- As the loaded :class:`MolecularSystem` is storred as a :class:`dict` in
- the :class:`MolecularSystem.system` it can also be loaded directly from
- a :class:`dict` input. This feature is used by :mod:`trajectory` that
- extracts trajectory frames as dictionaries and returns them
- as :class:`MolecularSystem` objects through this classmethod.
-
- Parameters
- ----------
- dict_ : :class:`dict`
- A python dictionary.
-
- system_id : :class:`str` or :class:'int', optional
- Inherited or user defined system id. (default='system')
-
- Returns
- -------
- :class:`pywindow.molecular.MolecularSystem`
- :class:`MolecularSystem`
-
- """
- obj = cls()
- obj.system = dict_
- obj.system_id = system_id
- return obj
-
-[docs] def rebuild_system(self, override=False, **kwargs):
- """
- Rebuild molecules in molecular system.
-
- Parameters
- ----------
- override : :class:`bool`, optional (default=False)
- If False the rebuild molecular system is returned as a new
- :class:`MolecularSystem`, if True, the current
- :class:`MolecularSystem` is modified.
-
- """
- # First we create a 3x3x3 supercell with the initial unit cell in the
- # centre and the 26 unit cell translations around to provide all the
- # atom positions necessary for the molecules passing through periodic
- # boundary reconstruction step.
- supercell_333 = create_supercell(self.system, **kwargs)
- # smolsys = self.load_system(supercell_333, self.system_id + '_311')
- # smolsys.dump_system(override=True)
- discrete = discrete_molecules(self.system, rebuild=supercell_333)
- # This function overrides the initial data for 'coordinates',
- # 'atom_ids', and 'elements' instances in the 'system' dictionary.
- coordinates = np.array([], dtype=np.float64).reshape(0, 3)
- atom_ids = np.array([])
- elements = np.array([])
- for i in discrete:
- coordinates = np.concatenate(
- [coordinates, i['coordinates']], axis=0
- )
- atom_ids = np.concatenate([atom_ids, i['atom_ids']], axis=0)
- elements = np.concatenate([elements, i['elements']], axis=0)
- rebuild_system = {
- 'coordinates': coordinates,
- 'atom_ids': atom_ids,
- 'elements': elements
- }
- if override is True:
- self.system.update(rebuild_system)
- return None
- else:
- return self.load_system(rebuild_system)
-
-[docs] def swap_atom_keys(self, swap_dict, dict_key='atom_ids'):
- """
- Swap a force field atom id for another user-defined value.
-
- This modified all values in :attr:`MolecularSystem.system['atom_ids']`
- that match criteria.
-
- This function can be used to decipher a whole forcefield if an
- appropriate dictionary is passed to the function.
-
- Example
- -------
- In this example all atom ids 'he' will be exchanged to 'H'.
-
- .. code-block:: python
-
- pywindow.molecular.MolecularSystem.swap_atom_keys({'he': 'H'})
-
- Parameters
- ----------
- swap_dict: :class:`dict`
- A dictionary containg force field atom ids (keys) to be swapped
- with corresponding values (keys' arguments).
-
- dict_key: :class:`str`
- A key in :attr:`MolecularSystem.system` dictionary to perform the
- atom keys swapping operation on. (default='atom_ids')
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- # Similar situation to the one from decipher_atom_keys function.
- if 'atom_ids' not in self.system.keys():
- dict_key = 'elements'
- for atom_key in range(len(self.system[dict_key])):
- for key in swap_dict.keys():
- if self.system[dict_key][atom_key] == key:
- self.system[dict_key][atom_key] = swap_dict[key]
-
-[docs] def decipher_atom_keys(self, forcefield='DLF', dict_key='atom_ids'):
- """
- Decipher force field atom ids.
-
- This takes all values in :attr:`MolecularSystem.system['atom_ids']`
- that match force field type criteria and creates
- :attr:`MolecularSystem.system['elements']` with the corresponding
- periodic table of elements equivalents.
-
- If a forcefield is not supported by this method, the
- :func:`MolecularSystem.swap_atom_keys()` can be used instead.
-
- DLF stands for DL_F notation.
-
- See: C. W. Yong, Descriptions and Implementations of DL_F Notation: A
- Natural Chemical Expression System of Atom Types for Molecular
- Simulations, J. Chem. Inf. Model., 2016, 56, 1405–1409.
-
- Parameters
- ----------
- forcefield : :class:`str`
- The forcefield used to decipher atom ids. Allowed (not case
- sensitive): 'OPLS', 'OPLS2005', 'OPLSAA', 'OPLS3', 'DLF', 'DL_F'.
- (default='DLF')
-
- dict_key : :class:`str`
- The :attr:`MolecularSystem.system` dictionary key to the array
- containing the force field atom ids. (default='atom_ids')
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- # In case there is no 'atom_ids' key we try 'elements'. This is for
- # XYZ and MOL files mostly. But, we keep the dict_key keyword for
- # someone who would want to decipher 'elements' even if 'atom_ids' key
- # is present in the system's dictionary.
- if 'atom_ids' not in self.system.keys():
- dict_key = 'elements'
- # I do it on temporary object so that it only finishes when successful
- temp = deepcopy(self.system[dict_key])
- for element in range(len(temp)):
- temp[element] = "{0}".format(
- decipher_atom_key(
- temp[element], forcefield=forcefield))
- self.system['elements'] = temp
-
-[docs] def make_modular(self, rebuild=False):
- """
- Find and return all :class:`Molecule` s in :class:`MolecularSystem`.
-
- This function populates :attr:`MolecularSystem.molecules` with
- :class:`Molecule` s.
-
- Parameters
- ----------
- rebuild : :class:`bool`
- If True, run first the :func:`rebuild_system()`. (default=False)
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- if rebuild is True:
- supercell_333 = create_supercell(self.system)
- else:
- supercell_333 = None
- dis = discrete_molecules(self.system, rebuild=supercell_333)
- self.no_of_discrete_molecules = len(dis)
- self.molecules = {}
- for i in range(len(dis)):
- self.molecules[i] = Molecule(dis[i], self.system_id, i)
-
-[docs] def system_to_molecule(self):
- """
- Return :class:`MolecularSystem` as a :class:`Molecule` directly.
-
- Only to be used conditionally, when the :class:`MolecularSystem` is a
- discrete molecule and no input pre-processing is required.
-
- Returns
- -------
- :class:`pywindow.molecular.Molecule`
- :class:`Molecule`
- """
- return Molecule(self.system, self.system_id, 0)
-
- def _get_pores(self, sampling_points):
- """ Under development."""
- pores = []
- for point in sampling_points:
- pores.append(
- Pore(
- self.system['elements'],
- self.system['coordinates'],
- com=point))
- return pores
-
-[docs] def dump_system(self, filepath=None, modular=False, **kwargs):
- """
- Dump a :class:`MolecularSystem` to a file (PDB or XYZ).
-
- Kwargs are passed to :func:`pywindow.io_tools.Output.dump2file()`.
-
- Parameters
- ----------
- filepath : :class:`str`
- The filepath for the dumped file. If :class:`None`, the file is
- dumped localy with :attr:`system_id` as filename.
- (defualt=None)
-
- modular : :class:`bool`
- If False, dump the :class:`MolecularSystem` as in
- :attr:`MolecularSystem.system`, if True, dump the
- :class:`MolecularSystem` as catenated :class:Molecule objects
- from :attr:`MolecularSystem.molecules`
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = '/'.join((os.getcwd(), str(self.system_id)))
- filepath = '.'.join((filepath, 'pdb'))
- # If modular is True substitute the molecular data for modular one.
- system_dict = deepcopy(self.system)
- if modular is True:
- elements = np.array([])
- atom_ids = np.array([])
- coor = np.array([]).reshape(0, 3)
- for mol_ in self.molecules:
- mol = self.molecules[mol_]
- elements = np.concatenate((elements, mol.mol['elements']))
- atom_ids = np.concatenate((atom_ids, mol.mol['atom_ids']))
- coor = np.concatenate((coor, mol.mol['coordinates']), axis=0)
- system_dict['elements'] = elements
- system_dict['atom_ids'] = atom_ids
- system_dict['coordinates'] = coor
- # Check if there is an 'atom_ids' keyword in the self.mol dict.
- # Otherwise pass to the dump2file atom_ids='elements'.
- # This is mostly for XYZ files and not deciphered trajectories.
- if 'atom_ids' not in system_dict.keys():
- atom_ids = 'elements'
- else:
- atom_ids = 'atom_ids'
- # Dump system into a file.
- self._Output.dump2file(
- system_dict, filepath, atom_ids=atom_ids, **kwargs)
-
-[docs] def dump_system_json(self, filepath=None, modular=False, **kwargs):
- """
- Dump a :class:`MolecularSystem` to a JSON dictionary.
-
- The dumped JSON dictionary, with :class:`MolecularSystem`, can then be
- loaded through a JSON loader and then through :func:`load_system()`
- to retrieve a :class:`MolecularSystem`.
-
- Kwargs are passed to :func:`pywindow.io_tools.Output.dump2json()`.
-
- Parameters
- ----------
- filepath : :class:`str`
- The filepath for the dumped file. If :class:`None`, the file is
- dumped localy with :attr:`system_id` as filename.
- (defualt=None)
-
- modular : :class:`bool`
- If False, dump the :class:`MolecularSystem` as in
- :attr:`MolecularSystem.system`, if True, dump the
- :class:`MolecularSystem` as catenated :class:Molecule objects
- from :attr:`MolecularSystem.molecules`
-
- Returns
- -------
- None : :class:`NoneType`
-
- """
- # We pass a copy of the properties dictionary.
- dict_obj = deepcopy(self.system)
- # In case we want a modular system.
- if modular is True:
- try:
- if self.molecules:
- pass
- except AttributeError:
- raise _NotAModularSystem(
- "This system is not modular. Please, run first the "
- "make_modular() function of this class.")
- dict_obj = {}
- for molecule in self.molecules:
- mol_ = self.molecules[molecule]
- dict_obj[molecule] = mol_.mol
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = '/'.join((os.getcwd(), str(self.system_id)))
- # Dump the dictionary to json file.
- self._Output.dump2json(dict_obj, filepath, default=to_list, **kwargs)
-
-"""
-Module intended for the analysis of molecular dynamics trajectories.
-
-The trajectory file (DL_POLY_C:HISTORY, PDB or XYZ) should be loaded with
-the one of the corresponding classes (DLPOLY, PDB or XYZ, respectively).
-
-Example
--------
-In this example a DL_POLY_C HISTORY trajectory file is loaded.
-
-.. code-block:: python
-
- pywindow.trajectory.DLPOLY('path/to/HISTORY')
-
-Then, each of the trajectory frames can be extracted and returned as a
-:class:`pywindow.molecular.MolecularSystem` object for analysis. See
-:mod:`pywindow.molecular` docstring for more information.
-
-Alternatively, the analysis can be performed on a whole or a chunk of
-the trajectory with the :func:`analysis()` function. The benefit is
-that the analysis can be performed in parallel and the results stored as a
-single JSON dictionary in a straightforward way. Also, the deciphering of the
-force field atom ids and the rebuilding of molecules can be applied to each
-frame in a consitent and automated manner. The downfall is that at the
-moment it is not possible to choose the set of parameters that are being
-calculated in the :class:`pywindow.molecular.Molecule` as the
-:func:`pywindow.molecular.Molecule.full_analysis()` is invoked by default.
-However, the computational cost of calculating majority of the structural
-properties is miniscule and it is usually the
-:func:`pywindow.molecular.MolecularSystem.rebuild_system()` step that is the
-bottleneck.
-
-"""
-import os
-import numpy as np
-from copy import deepcopy
-from mmap import mmap, ACCESS_READ
-from contextlib import closing
-from multiprocessing import Pool
-
-from .io_tools import Input, Output
-from .utilities import (
- is_number, create_supercell, lattice_array_to_unit_cell, to_list
-)
-from .molecular import MolecularSystem
-
-
-class _ParallelAnalysisError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _TrajectoryError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _FormatError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _FunctionError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-[docs]def make_supercell(system, matrix, supercell=[1, 1, 1]):
- """
- Return a supercell.
-
- This functions takes the input unitcell and creates a supercell of it that
- is returned as a new :class:`pywindow.molecular.MolecularSystem`.
-
- Parameters
- ----------
- system : :attr:`pywindow.molecular.MolecularSystem.system`
- The unit cell for creation of the supercell
-
- matrix : :class:`numpy.array`
- The unit cell parameters in form of a lattice.
-
- supercell : :class:`list`, optional
- A list that specifies the size of the supercell in the a, b and c
- direction. (default=[1, 1, 1])
-
- Returns
- -------
- :class:`pywindow.molecular.MolecularSystem`
- Returns the created supercell as a new :class:`MolecularSystem`.
-
- """
- user_supercell = [[1, supercell[0]], [1, supercell[1]], [1, supercell[1]]]
- system = create_supercell(system, matrix, supercell=user_supercell)
- return MolecularSystem.load_system(system)
-
-
-[docs]class DLPOLY(object):
- """
- A container for a DL_POLY_C type trajectory (HISTORY).
-
- This function takes a DL_POLY_C trajectory file and maps it for the
- binary points in the file where each frame starts/ends. This way the
- process is fast, as it not require loading the trajectory into computer
- memory. When a frame is being extracted, it is only this frame that gets
- loaded to the memory.
-
- Frames can be accessed individually and loaded as an unmodified string,
- returned as a :class:`pywindow.molecular.MolecularSystem` (and analysed),
- dumped as PDB or XYZ or JSON (if dumped as a
- :attr:`pywindow.molecular.MolecularSystem.system`)
-
- Attributes
- ----------
- filepath : :class:`str`
- The filepath.
-
- system_id : :class:`str`
- The system id inherited from the filename.
-
- frames : :class:`dict`
- A dictionary that is populated, on the fly, with the extracted frames.
-
- analysis_output : :class:`dict`
- A dictionary that is populated, on the fly, with the analysis output.
-
- """
- def __init__(self, filepath):
- # Image conventions - periodic boundary key.
- self._imcon = {
- 0: 'nonperiodic',
- 1: 'cubic',
- 2: 'orthorhombic',
- 3: 'parallelepiped',
- 4: 'truncated octahedral',
- 5: 'rhombic dodecahedral',
- 6: 'x-y parallelogram',
- 7: 'hexagonal prism',
- }
- # Trajectory key - content type.
- self._keytrj = {
- 0: 'coordinates',
- 1: 'coordinates and velocities',
- 2: 'coordinates, velocities and forces',
- }
- self.filepath = filepath
- self.system_id = os.path.basename(filepath)
- self.frames = {}
- self.analysis_output = {}
- # Check the history file at init, if no errors, proceed to mapping.
- self._check_HISTORY()
- # Map the trajectory file at init.
- self._map_HISTORY()
-
- def _map_HISTORY(self):
- """ """
- self.trajectory_map = {}
- with open(self.filepath, 'r') as trajectory_file:
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as mapped_file:
- progress = 0
- line = 0
- frame = 0
- cell_param_line = 0
- # We need to first process trajectory file's header.
- header_flag = True
- while progress <= len(mapped_file):
- line = line + 1
- # We read a binary data from a mapped file.
- bline = mapped_file.readline()
- # If the bline length equals zero we terminate.
- # We reached end of the file but still add the last frame!
- if len(bline) == 0:
- self.trajectory_map[frame] = [frame_start, progress]
- frame = frame + 1
- break
- # We need to decode byte line into an utf-8 string.
- sline = bline.decode("utf-8").strip('\n').split()
- # We extract map's byte coordinates for each frame
- if header_flag is False:
- if sline[0] == 'timestep':
- self.trajectory_map[frame] = [
- frame_start, progress
- ]
- frame_start = progress
- frame = frame + 1
- # Here we extract the map's byte coordinates for the header
- # And also the periodic system type needed for later.
- if header_flag is True:
- if sline[0] == 'timestep':
- self.trajectory_map['header'] = self._decode_head(
- [0, progress])
- frame_start = progress
- header_flag = False
- progress = progress + len(bline)
- self.no_of_frames = frame
-
- def _decode_head(self, header_coordinates):
- start, end = header_coordinates
- with open(self.filepath, 'r') as trajectory_file:
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as mapped_file:
- header = [
- i.split()
- for i in mapped_file[start:end].decode("utf-8").split('\n')
- ]
- header = [int(i) for i in header[1]]
- self.periodic_boundary = self._imcon[header[1]]
- self.content_type = self._keytrj[header[0]]
- self.no_of_atoms = header[2]
- return header
-
-[docs] def get_frames(self, frames='all', override=False, **kwargs):
- """
- Extract frames from the trajectory file.
-
- Depending on the passed parameters a frame, a list of particular
- frames, a range of frames (from, to), or all frames can be extracted
- with this function.
-
- Parameters
- ----------
- frames : :class:`int` or :class:`list` or :class:`touple` or :class:`str`
- Specified frame (:class:`int`), or frames (:class:`list`), or
- range (:class:`touple`), or `all`/`everything` (:class:`str`).
- (default=`all`)
-
- override : :class:`bool`
- If True, a frame already storred in :attr:`frames` can be override.
- (default=False)
-
- extract_data : :class:`bool`, optional
- If False, a frame is returned as a :class:`str` block as in the
- trajectory file. Ohterwise, it is extracted and returned as
- :class:`pywindow.molecular.MolecularSystem`. (default=True)
-
- swap_atoms : :class:`dict`, optional
- If this kwarg is passed with an appropriate dictionary a
- :func:`pywindow.molecular.MolecularSystem.swap_atom_keys()` will
- be applied to the extracted frame.
-
- forcefield : :class:`str`, optional
- If this kwarg is passed with appropriate forcefield keyword a
- :func:`pywindow.molecular.MolecularSystem.decipher_atom_keys()`
- will be applied to the extracted frame.
-
- Returns
- -------
- :class:`pywindow.molecular.MolecularSystem`
- If a single frame is extracted.
-
- None : :class:`NoneType`
- If more than one frame is extracted, the frames are returned to
- :attr:`frames`
-
- """
- if override is True:
- self.frames = {}
- if isinstance(frames, int):
- frame = self._get_frame(
- self.trajectory_map[frames], frames, **kwargs)
- if frames not in self.frames.keys():
- self.frames[frames] = frame
- return frame
- if isinstance(frames, list):
- for frame in frames:
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
-
- def _get_frame(self, frame_coordinates, frame_no, **kwargs):
- kwargs_ = {
- "extract_data": True
- }
- kwargs_.update(kwargs)
- start, end = frame_coordinates
- with open(self.filepath, 'r') as trajectory_file:
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as mapped_file:
- if kwargs_["extract_data"] is False:
- return mapped_file[start:end].decode("utf-8")
- else:
- # [:-1] because the split results in last list empty.
- frame = [
- i.split()
- for i in mapped_file[start:end].decode("utf-8").split(
- '\n')
- ][:-1]
- decoded_frame = self._decode_frame(frame)
- molsys = MolecularSystem.load_system(
- decoded_frame,
- "_".join([self.system_id, str(frame_no)]))
- if 'swap_atoms' in kwargs:
- molsys.swap_atom_keys(kwargs['swap_atoms'])
- if 'forcefield' in kwargs:
- molsys.decipher_atom_keys(kwargs['forcefield'])
- return molsys
-
- def _decode_frame(self, frame):
- frame_data = {
- 'frame_info': {
- 'nstep': int(frame[0][1]),
- 'natms': int(frame[0][2]),
- 'keytrj': int(frame[0][3]),
- 'imcon': int(frame[0][4]),
- 'tstep': float(frame[0][5])
- }
- }
- start_line = 1
- if frame_data['frame_info']['imcon'] in [1, 2, 3]:
- frame_data['lattice'] = np.array(frame[1:4], dtype=float).T
- frame_data['unit_cell'] = lattice_array_to_unit_cell(frame_data[
- 'lattice'])
- start_line = 4
- # Depending on what the trajectory key is (see __init__) we need
- # to extract every second/ third/ fourth line for elements and coor.
- elements = []
- coordinates = []
- velocities = []
- forces = []
- for i in range(len(frame[start_line:])):
- i_ = i + start_line
- if frame_data['frame_info']['keytrj'] == 0:
- if i % 2 == 0:
- elements.append(frame[i_][0])
- if i % 2 == 1:
- coordinates.append(frame[i_])
- if frame_data['frame_info']['keytrj'] == 1:
- if i % 3 == 0:
- elements.append(frame[i_][0])
- if i % 3 == 1:
- coordinates.append(frame[i_])
- if i % 3 == 2:
- velocities.append(frame[i_])
- if frame_data['frame_info']['keytrj'] == 2:
- if i % 4 == 0:
- elements.append(frame[i_][0])
- if i % 4 == 1:
- coordinates.append(frame[i_])
- if i % 4 == 2:
- velocities.append(frame[i_])
- if i % 4 == 3:
- forces.append(frame[i_])
- frame_data['atom_ids'] = np.array(elements)
- frame_data['coordinates'] = np.array(coordinates, dtype=float)
- if velocities:
- frame_data['velocities'] = np.array(velocities, dtype=float)
- if forces:
- frame_data['forces'] = np.array(forces, dtype=float)
- return frame_data
-
-[docs] def analysis(
- self, frames='all', ncpus=1, _ncpus=1, override=False, **kwargs
- ):
- """
- Perform structural analysis on a frame/ set of frames.
-
- Depending on the passed parameters a frame, a list of particular
- frames, a range of frames (from, to), or all frames can be analysed
- with this function.
-
- The analysis is performed on each frame and each discrete molecule in
- that frame separately. The steps are as follows:
-
- 1. A frame is extracted and returned as a :class:`MolecularSystem`.
- 2. If `swap_atoms` is set the atom ids are swapped.
- 3. If `forcefield` is set the atom ids are deciphered.
- 4. If `rebuild` is set the molecules in the system are rebuild.
- 5. Each discrete molecule is extracted as :class:`Molecule`
- 6. Each molecule is analysed with :func:`Molecule.full_analysis()`
- 7. Analysis output populates the :attr:`analysis_output` dictionary.
-
- As the analysis of trajectories often have to be unique, many options
- are conditional.
-
- A side effect of this function is that the analysed frames are also
- returned to the :attr:`frames` mimicking the behaviour of the
- :func:`get_frames()`.
-
- Parameters
- ----------
- frames : :class:`int` or :class:`list` or :class:`touple` or :class:`str`
- Specified frame (:class:`int`), or frames (:class:`list`), or
- range (:class:`touple`), or `all`/`everything` (:class:`str`).
- (default='all')
-
- override : :class:`bool`
- If True, an output already storred in :attr:`analysis_output` can
- be override. (default=False)
-
- swap_atoms : :class:`dict`, optional
- If this kwarg is passed with an appropriate dictionary a
- :func:`pywindow.molecular.MolecularSystem.swap_atom_keys()` will
- be applied to the extracted frame.
-
- forcefield : :class:`str`, optional
- If this kwarg is passed with appropriate forcefield keyword a
- :func:`pywindow.molecular.MolecularSystem.decipher_atom_keys()`
- will be applied to the extracted frame.
-
- modular : :class:`bool`, optional
- If this kwarg is passed a
- :func:`pywindow.molecular.MolecularSystem.make_modular()`
- will be applied to the extracted frame. (default=False)
-
- rebuild : :class:`bool`, optional
- If this kwarg is passed a `rebuild=True` is passed to
- :func:`pywindow.molecular.MolecularSystem.make_modular()` that
- will be applied to the extracted frame. (default=False)
-
- ncpus : :class:`int`, optional
- If ncpus > 1, then the analysis is performed in parallel for the
- specified number of parallel jobs. Otherwise, it runs in serial.
- (default=1)
-
- Returns
- -------
- None : :class:`NoneType`
- The function returns `None`, the analysis output is
- returned to :attr:`analysis_output` dictionary.
-
- """
- frames_for_analysis = []
- # First populate the frames_for_analysis list.
- if isinstance(frames, int):
- frames_for_analysis.append(frames)
- if isinstance(frames, list):
- for frame in frames:
- if isinstance(frame, int):
- frames_for_analysis.append(frame)
- else:
- raise _FunctionError(
- "The list should be populated with integers only."
- )
- if isinstance(frames, tuple):
- if isinstance(frames[0], int) and isinstance(frames[1], int):
- for frame in range(frames[0], frames[1]):
- frames_for_analysis.append(frame)
- else:
- raise _FunctionError(
- "The tuple should contain only two integers "
- "for the begining and the end of the frames range."
- )
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- frames_for_analysis.append(frame)
- else:
- raise _FunctionError(
- "Didn't recognise the keyword. (see manual)"
- )
- # The override keyword by default is False. So we check if any of the
- # frames were already analysed and if so we delete them from the list.
- # However, if the override is set to True, then we just proceed.
- if override is False:
- frames_for_analysis_new = []
- for frame in frames_for_analysis:
- if frame not in self.analysis_output.keys():
- frames_for_analysis_new.append(frame)
- frames_for_analysis = frames_for_analysis_new
- if ncpus == 1:
- for frame in frames_for_analysis:
- analysed_frame = self._analysis_serial(frame, _ncpus, **kwargs)
- self.analysis_output[frame] = analysed_frame
- if ncpus > 1:
- self._analysis_parallel(frames_for_analysis, ncpus, **kwargs)
-
- def _analysis_serial(self, frame, _ncpus, **kwargs):
- settings = {
- 'rebuild': False,
- 'modular': False,
- }
- settings.update(kwargs)
- molecular_system = self._get_frame(
- self.trajectory_map[frame], frame, extract_data=True, **kwargs
- )
- if settings['modular'] is True:
- molecular_system.make_modular(rebuild=settings['rebuild'])
- molecules = molecular_system.molecules
- else:
- molecules = {'0': molecular_system.system_to_molecule()}
- results = {}
- for molecule in molecules:
- mol = molecules[molecule]
- if 'molsize' in settings:
- molsize = settings['molsize']
- if isinstance(molsize, int):
- if mol.no_of_atoms == molsize:
- results[molecule] = mol.full_analysis(
- _ncpus=_ncpus, **kwargs)
- if isinstance(molsize, tuple) and isinstance(molsize[0], str):
- if molsize[0] in ['bigger', 'greater', 'larger', 'more']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(
- _ncpus=_ncpus, **kwargs)
- if molsize[0] in ['smaller', 'less']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(
- _ncpus=_ncpus, **kwargs)
- if molsize[0] in ['not', 'isnot', 'notequal', 'different']:
- if mol.no_of_atoms != molsize[1]:
- results[molecule] = mol.full_analysis(
- _ncpus=_ncpus, **kwargs)
- if molsize[0] in ['is', 'equal', 'exactly']:
- if mol.no_of_atoms == molsize[1]:
- results[molecule] = mol.full_analysis(
- _ncpus=_ncpus, **kwargs)
- if molsize[0] in ['between', 'inbetween']:
- if molsize[1] < mol.no_of_atoms < molsize[2]:
- results[molecule] = mol.full_analysis(
- _ncpus=_ncpus, **kwargs)
- else:
- results[molecule] = mol.full_analysis(_ncpus=_ncpus, **kwargs)
- return results
-
- def _analysis_parallel_execute(self, frame, **kwargs):
- settings = {
- 'rebuild': False,
- 'modular': False,
- }
- settings.update(kwargs)
- molecular_system = self._get_frame(
- self.trajectory_map[frame], frame, extract_data=True, **kwargs
- )
- if settings['modular'] is True:
- molecular_system.make_modular(rebuild=settings['rebuild'])
- molecules = molecular_system.molecules
- else:
- molecules = {'0': molecular_system.system_to_molecule()}
- results = {}
- for molecule in molecules:
- mol = molecules[molecule]
- if 'molsize' in settings:
- molsize = settings['molsize']
- if isinstance(molsize, int):
- if mol.no_of_atoms == molsize:
- results[molecule] = mol.full_analysis(**kwargs)
- if isinstance(molsize, tuple) and isinstance(molsize[0], str):
- if molsize[0] in ['bigger', 'greater', 'larger', 'more']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['smaller', 'less']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['not', 'isnot', 'notequal', 'different']:
- if mol.no_of_atoms != molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['is', 'equal', 'exactly']:
- if mol.no_of_atoms == molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['between', 'inbetween']:
- if molsize[1] < mol.no_of_atoms < molsize[2]:
- results[molecule] = mol.full_analysis(**kwargs)
- else:
- results[molecule] = mol.full_analysis(**kwargs)
- return frame, results
-
- def _analysis_parallel(self, frames, ncpus, **kwargs):
- try:
- pool = Pool(processes=ncpus)
- parallel = [
- pool.apply_async(
- self._analysis_parallel_execute,
- args=(frame, ),
- kwds=kwargs) for frame in frames
- ]
- results = [p.get() for p in parallel if p.get()]
- pool.terminate()
- for i in results:
- self.analysis_output[i[0]] = i[1]
- except TypeError:
- pool.terminate()
- raise _ParallelAnalysisError("Parallel analysis failed.")
-
- def _check_HISTORY(self):
- """
- """
- self.check_log = ""
- line = 0
- binary_step = 0
- timestep = 0
- timestep_flag = 'timestep'
- progress = 0
-
- warning_1 = "No comment line is present as the file header.\n"
- warning_2 = " ".join(
- (
- "Second header line is missing from the file",
- "that contains information on the system's periodicity",
- "and the type of the trajectory file.\n"
- )
- )
- warning_3 = " ".join(
- (
- "Comment line encountered in the middle of",
- "the trajectory file.\n"
- )
- )
-
- error_1 = "The trajectory is discontinous.\n"
- error_2 = "The file contains an empty line.\n"
-
- with open(self.filepath, 'r') as trajectory_file:
- # We open the HISTORY trajectory file
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as file_binary_map:
- # We use this binary mapping feature that instead of loading
- # the full file into memory beforehand it only
- # maps the content. Especially useful with enormous files
- while binary_step < len(file_binary_map):
- line += 1
- binary_line = file_binary_map.readline()
- binary_step = binary_step + len(binary_line)
- progress_old = progress
- progress = round(binary_step * 100 / len(file_binary_map),
- 0)
- string_line = binary_line.decode("utf-8").strip(
- '\n').split()
-
- # Warning 1
- if line == 1:
- if string_line[0] != 'DLFIELD':
- self.check_log = " ".join(
- (self.check_log, "Line {0}:".format(line),
- warning_1)
- )
-
- # Warning 2
- if line == 2:
- if len(string_line) != 3:
- self.check_log = " ".join(
- (self.check_log, "Line {0}:".format(line),
- warning_2)
- )
-
- # Error 1
- if string_line:
- if string_line[0] == timestep_flag:
- old_timestep = timestep
- timestep = int(string_line[1])
- if old_timestep > timestep:
- error = " ".join(
- "Line {0}:".format(line), error_1
- )
- raise _TrajectoryError(error)
-
- # Error 2
- if len(string_line) == 0:
- error = " ".join(
- "Line {0}:".format(line), error_2
- )
- raise _TrajectoryError(error)
-
-[docs] def save_analysis(self, filepath=None, **kwargs):
- """
- Dump the content of :attr:`analysis_output` as JSON dictionary.
-
- Parameters
- ----------
- filepath : :class:`str`
- The filepath for the JSON file.
-
- Returns
- -------
- None : :class:`NoneType`
- """
- # We pass a copy of the analysis attribute dictionary.
- dict_obj = deepcopy(self.analysis_output)
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = "_".join(
- (str(self.system_id), "pywindow_analysis")
- )
- filepath = '/'.join((os.getcwd(), filepath))
- # Dump the dictionary to json file.
- Output().dump2json(dict_obj, filepath, default=to_list, **kwargs)
- return
-
-[docs] def save_frames(self, frames, filepath=None, filetype='pdb', **kwargs):
- settings = {
- "pdb": Output()._save_pdb,
- "xyz": Output()._save_xyz,
- "decipher": True,
- "forcefield": None,
- }
- settings.update(kwargs)
- if filetype.lower() not in settings.keys():
- raise _FormatError("The '{0}' file format is not supported".format(
- filetype))
- frames_to_get = []
- if isinstance(frames, int):
- frames_to_get.append(frames)
- if isinstance(frames, list):
- frames_to_get = frames
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- frames_to_get.append(frame)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- frames_to_get.append(frame)
- for frame in frames_to_get:
- if frame not in self.frames.keys():
- _ = self.get_frames(frame)
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = '/'.join((os.getcwd(), str(self.system_id)))
- for frame in frames_to_get:
- frame_molsys = self.frames[frame]
- if settings[
- 'decipher'] is True and settings['forcefield'] is not None:
- if "swap_atoms" in settings.keys():
- if isinstance(settings["swap_atoms"], dict):
- frame_molsys.swap_atom_keys(settings["swap_atoms"])
- else:
- raise _FunctionError(
- "The swap_atom_keys function only accepts "
- "'swap_atoms' argument in form of a dictionary.")
- frame_molsys.decipher_atom_keys(settings["forcefield"])
- ffilepath = '_'.join((filepath, str(frame)))
- if 'elements' not in frame_molsys.system.keys():
- raise _FunctionError(
- "The frame (MolecularSystem object) needs to have "
- "'elements' attribute within the system dictionary. "
- "It is, therefore, neccessary that you set a decipher "
- "keyword to True. (see manual)")
- settings[filetype.lower()](frame_molsys.system, ffilepath, **
- kwargs)
-
-
-[docs]class XYZ(object):
- """
- A container for an XYZ type trajectory.
-
- This function takes an XYZ trajectory file and maps it for the
- binary points in the file where each frame starts/ends. This way the
- process is fast, as it not require loading the trajectory into computer
- memory. When a frame is being extracted, it is only this frame that gets
- loaded to the memory.
-
- Frames can be accessed individually and loaded as an unmodified string,
- returned as a :class:`pywindow.molecular.MolecularSystem` (and analysed),
- dumped as PDB or XYZ or JSON (if dumped as a
- :attr:`pywindow.molecular.MolecularSystem.system`)
-
- Attributes
- ----------
- filepath : :class:`str`
- The filepath.
-
- filename : :class:`str`
- The filename.
-
- system_id : :class:`str`
- The system id inherited from the filename.
-
- frames : :class:`dict`
- A dictionary that is populated, on the fly, with the extracted frames.
-
- analysis_output : :class:`dict`
- A dictionary that is populated, on the fly, with the analysis output.
-
- """
- def __init__(self, filepath):
- self.filepath = filepath
- self.filename = os.path.basename(filepath)
- self.system_id = self.filename.split(".")[0]
- self.frames = {}
- self.analysis_output = {}
- # Map the trajectory file at init.
- self._map_trajectory()
-
- def _map_trajectory(self):
- """ Return filepath as a class attribute"""
- self.trajectory_map = {}
- with open(self.filepath, 'r') as trajectory_file:
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as mapped_file:
- progress = 0
- line = 0
- frame = -1
- frame_start = 0
- while progress <= len(mapped_file):
- line = line + 1
- # We read a binary data from a mapped file.
- bline = mapped_file.readline()
- # If the bline length equals zero we terminate.
- # We reached end of the file but still add the last frame!
- if len(bline) == 0:
- frame = frame + 1
- self.trajectory_map[frame] = [frame_start, progress]
- break
- # We need to decode byte line into an utf-8 string.
- sline = bline.decode("utf-8").strip('\n').split()
- # We extract map's byte coordinates for each frame
- if (len(sline) == 1 and is_number(sline[0]) and
- progress > 0):
- frame = frame + 1
- self.trajectory_map[frame] = [frame_start, progress]
- frame_start = progress
- # Here we extract the map's byte coordinates for the header
- # And also the periodic system type needed for later.
- progress = progress + len(bline)
- self.no_of_frames = frame + 1
-
-[docs] def get_frames(self, frames='all', override=False, **kwargs):
- """
- Extract frames from the trajectory file.
-
- Depending on the passed parameters a frame, a list of particular
- frames, a range of frames (from, to), or all frames can be extracted
- with this function.
-
- Parameters
- ----------
- frames : :class:`int` or :class:`list` or :class:`touple` or :class:`str`
- Specified frame (:class:`int`), or frames (:class:`list`), or
- range (:class:`touple`), or `all`/`everything` (:class:`str`).
- (default=`all`)
-
- override : :class:`bool`
- If True, a frame already storred in :attr:`frames` can be override.
- (default=False)
-
- extract_data : :class:`bool`, optional
- If False, a frame is returned as a :class:`str` block as in the
- trajectory file. Ohterwise, it is extracted and returned as
- :class:`pywindow.molecular.MolecularSystem`. (default=True)
-
- swap_atoms : :class:`dict`, optional
- If this kwarg is passed with an appropriate dictionary a
- :func:`pywindow.molecular.MolecularSystem.swap_atom_keys()` will
- be applied to the extracted frame.
-
- forcefield : :class:`str`, optional
- If this kwarg is passed with appropriate forcefield keyword a
- :func:`pywindow.molecular.MolecularSystem.decipher_atom_keys()`
- will be applied to the extracted frame.
-
- Returns
- -------
- :class:`pywindow.molecular.MolecularSystem`
- If a single frame is extracted.
-
- None : :class:`NoneType`
- If more than one frame is extracted, the frames are returned to
- :attr:`frames`
-
- """
- if override is True:
- self.frames = {}
- if isinstance(frames, int):
- frame = self._get_frame(
- self.trajectory_map[frames], frames, **kwargs)
- if frames not in self.frames.keys():
- self.frames[frames] = frame
- return frame
- if isinstance(frames, list):
- for frame in frames:
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
-
- def _get_frame(self, frame_coordinates, frame_no, **kwargs):
- kwargs_ = {
- "extract_data": True,
- }
- kwargs_.update(kwargs)
- start, end = frame_coordinates
- with open(self.filepath, 'r') as trajectory_file:
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as mapped_file:
- if kwargs_["extract_data"] is False:
- return mapped_file[start:end].decode("utf-8")
- else:
- # [:-1] because the split results in last list empty.
- frame = [
- i.split()
- for i in mapped_file[start:end].decode("utf-8").split(
- '\n')
- ][:-1]
- decoded_frame = self._decode_frame(frame)
- molsys = MolecularSystem.load_system(
- decoded_frame,
- "_".join([self.system_id, str(frame_no)]))
- if 'swap_atoms' in kwargs:
- molsys.swap_atom_keys(kwargs['swap_atoms'])
- if 'forcefield' in kwargs:
- molsys.decipher_atom_keys(kwargs['forcefield'])
- return molsys
-
- def _decode_frame(self, frame):
- frame_data = {
- 'frame_info': {
- 'natms': int(frame[0][0]),
- 'remarks': " ".join([*frame[1]]),
- }
- }
- start_line = 2
- elements = []
- coordinates = []
- for i in range(start_line, len(frame)):
- elements.append(frame[i][0])
- coordinates.append(frame[i][1:])
- frame_data['atom_ids'] = np.array(elements)
- frame_data['coordinates'] = np.array(coordinates, dtype=float)
- return frame_data
-
-[docs] def analysis(self, frames='all', ncpus=1, override=False, **kwargs):
- """
- Perform structural analysis on a frame/ set of frames.
-
- Depending on the passed parameters a frame, a list of particular
- frames, a range of frames (from, to), or all frames can be analysed
- with this function.
-
- The analysis is performed on each frame and each discrete molecule in
- that frame separately. The steps are as follows:
-
- 1. A frame is extracted and returned as a :class:`MolecularSystem`.
- 2. If `swap_atoms` is set the atom ids are swapped.
- 3. If `forcefield` is set the atom ids are deciphered.
- 4. If `rebuild` is set the molecules in the system are rebuild.
- 5. Each discrete molecule is extracted as :class:`Molecule`
- 6. Each molecule is analysed with :func:`Molecule.full_analysis()`
- 7. Analysis output populates the :attr:`analysis_output` dictionary.
-
- As the analysis of trajectories often have to be unique, many options
- are conditional.
-
- A side effect of this function is that the analysed frames are also
- returned to the :attr:`frames` mimicking the behaviour of the
- :func:`get_frames()`.
-
- Parameters
- ----------
- frames : :class:`int` or :class:`list` or :class:`touple` or :class:`str`
- Specified frame (:class:`int`), or frames (:class:`list`), or
- range (:class:`touple`), or `all`/`everything` (:class:`str`).
- (default='all')
-
- override : :class:`bool`
- If True, an output already storred in :attr:`analysis_output` can
- be override. (default=False)
-
- swap_atoms : :class:`dict`, optional
- If this kwarg is passed with an appropriate dictionary a
- :func:`pywindow.molecular.MolecularSystem.swap_atom_keys()` will
- be applied to the extracted frame.
-
- forcefield : :class:`str`, optional
- If this kwarg is passed with appropriate forcefield keyword a
- :func:`pywindow.molecular.MolecularSystem.decipher_atom_keys()`
- will be applied to the extracted frame.
-
- modular : :class:`bool`, optional
- If this kwarg is passed a
- :func:`pywindow.molecular.MolecularSystem.make_modular()`
- will be applied to the extracted frame. (default=False)
-
- rebuild : :class:`bool`, optional
- If this kwarg is passed a `rebuild=True` is passed to
- :func:`pywindow.molecular.MolecularSystem.make_modular()` that
- will be applied to the extracted frame. (default=False)
-
- ncpus : :class:`int`, optional
- If ncpus > 1, then the analysis is performed in parallel for the
- specified number of parallel jobs. Otherwise, it runs in serial.
- (default=1)
-
- Returns
- -------
- None : :class:`NoneType`
- The function returns `None`, the analysis output is
- returned to :attr:`analysis_output` dictionary.
-
- """
- if override is True:
- self.analysis_output = {}
- if isinstance(frames, int):
- analysed_frame = self._analysis_serial(frames, ncpus, **kwargs)
- if frames not in self.analysis_output.keys():
- self.analysis_output[frames] = analysed_frame
- return analysed_frame
- else:
- frames_for_analysis = []
- if isinstance(frames, list):
- for frame in frames:
- if frame not in self.analysis_output.keys():
- frames_for_analysis.append(frame)
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- if frame not in self.analysis_output.keys():
- frames_for_analysis.append(frame)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- if frame not in self.analysis_output.keys():
- frames_for_analysis.append(frame)
- self._analysis_parallel(frames_for_analysis, ncpus, **kwargs)
-
- def _analysis_serial(self, frame, ncpus, **kwargs):
- settings = {
- 'rebuild': False,
- 'modular': False,
- }
- settings.update(kwargs)
- molecular_system = self._get_frame(
- self.trajectory_map[frame], frame, extract_data=True, **kwargs
- )
- if settings['modular'] is True:
- molecular_system.make_modular(rebuild=settings['rebuild'])
- molecules = molecular_system.molecules
- else:
- molecules = {'0': molecular_system.system_to_molecule()}
- results = {}
- for molecule in molecules:
- mol = molecules[molecule]
- if 'molsize' in settings:
- molsize = settings['molsize']
- if isinstance(molsize, int):
- if mol.no_of_atoms == molsize:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if isinstance(molsize, tuple) and isinstance(molsize[0], str):
- if molsize[0] in ['bigger', 'greater', 'larger', 'more']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['smaller', 'less']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['not', 'isnot', 'notequal', 'different']:
- if mol.no_of_atoms != molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['is', 'equal', 'exactly']:
- if mol.no_of_atoms == molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['between', 'inbetween']:
- if molsize[1] < mol.no_of_atoms < molsize[2]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- else:
- results[molecule] = mol.full_analysis(ncpus=ncpus, **kwargs)
- return results
-
- def _analysis_parallel_execute(self, frame, **kwargs):
- settings = {
- 'rebuild': False,
- 'modular': False,
- }
- settings.update(kwargs)
- molecular_system = self._get_frame(
- self.trajectory_map[frame], frame, extract_data=True, **kwargs
- )
- if settings['modular'] is True:
- molecular_system.make_modular(rebuild=settings['rebuild'])
- molecules = molecular_system.molecules
- else:
- molecules = {'0': molecular_system.system_to_molecule()}
- results = {}
- for molecule in molecules:
- mol = molecules[molecule]
- if 'molsize' in settings:
- molsize = settings['molsize']
- if isinstance(molsize, int):
- if mol.no_of_atoms == molsize:
- results[molecule] = mol.full_analysis(**kwargs)
- if isinstance(molsize, tuple) and isinstance(molsize[0], str):
- if molsize[0] in ['bigger', 'greater', 'larger', 'more']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['smaller', 'less']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['not', 'isnot', 'notequal', 'different']:
- if mol.no_of_atoms != molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['is', 'equal', 'exactly']:
- if mol.no_of_atoms == molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['between', 'inbetween']:
- if molsize[1] < mol.no_of_atoms < molsize[2]:
- results[molecule] = mol.full_analysis(**kwargs)
- else:
- results[molecule] = mol.full_analysis(**kwargs)
- return frame, results
-
- def _analysis_parallel(self, frames, ncpus, **kwargs):
- try:
- pool = Pool(processes=ncpus)
- parallel = [
- pool.apply_async(
- self._analysis_parallel_execute,
- args=(frame, ),
- kwds=kwargs) for frame in frames
- ]
- results = [p.get() for p in parallel if p.get()[1] is not None]
- pool.terminate()
- for i in results:
- self.analysis_output[i[0]] = i[1]
- except TypeError:
- pool.terminate()
- raise _ParallelAnalysisError("Parallel analysis failed.")
-
-[docs] def save_analysis(self, filepath=None, **kwargs):
- """
- Dump the content of :attr:`analysis_output` as JSON dictionary.
-
- Parameters
- ----------
- filepath : :class:`str`
- The filepath for the JSON file.
-
- Returns
- -------
- None : :class:`NoneType`
- """
- # We pass a copy of the analysis attribute dictionary.
- dict_obj = deepcopy(self.analysis_output)
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = "_".join(
- (str(self.system_id), "pywindow_analysis")
- )
- filepath = '/'.join((os.getcwd(), filepath))
- # Dump the dictionary to json file.
- Output().dump2json(dict_obj, filepath, default=to_list, **kwargs)
- return
-
-[docs] def save_frames(self, frames, filepath=None, filetype='pdb', **kwargs):
- settings = {
- "pdb": Output()._save_pdb,
- "xyz": Output()._save_xyz,
- "decipher": True,
- "forcefield": None,
- }
- settings.update(kwargs)
- if filetype.lower() not in settings.keys():
- raise _FormatError("The '{0}' file format is not supported".format(
- filetype))
- frames_to_get = []
- if isinstance(frames, int):
- frames_to_get.append(frames)
- if isinstance(frames, list):
- frames_to_get = frames
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- frames_to_get.append(frame)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- frames_to_get.append(frame)
- for frame in frames_to_get:
- if frame not in self.frames.keys():
- _ = self.get_frames(frame)
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = '/'.join((os.getcwd(), str(self.system_id)))
- for frame in frames_to_get:
- frame_molsys = self.frames[frame]
- if settings[
- 'decipher'] is True and settings['forcefield'] is not None:
- if "swap_atoms" in settings.keys():
- if isinstance(settings["swap_atoms"], dict):
- frame_molsys.swap_atom_keys(settings["swap_atoms"])
- else:
- raise _FunctionError(
- "The swap_atom_keys function only accepts "
- "'swap_atoms' argument in form of a dictionary.")
- frame_molsys.decipher_atom_keys(settings["forcefield"])
- ffilepath = '_'.join((filepath, str(frame)))
- if 'elements' not in frame_molsys.system.keys():
- raise _FunctionError(
- "The frame (MolecularSystem object) needs to have "
- "'elements' attribute within the system dictionary. "
- "It is, therefore, neccessary that you set a decipher "
- "keyword to True. (see manual)")
- settings[filetype.lower()](frame_molsys.system, ffilepath, **
- kwargs)
-
-
-[docs]class PDB(object):
- def __init__(self, filepath):
- """
- A container for an PDB type trajectory.
-
- This function takes an PDB trajectory file and maps it for the
- binary points in the file where each frame starts/ends. This way the
- process is fast, as it not require loading the trajectory into computer
- memory. When a frame is being extracted, it is only this frame that gets
- loaded to the memory.
-
- Frames can be accessed individually and loaded as an unmodified string,
- returned as a :class:`pywindow.molecular.MolecularSystem` (and analysed),
- dumped as PDB or XYZ or JSON (if dumped as a
- :attr:`pywindow.molecular.MolecularSystem.system`)
-
- Attributes
- ----------
- filepath : :class:`str`
- The filepath.
-
- filename : :class:`str`
- The filename.
-
- system_id : :class:`str`
- The system id inherited from the filename.
-
- frames : :class:`dict`
- A dictionary that is populated, on the fly, with the extracted frames.
-
- analysis_output : :class:`dict`
- A dictionary that is populated, on the fly, with the analysis output.
-
- """
- self.filepath = filepath
- self.filename = os.path.basename(filepath)
- self.system_id = self.filename.split(".")[0]
- self.frames = {}
- self.analysis_output = {}
- # Map the trajectory file at init.
- self._map_trajectory()
-
- def _map_trajectory(self):
- """ Return filepath as a class attribute"""
- self.trajectory_map = {}
- with open(self.filepath, 'r') as trajectory_file:
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as mapped_file:
- progress = 0
- line = 0
- frame = -1
- frame_start = 0
- while progress <= len(mapped_file):
- line = line + 1
- # We read a binary data from a mapped file.
- bline = mapped_file.readline()
- # If the bline length equals zero we terminate.
- # We reached end of the file but still add the last frame!
- if len(bline) == 0:
- frame = frame + 1
- if progress - frame_start > 10:
- self.trajectory_map[frame] = [
- frame_start, progress
- ]
- break
- # We need to decode byte line into an utf-8 string.
- sline = bline.decode("utf-8").strip('\n').split()
- # We extract map's byte coordinates for each frame
- if len(sline) == 1 and sline[0] == 'END':
- frame = frame + 1
- self.trajectory_map[frame] = [frame_start, progress]
- frame_start = progress
- # Here we extract the map's byte coordinates for the header
- # And also the periodic system type needed for later.
- progress = progress + len(bline)
- self.no_of_frames = frame
-
-[docs] def get_frames(self, frames='all', override=False, **kwargs):
- """
- Extract frames from the trajectory file.
-
- Depending on the passed parameters a frame, a list of particular
- frames, a range of frames (from, to), or all frames can be extracted
- with this function.
-
- Parameters
- ----------
- frames : :class:`int` or :class:`list` or :class:`touple` or :class:`str`
- Specified frame (:class:`int`), or frames (:class:`list`), or
- range (:class:`touple`), or `all`/`everything` (:class:`str`).
- (default=`all`)
-
- override : :class:`bool`
- If True, a frame already storred in :attr:`frames` can be override.
- (default=False)
-
- extract_data : :class:`bool`, optional
- If False, a frame is returned as a :class:`str` block as in the
- trajectory file. Ohterwise, it is extracted and returned as
- :class:`pywindow.molecular.MolecularSystem`. (default=True)
-
- swap_atoms : :class:`dict`, optional
- If this kwarg is passed with an appropriate dictionary a
- :func:`pywindow.molecular.MolecularSystem.swap_atom_keys()` will
- be applied to the extracted frame.
-
- forcefield : :class:`str`, optional
- If this kwarg is passed with appropriate forcefield keyword a
- :func:`pywindow.molecular.MolecularSystem.decipher_atom_keys()`
- will be applied to the extracted frame.
-
- Returns
- -------
- :class:`pywindow.molecular.MolecularSystem`
- If a single frame is extracted.
-
- None : :class:`NoneType`
- If more than one frame is extracted, the frames are returned to
- :attr:`frames`
-
- """
- if override is True:
- self.frames = {}
- if isinstance(frames, int):
- frame = self._get_frame(
- self.trajectory_map[frames], frames, **kwargs)
- if frames not in self.frames.keys():
- self.frames[frames] = frame
- return frame
- if isinstance(frames, list):
- for frame in frames:
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- if frame not in self.frames.keys():
- self.frames[frame] = self._get_frame(
- self.trajectory_map[frame], frame, **kwargs)
-
- def _get_frame(self, frame_coordinates, frame_no, **kwargs):
- kwargs_ = {
- "extract_data": True
- }
- kwargs_.update(kwargs)
- start, end = frame_coordinates
- with open(self.filepath, 'r') as trajectory_file:
- with closing(
- mmap(
- trajectory_file.fileno(), 0,
- access=ACCESS_READ)) as mapped_file:
- if kwargs_["extract_data"] is False:
- return mapped_file[start:end].decode("utf-8")
- else:
- # In case of PDB we do not split lines!
- frame = mapped_file[start:end].decode("utf-8").split('\n')
- decoded_frame = self._decode_frame(frame)
- molsys = MolecularSystem.load_system(
- decoded_frame,
- "_".join([self.system_id, str(frame_no)]))
- if 'swap_atoms' in kwargs:
- molsys.swap_atom_keys(kwargs['swap_atoms'])
- if 'forcefield' in kwargs:
- molsys.decipher_atom_keys(kwargs['forcefield'])
- return molsys
-
- def _decode_frame(self, frame):
- frame_data = {}
- elements = []
- coordinates = []
- for i in range(len(frame)):
- if frame[i][:6] == 'REMARK':
- if 'REMARKS' not in frame_data.keys():
- frame_data['REMARKS'] = []
- frame_data['REMARKS'].append(frame[i][6:])
- if frame[i][:6] == 'CRYST1':
- cryst = np.array(
- [
- frame[i][6:15], frame[i][15:24], frame[i][24:33],
- frame[i][33:40], frame[i][40:47], frame[i][47:54]
- ],
- dtype=float)
- # This is in case of nonperiodic systems, often they have
- # a,b,c unit cell parameters as 0,0,0.
- if sum(cryst[0:3]) != 0:
- frame_data['CRYST1'] = cryst
- if frame[i][:6] in ['HETATM', 'ATOM ']:
- elements.append(frame[i][12:16].strip())
- coordinates.append(
- [frame[i][30:38], frame[i][38:46], frame[i][46:54]])
- frame_data['atom_ids'] = np.array(elements, dtype='<U8')
- frame_data['coordinates'] = np.array(coordinates, dtype=float)
- return frame_data
-
-[docs] def analysis(self, frames='all', ncpus=1, override=False, **kwargs):
- """
- Perform structural analysis on a frame/ set of frames.
-
- Depending on the passed parameters a frame, a list of particular
- frames, a range of frames (from, to), or all frames can be analysed
- with this function.
-
- The analysis is performed on each frame and each discrete molecule in
- that frame separately. The steps are as follows:
-
- 1. A frame is extracted and returned as a :class:`MolecularSystem`.
- 2. If `swap_atoms` is set the atom ids are swapped.
- 3. If `forcefield` is set the atom ids are deciphered.
- 4. If `rebuild` is set the molecules in the system are rebuild.
- 5. Each discrete molecule is extracted as :class:`Molecule`
- 6. Each molecule is analysed with :func:`Molecule.full_analysis()`
- 7. Analysis output populates the :attr:`analysis_output` dictionary.
-
- As the analysis of trajectories often have to be unique, many options
- are conditional.
-
- A side effect of this function is that the analysed frames are also
- returned to the :attr:`frames` mimicking the behaviour of the
- :func:`get_frames()`.
-
- Parameters
- ----------
- frames : :class:`int` or :class:`list` or :class:`touple` or :class:`str`
- Specified frame (:class:`int`), or frames (:class:`list`), or
- range (:class:`touple`), or `all`/`everything` (:class:`str`).
- (default='all')
-
- override : :class:`bool`
- If True, an output already storred in :attr:`analysis_output` can
- be override. (default=False)
-
- swap_atoms : :class:`dict`, optional
- If this kwarg is passed with an appropriate dictionary a
- :func:`pywindow.molecular.MolecularSystem.swap_atom_keys()` will
- be applied to the extracted frame.
-
- forcefield : :class:`str`, optional
- If this kwarg is passed with appropriate forcefield keyword a
- :func:`pywindow.molecular.MolecularSystem.decipher_atom_keys()`
- will be applied to the extracted frame.
-
- modular : :class:`bool`, optional
- If this kwarg is passed a
- :func:`pywindow.molecular.MolecularSystem.make_modular()`
- will be applied to the extracted frame. (default=False)
-
- rebuild : :class:`bool`, optional
- If this kwarg is passed a `rebuild=True` is passed to
- :func:`pywindow.molecular.MolecularSystem.make_modular()` that
- will be applied to the extracted frame. (default=False)
-
- ncpus : :class:`int`, optional
- If ncpus > 1, then the analysis is performed in parallel for the
- specified number of parallel jobs. Otherwise, it runs in serial.
- (default=1)
-
- Returns
- -------
- None : :class:`NoneType`
- The function returns `None`, the analysis output is
- returned to :attr:`analysis_output` dictionary.
-
- """
- if override is True:
- self.analysis_output = {}
- if isinstance(frames, int):
- analysed_frame = self._analysis_serial(frames, ncpus, **kwargs)
- if frames not in self.analysis_output.keys():
- self.analysis_output[frames] = analysed_frame
- return analysed_frame
- else:
- frames_for_analysis = []
- if isinstance(frames, list):
- for frame in frames:
- if frame not in self.analysis_output.keys():
- frames_for_analysis.append(frame)
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- if frame not in self.analysis_output.keys():
- frames_for_analysis.append(frame)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- if frame not in self.analysis_output.keys():
- frames_for_analysis.append(frame)
- self._analysis_parallel(frames_for_analysis, ncpus, **kwargs)
-
- def _analysis_serial(self, frame, ncpus, **kwargs):
- settings = {
- 'rebuild': False,
- 'modular': False,
- }
- settings.update(kwargs)
- molecular_system = self._get_frame(
- self.trajectory_map[frame], frame, extract_data=True, **kwargs
- )
- if settings['modular'] is True:
- molecular_system.make_modular(rebuild=settings['rebuild'])
- molecules = molecular_system.molecules
- else:
- molecules = {'0': molecular_system.system_to_molecule()}
- results = {}
- for molecule in molecules:
- mol = molecules[molecule]
- if 'molsize' in settings:
- molsize = settings['molsize']
- if isinstance(molsize, int):
- if mol.no_of_atoms == molsize:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if isinstance(molsize, tuple) and isinstance(molsize[0], str):
- if molsize[0] in ['bigger', 'greater', 'larger', 'more']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['smaller', 'less']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['not', 'isnot', 'notequal', 'different']:
- if mol.no_of_atoms != molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['is', 'equal', 'exactly']:
- if mol.no_of_atoms == molsize[1]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- if molsize[0] in ['between', 'inbetween']:
- if molsize[1] < mol.no_of_atoms < molsize[2]:
- results[molecule] = mol.full_analysis(
- ncpus=ncpus, **kwargs)
- else:
- results[molecule] = mol.full_analysis(ncpus=ncpus, **kwargs)
- return results
-
- def _analysis_parallel_execute(self, frame, **kwargs):
- settings = {
- 'rebuild': False,
- 'modular': False,
- }
- settings.update(kwargs)
- molecular_system = self._get_frame(
- self.trajectory_map[frame], frame, extract_data=True, **kwargs
- )
- if settings['modular'] is True:
- molecular_system.make_modular(rebuild=settings['rebuild'])
- molecules = molecular_system.molecules
- else:
- molecules = {'0': molecular_system.system_to_molecule()}
- results = {}
- for molecule in molecules:
- mol = molecules[molecule]
- if 'molsize' in settings:
- molsize = settings['molsize']
- if isinstance(molsize, int):
- if mol.no_of_atoms == molsize:
- results[molecule] = mol.full_analysis(**kwargs)
- if isinstance(molsize, tuple) and isinstance(molsize[0], str):
- if molsize[0] in ['bigger', 'greater', 'larger', 'more']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['smaller', 'less']:
- if mol.no_of_atoms > molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['not', 'isnot', 'notequal', 'different']:
- if mol.no_of_atoms != molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['is', 'equal', 'exactly']:
- if mol.no_of_atoms == molsize[1]:
- results[molecule] = mol.full_analysis(**kwargs)
- if molsize[0] in ['between', 'inbetween']:
- if molsize[1] < mol.no_of_atoms < molsize[2]:
- results[molecule] = mol.full_analysis(**kwargs)
- else:
- results[molecule] = mol.full_analysis(**kwargs)
- return frame, results
-
- def _analysis_parallel(self, frames, ncpus, **kwargs):
- try:
- pool = Pool(processes=ncpus)
- parallel = [
- pool.apply_async(
- self._analysis_parallel_execute,
- args=(frame, ),
- kwds=kwargs) for frame in frames
- ]
- results = [p.get() for p in parallel if p.get()[1] is not None]
- pool.terminate()
- for i in results:
- self.analysis_output[i[0]] = i[1]
- except TypeError:
- pool.terminate()
- raise _ParallelAnalysisError("Parallel analysis failed.")
-
-[docs] def save_analysis(self, filepath=None, **kwargs):
- """
- Dump the content of :attr:`analysis_output` as JSON dictionary.
-
- Parameters
- ----------
- filepath : :class:`str`
- The filepath for the JSON file.
-
- Returns
- -------
- None : :class:`NoneType`
- """
- # We pass a copy of the analysis attribute dictionary.
- dict_obj = deepcopy(self.analysis_output)
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = "_".join(
- (str(self.system_id), "pywindow_analysis")
- )
- filepath = '/'.join((os.getcwd(), filepath))
- # Dump the dictionary to json file.
- Output().dump2json(dict_obj, filepath, default=to_list, **kwargs)
- return
-
-[docs] def save_frames(self, frames, filepath=None, filetype='pdb', **kwargs):
- settings = {
- "pdb": Output()._save_pdb,
- "xyz": Output()._save_xyz,
- "decipher": True,
- "forcefield": None,
- }
- settings.update(kwargs)
- if filetype.lower() not in settings.keys():
- raise _FormatError("The '{0}' file format is not supported".format(
- filetype))
- frames_to_get = []
- if isinstance(frames, int):
- frames_to_get.append(frames)
- if isinstance(frames, list):
- frames_to_get = frames
- if isinstance(frames, tuple):
- for frame in range(frames[0], frames[1]):
- frames_to_get.append(frame)
- if isinstance(frames, str):
- if frames in ['all', 'everything']:
- for frame in range(0, self.no_of_frames):
- frames_to_get.append(frame)
- for frame in frames_to_get:
- if frame not in self.frames.keys():
- _ = self.get_frames(frame)
- # If no filepath is provided we create one.
- if filepath is None:
- filepath = '/'.join((os.getcwd(), str(self.system_id)))
- for frame in frames_to_get:
- frame_molsys = self.frames[frame]
- if settings[
- 'decipher'] is True and settings['forcefield'] is not None:
- if "swap_atoms" in settings.keys():
- if isinstance(settings["swap_atoms"], dict):
- frame_molsys.swap_atom_keys(settings["swap_atoms"])
- else:
- raise _FunctionError(
- "The swap_atom_keys function only accepts "
- "'swap_atoms' argument in form of a dictionary.")
- frame_molsys.decipher_atom_keys(settings["forcefield"])
- ffilepath = '_'.join((filepath, str(frame)))
- if 'elements' not in frame_molsys.system.keys():
- raise _FunctionError(
- "The frame (MolecularSystem object) needs to have "
- "'elements' attribute within the system dictionary. "
- "It is, therefore, neccessary that you set a decipher "
- "keyword to True. (see manual)")
- settings[filetype.lower()](frame_molsys.system, ffilepath, **
- kwargs)
-
-"""
-Module containing all general purpose functions shared by other modules.
-
-This module is not intended for the direct use by a User. Therefore, I will
-only docstring functions if I see fit to do so.
-
-LOG
----
-11/07/18
- Changed the way vector path is analysed. Now, the initial analysis is
- done with the geometrical formula for line-sphere intersection. Only
- the remaining vestors that do not intersect any van der Waals spheres are
- then analysed in the old way.
-
-27/07/17
- Fixed the cartesian coordinates -> fractional coordinates -> cartesian
- coordinates conversion related functions, creation of lattice array
- from unit cell parameters (triclinic system: so applicable to any)
- and conversion back to unit cell parameters. WORKS! inspiration from:
- http://www.ruppweb.org/Xray/tutorial/Coordinate%20system%20transformation.htm
-
-26/07/17
- Changed the way bonds are determined. Now, rather then fixed value
- a formula and covalent radii are used as explained in the Elemental_Radii
- spreadsheet (see tables module).
-
-TO DO LIST
-----------
-
-- Fix and validate calculating shape descriptors: asphericity, acylindricity
- and the realtive shape anisotropy. (Not working at the moment)
-
-- In the find_windows() function, maybe change the way the EPS value for
- the DBSCAN() is estimates. Need to look how the distances change with the
- increase in size of the sampling sphere. (validate this with the MongoDB)
-"""
-
-import numpy as np
-from copy import deepcopy
-from multiprocessing import Pool
-from scipy.optimize import brute, fmin, minimize
-from sklearn.cluster import DBSCAN
-from sklearn.metrics.pairwise import euclidean_distances
-from sklearn.neighbors import KDTree
-
-from .tables import (
- atomic_mass, atomic_vdw_radius, opls_atom_keys, atomic_covalent_radius
- )
-
-
-class _AtomKeyError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _AtomKeyConflict(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _ForceFieldError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-class _FunctionError(Exception):
- def __init__(self, message):
- self.message = message
-
-
-[docs]def is_number(number):
- """
- Return True if an object is a number - can be converted into a float.
-
- Parameters
- ----------
- number : any
-
- Returns
- -------
- bool
- True if input is a float convertable (a number), False otherwise.
-
- """
- try:
- float(number)
- return True
- except ValueError:
- return False
-
-
-[docs]def unique(input_list):
- """
- Return a list of unique items (similar to set functionality).
-
- Parameters
- ----------
- input_list : list
- A list containg some items that can occur more than once.
-
- Returns
- -------
- list
- A list with only unique occurances of an item.
-
- """
- output = []
- for item in input_list:
- if item not in output:
- output.append(item)
- return output
-
-
-[docs]def to_list(obj):
- """ """
- if isinstance(obj, np.ndarray):
- return obj.tolist()
- raise TypeError('Not serializable')
-
-
-[docs]def distance(a, b):
- """
- Return the distance between two vectors (points) a and b.
-
- Parameters
- ----------
- a : numpy.ndarray
- First vector.
- b : numpy.ndarray
- Second vector.
-
- Returns
- -------
- numpy.float64
- A distance between two vectors (points).
-
- """
- return (np.sum((a - b)**2))**0.5
-
-
-[docs]def molecular_weight(elements):
- """
- Return molecular weight of a molecule.
-
- Parameters
- ----------
- elements : numpy.ndarray
- An array of all elements (type: str) in a molecule.
-
- Returns
- -------
- numpy.float64
- A molecular weight of a molecule.
-
- """
- return (np.array([atomic_mass[i.upper()] for i in elements]).sum())
-
-
-[docs]def center_of_coor(coordinates):
- """
- Return the centre of coordinates.
-
- Parameters
- ----------
- coordinates : numpy.ndarray
- An array containing molecule's coordinates.
-
- Returns
- -------
- numpy.ndarray
- An 1d array with coordinates of the centre of coordinates excluding
- elements' masses.
-
- """
- return (np.sum(coordinates, axis=0) / coordinates.shape[0])
-
-
-[docs]def center_of_mass(elements, coordinates):
- """
- Return the centre of mass (COM).
-
- Parameters
- ----------
- elements : numpy.ndarray
- An array of all elements (type: str) in a molecule.
-
- coordinates : numpy.ndarray
- An array containing molecule's coordinates.
-
- Returns
- -------
- numpy.ndarray
- An 1d array with coordinates of the centre of mass including elements'
- masses.
-
- """
- mass = molecular_weight(elements)
- mass_array = np.array([[atomic_mass[i.upper()]] * 3 for i in elements])
- mass_coordinates = coordinates * mass_array
- return (np.sum(mass_coordinates, axis=0) / np.array([mass, mass, mass]))
-
-
-[docs]def compose_atom_list(*args):
- """
- Return an `atom list` from elements and/or atom ids and coordinates.
-
- An `atom list` is a special object that some pywindowfunctions uses.
- It is a nested list of lists with each individual list containing:
-
- 1. [[element, coordinates (x, y, z)], ...]
-
- 2. [[element, atom key, coordinates (x, y, z)], ...]
-
- They work better for molecular re-building than two separate arrays for
- elements and coordinates do.
-
- Parameters
- ----------
- elements : :class:`numpy.ndarray`
- An array of all elements (type: str) in a molecule.
-
- coordinates : :class:`numpy.ndarray`
- An array containing molecule's coordinates.
-
- atom_ids : :class:`numpy.ndarray`, optional
- An array of all forcfield dependent atom keys (type:str) in a molecule.
-
-
- Returns
- -------
- list
- Version 1 or version 2 atom list depending on input parameters.
-
- Raises
- ------
- _FunctionError : :class:`Exception`
- Raised when wrong number of parameters is passed to the function.
-
- """
- if len(args) == 2:
- atom_list = [[
- i[0],
- round(float(i[1]), 8),
- round(float(i[2]), 8),
- round(float(i[3]), 8),
- ] for i in np.concatenate(
- (args[0].reshape(-1, 1), args[1]), axis=1)]
- elif len(args) == 3:
- atom_list = [
- [
- i[0],
- i[1],
- round(float(i[2]), 8),
- round(float(i[3]), 8),
- round(float(i[4]), 8),
- ]
- for i in np.concatenate(
- (np.concatenate(
- (args[0].reshape(-1, 1), args[1].reshape(-1, 1)
- ), axis=1), args[2]),
- axis=1)
- ]
- else:
- raise _FunctionError(
- "The compose_atom_list() function accepts only 2 or 3 arguments.")
- return atom_list
-
-
-[docs]def decompose_atom_list(atom_list):
- """
- Return elements and/or atom ids and coordinates from an `atom list`.
-
- Depending on input type of an atom list (version 1 or 2)
-
- 1. [[element, coordinates (x, y, z)], ...]
- 2. [[element, atom key, coordinates (x, y, z)], ...]
-
- the function reverses what pywindow.utilities.compose_atom_list() do.
-
- Parameters
- ----------
- atom_list : list
- A nested list of lists (version 1 or 2)
-
- Returns
- -------
- touple
- A touple of elements and coordinates arrays, or if input contained
- atom ideas, also atom ids array.
-
- """
- transpose = list(zip(*atom_list))
- if len(transpose) == 4:
- elements = np.array(transpose[0])
- array_a = np.array(transpose[1]).reshape(-1, 1)
- array_b = np.array(transpose[2]).reshape(-1, 1)
- array_c = np.array(transpose[3]).reshape(-1, 1)
- array_ab = np.concatenate((array_a, array_b), axis=1)
- coordinates = np.concatenate((array_ab, array_c), axis=1)
- return elements, coordinates
- elif len(transpose) == 5:
- elements = np.array(transpose[0])
- atom_ids = np.array(transpose[1])
- array_a = np.array(transpose[2]).reshape(-1, 1)
- array_b = np.array(transpose[3]).reshape(-1, 1)
- array_c = np.array(transpose[4]).reshape(-1, 1)
- array_ab = np.concatenate((array_a, array_b), axis=1)
- coordinates = np.concatenate((array_ab, array_c), axis=1)
- return elements, atom_ids, coordinates
- else:
- raise _FunctionError(
- "The decompose_atom_list() function accepts only list of lists "
- " with only 4 or 5 items per sublist.")
-
-
-[docs]def dlf_notation(atom_key):
- """Return element for atom key using DL_F notation."""
- split = list(atom_key)
- element = ''
- number = False
- count = 0
- while number is False:
- element = "".join((element, split[count]))
- count += 1
- if is_number(split[count]) is True:
- number = True
- # In case of for example Material Studio output, integers can also be
- # in the beginning of the string. As the dlf_notation decipher function
- # is very general in use, we have to make sure these integers are deleted.
- # In standard DL_F notation the string will never start with integer so it
- # will not affect the functionality towards it.
- # EDIT2: also the '?' atoms, you can delete them manually or somewhere else
- element = "".join(i for i in element if not is_number(i))
- element = "".join(i for i in element if i != '?')
- return element
-
-
-[docs]def opls_notation(atom_key):
- """Return element for OPLS forcefield atom key."""
- # warning for Ne, He, Na types overlap
- conflicts = ['ne', 'he', 'na']
- if atom_key in conflicts:
- raise _AtomKeyConflict((
- "One of the OPLS conflicting "
- "atom_keys has occured '{0}'. "
- "For how to solve this issue see the manual or "
- "MolecularSystem._atom_key_swap() doc string.").format(atom_key))
- for element in opls_atom_keys:
- if atom_key in opls_atom_keys[element]:
- return element
- # In case if atom_key was not found in the OPLS keys dictionary
- raise _AtomKeyError((
- "OPLS atom key {0} was not found in OPLS keys dictionary.").format(
- atom_key))
-
-
-[docs]def decipher_atom_key(atom_key, forcefield):
- """
- Return element for deciphered atom key.
-
- This functions checks if the forcfield specified by user is supported
- and passes the atom key to the appropriate function for deciphering.
-
- Parameters
- ----------
- atom_key : str
- The atom key which is to be deciphered.
-
- forcefield : str
- The forcefield to which the atom key belongs to.
-
- Returns
- -------
- str
- A string that is the periodic table element equvalent of forcefield
- atom key.
-
- """
- load_funcs = {
- 'DLF': dlf_notation,
- 'DL_F': dlf_notation,
- 'OPLS': opls_notation,
- 'OPLSAA': opls_notation,
- 'OPLS2005': opls_notation,
- 'OPLS3': opls_notation,
- }
- if forcefield.upper() in load_funcs.keys():
- return load_funcs[forcefield.upper()](atom_key)
- else:
- raise _ForceFieldError(
- ("Unfortunetely, '{0}' forcefield is not supported by pyWINDOW."
- " For list of supported forcefields see User's Manual or "
- "MolecularSystem._decipher_atom_keys() function doc string."
- ).format(forcefield))
-
-
-[docs]def shift_com(elements, coordinates, com_adjust=np.zeros(3)):
- """
- Return coordinates translated by some vector.
-
- Parameters
- ----------
- elements : numpy.ndarray
- An array of all elements (type: str) in a molecule.
-
- coordinates : numpy.ndarray
- An array containing molecule's coordinates.
-
- com_adjust : numpy.ndarray (default = [0, 0, 0])
-
- Returns
- -------
- numpy.ndarray
- Translated array of molecule's coordinates.
-
- """
- com = center_of_mass(elements, coordinates)
- com = np.array([com - com_adjust] * coordinates.shape[0])
- return coordinates - com
-
-
-[docs]def max_dim(elements, coordinates):
- """
- Return the maximum diameter of a molecule.
-
- Parameters
- ----------
- elements : numpy.ndarray
- An array of all elements (type: str) in a molecule.
-
- coordinates : numpy.ndarray
- An array containing molecule's coordinates.
-
- Returns
- -------
-
- """
- atom_vdw_vertical = np.matrix(
- [[atomic_vdw_radius[i.upper()]] for i in elements])
- atom_vdw_horizontal = np.matrix(
- [atomic_vdw_radius[i.upper()] for i in elements])
- dist_matrix = euclidean_distances(coordinates, coordinates)
- vdw_matrix = atom_vdw_vertical + atom_vdw_horizontal
- re_dist_matrix = dist_matrix + vdw_matrix
- final_matrix = np.triu(re_dist_matrix)
- i1, i2 = np.unravel_index(final_matrix.argmax(), final_matrix.shape)
- maxdim = final_matrix[i1, i2]
- return i1, i2, maxdim
-
-
-[docs]def pore_diameter(elements, coordinates, com=None):
- """Return pore diameter of a molecule."""
- if com is None:
- com = center_of_mass(elements, coordinates)
- atom_vdw = np.array([[atomic_vdw_radius[x.upper()]] for x in elements])
- dist_matrix = euclidean_distances(coordinates, com.reshape(1, -1))
- re_dist_matrix = dist_matrix - atom_vdw
- index = np.argmin(re_dist_matrix)
- pored = re_dist_matrix[index][0] * 2
- return (pored, index)
-
-
-[docs]def correct_pore_diameter(com, *params):
- """Return negative of a pore diameter. (optimisation function)."""
- elements, coordinates = params
- return (-pore_diameter(elements, coordinates, com)[0])
-
-
-[docs]def opt_pore_diameter(elements, coordinates, bounds=None, com=None, **kwargs):
- """Return optimised pore diameter and it's COM."""
- args = elements, coordinates
- if com is not None:
- pass
- else:
- com = center_of_mass(elements, coordinates)
- if bounds is None:
- pore_r = pore_diameter(elements, coordinates, com=com)[0] / 2
- bounds = (
- (com[0]-pore_r, com[0]+pore_r),
- (com[1]-pore_r, com[1]+pore_r),
- (com[2]-pore_r, com[2]+pore_r)
- )
- minimisation = minimize(
- correct_pore_diameter, x0=com, args=args, bounds=bounds)
- pored = pore_diameter(elements, coordinates, com=minimisation.x)
- return (pored[0], pored[1], minimisation.x)
-
-
-[docs]def sphere_volume(sphere_radius):
- """Return volume of a sphere."""
- return (4 / 3 * np.pi * sphere_radius**3)
-
-
-
-
-
-
-
-
-[docs]def relative_shape_anisotropy(S):
- return (1 - 3 * (
- (S[0] * S[1] + S[0] * S[2] + S[1] * S[2]) / (np.sum(S))**2))
-
-
-[docs]def get_tensor_eigenvalues(T, sort=False):
- if sort:
- return (sorted(np.linalg.eigvals(T), reverse=True))
- else:
- return (np.linalg.eigvals(T))
-
-
-[docs]def get_gyration_tensor(elements, coordinates):
- """
- Return the gyration tensor of a molecule.
-
- The gyration tensor should be invariant to the molecule's position.
- The known formulas for the gyration tensor have the correction for the
- centre of mass of the molecule, therefore, the coordinates are first
- corrected for the centre of mass and essentially shifted to the origin.
-
- Parameters
- ----------
- elements : numpy.ndarray
- The array containing the molecule's elemental data.
-
- coordinates : numpy.ndarray
- The array containing the Cartesian coordinates of the molecule.
-
- Returns
- -------
- numpy.ndarray
- The gyration tensor of a molecule invariant to the molecule's position.
-
- """
- # First calculate COM for correction.
- com = centre_of_mass(elements, coordinates)
- # Correct the coordinates for the COM.
- coordinates = coordinates - com
- # Calculate diagonal and then other values of the matrix.
- diag = np.sum(coordinates**2, axis=0)
- xy = np.sum(coordinates[:, 0] * coordinates[:, 1])
- xz = np.sum(coordinates[:, 0] * coordinates[:, 2])
- yz = np.sum(coordinates[:, 1] * coordinates[:, 2])
- S = np.array([[diag[0], xy, xz], [xy, diag[1], yz],
- [xz, yz, diag[2]]]) / coordinates.shape[0]
- return (S)
-
-
-[docs]def get_inertia_tensor(elements, coordinates):
- """
- Return the tensor of inertia a molecule.
-
- Parameters
- ----------
- elements : numpy.ndarray
- The array containing the molecule's elemental data.
-
- coordinates : numpy.ndarray
- The array containing the Cartesian coordinates of the molecule.
-
- Returns
- -------
- numpy.ndarray
- The tensor of inertia of a molecule.
-
- """
- pow2 = coordinates**2
- molecular_weight = np.array(
- [[atomic_mass[e.upper()]] for e in elements])
-
- diag_1 = np.sum(molecular_weight * (pow2[:, 1] + pow2[:, 2]))
- diag_2 = np.sum(molecular_weight * (pow2[:, 0] + pow2[:, 2]))
- diag_3 = np.sum(molecular_weight * (pow2[:, 0] + pow2[:, 1]))
-
- mxy = np.sum(-molecular_weight * coordinates[:, 0] * coordinates[:, 1])
- mxz = np.sum(-molecular_weight * coordinates[:, 0] * coordinates[:, 2])
- myz = np.sum(-molecular_weight * coordinates[:, 1] * coordinates[:, 2])
-
- inertia_tensor = np.array([[diag_1, mxy, mxz], [mxy, diag_2, myz],
- [mxz, myz, diag_3]]) / coordinates.shape[0]
- return (inertia_tensor)
-
-
-[docs]def principal_axes(elements, coordinates):
- return (np.linalg.eig(get_inertia_tensor(elements, coordinates))[1].T)
-
-
-[docs]def normalize_vector(vector):
- """
- Normalize a vector.
-
- A new vector is returned, the original vector is not modified.
-
- Parameters
- ----------
- vector : np.array
- The vector to be normalized.
-
- Returns
- -------
- np.array
- The normalized vector.
-
- """
- v = np.divide(vector, np.linalg.norm(vector))
- return np.round(v, decimals=4)
-
-
-[docs]def rotation_matrix_arbitrary_axis(angle, axis):
- """
- Return a rotation matrix of `angle` radians about `axis`.
-
- Parameters
- ----------
- angle : int or float
- The size of the rotation in radians.
-
- axis : numpy.array
- A 3 element aray which represents a vector. The vector is the
- axis about which the rotation is carried out.
-
- Returns
- -------
- numpy.array
- A 3x3 array representing a rotation matrix.
-
- """
- axis = normalize_vector(axis)
-
- a = np.cos(angle / 2)
- b, c, d = axis * np.sin(angle / 2)
-
- e11 = np.square(a) + np.square(b) - np.square(c) - np.square(d)
- e12 = 2 * (b * c - a * d)
- e13 = 2 * (b * d + a * c)
-
- e21 = 2 * (b * c + a * d)
- e22 = np.square(a) + np.square(c) - np.square(b) - np.square(d)
- e23 = 2 * (c * d - a * b)
-
- e31 = 2 * (b * d - a * c)
- e32 = 2 * (c * d + a * b)
- e33 = np.square(a) + np.square(d) - np.square(b) - np.square(c)
-
- return np.array([[e11, e12, e13], [e21, e22, e23], [e31, e32, e33]])
-
-
-[docs]def align_principal_ax(elements, coordinates):
- """ """
- coor = deepcopy(coordinates)
- new_coor = []
- rot = []
- for i, j in zip([2, 1, 0], [[1, 0, 0], [0, 1, 0], [0, 0, 1]]):
- p_axes = principal_axes(elements, coordinates)
-
- r_vec = np.cross(p_axes[i], np.array(j))
- sin = np.linalg.norm(r_vec)
- cos = np.dot(p_axes[i], np.array(j))
- ang = np.arctan2(sin, cos)
-
- R_mat = np.matrix(rotation_matrix_arbitrary_axis(ang, r_vec))
- rot.append(R_mat)
-
- for i in coor:
- new_coord = R_mat * i.reshape(-1, 1)
- new_coor.append(np.array(new_coord.reshape(1, -1))[0])
- new_coor = np.array(new_coor)
- coor = new_coor
- new_coor = []
- return (coor, rot)
-
-
-[docs]def calc_asphericity(elements, coordinates):
- inertia_tensor = get_inertia_tensor(elements, coordinates)
- tensor_eigenvalues = get_tensor_eigenvalues(inertia_tensor, sort=True)
- return asphericity(tensor_eigenvalues)
-
-
-[docs]def calc_acylidricity(elements, coordinates):
- inertia_tensor = get_inertia_tensor(elements, coordinates)
- tensor_eigenvalues = get_tensor_eigenvalues(inertia_tensor, sort=True)
- return acylidricity(tensor_eigenvalues)
-
-
-[docs]def calc_relative_shape_anisotropy(elements, coordinates):
- inertia_tensor = get_inertia_tensor(elements, coordinates)
- tensor_eigenvalues = get_tensor_eigenvalues(inertia_tensor, sort=True)
- return relative_shape_anisotropy(tensor_eigenvalues)
-
-
-[docs]def unit_cell_to_lattice_array(cryst):
- """Return parallelpiped unit cell lattice matrix."""
- a_, b_, c_, alpha, beta, gamma = cryst
- # Convert angles from degrees to radians.
- r_alpha = np.deg2rad(alpha)
- r_beta = np.deg2rad(beta)
- r_gamma = np.deg2rad(gamma)
- # Calculate unit cell volume that is neccessary.
- volume = a_ * b_ * c_ * (
- 1 - np.cos(r_alpha)**2 - np.cos(r_beta)**2 - np.cos(r_gamma)**2 + 2 *
- np.cos(r_alpha) * np.cos(r_beta) * np.cos(r_gamma))**0.5
- # Create the orthogonalisation Matrix (M^-1) - lattice matrix
- a_x = a_
- a_y = b_ * np.cos(r_gamma)
- a_z = c_ * np.cos(r_beta)
- b_x = 0
- b_y = b_ * np.sin(r_gamma)
- b_z = c_ * (
- np.cos(r_alpha) - np.cos(r_beta) * np.cos(r_gamma)) / np.sin(r_gamma)
- c_x = 0
- c_y = 0
- c_z = volume / (a_ * b_ * np.sin(r_gamma))
- lattice_array = np.array(
- [[a_x, a_y, a_z], [b_x, b_y, b_z], [c_x, c_y, c_z]])
- return lattice_array
-
-
-[docs]def lattice_array_to_unit_cell(lattice_array):
- """Return crystallographic param. from unit cell lattice matrix."""
- cell_lengths = np.sqrt(np.sum(lattice_array**2, axis=0))
- gamma_r = np.arccos(lattice_array[0][1] / cell_lengths[1])
- beta_r = np.arccos(lattice_array[0][2] / cell_lengths[2])
- alpha_r = np.arccos(
- lattice_array[1][2] * np.sin(gamma_r) / cell_lengths[2]
- + np.cos(beta_r) * np.cos(gamma_r)
- )
- cell_angles = [
- np.rad2deg(alpha_r), np.rad2deg(beta_r), np.rad2deg(gamma_r)
- ]
- return np.append(cell_lengths, cell_angles)
-
-
-[docs]def volume_from_lattice_array(lattice_array):
- """Return unit cell's volume from lattice matrix."""
- return np.linalg.det(lattice_array)
-
-
-[docs]def volume_from_cell_parameters(cryst):
- """Return unit cell's volume from crystallographic parameters."""
- return volume_from_lattice_array(unit_cell_to_lattice_array(cryst))
-
-
-[docs]def fractional_from_cartesian(coordinate, lattice_array):
- """Return a fractional coordinate from a cartesian one."""
- deorthogonalisation_M = np.matrix(np.linalg.inv(lattice_array))
- fractional = deorthogonalisation_M * coordinate.reshape(-1, 1)
- return np.array(fractional.reshape(1, -1))
-
-
-[docs]def cartisian_from_fractional(coordinate, lattice_array):
- """Return cartesian coordinate from a fractional one."""
- orthogonalisation_M = np.matrix(lattice_array)
- orthogonal = orthogonalisation_M * coordinate.reshape(-1, 1)
- return np.array(orthogonal.reshape(1, -1))
-
-
-[docs]def cart2frac_all(coordinates, lattice_array):
- """Convert all cartesian coordinates to fractional."""
- frac_coordinates = deepcopy(coordinates)
- for coord in range(frac_coordinates.shape[0]):
- frac_coordinates[coord] = fractional_from_cartesian(
- frac_coordinates[coord], lattice_array)
- return frac_coordinates
-
-
-[docs]def frac2cart_all(frac_coordinates, lattice_array):
- """Convert all fractional coordinates to cartesian."""
- coordinates = deepcopy(frac_coordinates)
- for coord in range(coordinates.shape[0]):
- coordinates[coord] = cartisian_from_fractional(coordinates[coord],
- lattice_array)
- return coordinates
-
-
-[docs]def create_supercell(system, supercell=[[-1, 1], [-1, 1], [-1, 1]]):
- """Create a supercell."""
- if 'lattice' not in system.keys():
- matrix = unit_cell_to_lattice_array(system['unit_cell'])
- else:
- matrix = system['lattice']
- coordinates = deepcopy(system['coordinates'])
- multiplication_matrices = []
- for a_ in range(supercell[0][0], supercell[0][1] + 1):
- for b_ in range(supercell[1][0], supercell[1][1] + 1):
- for c_ in range(supercell[2][0], supercell[2][1] + 1):
- mult_matrix = np.array([[a_, b_, c_]])
- mult_matrix = np.repeat(
- mult_matrix, coordinates.shape[0], axis=0)
- multiplication_matrices.append(mult_matrix)
- frac_coordinates = cart2frac_all(coordinates, matrix)
- updated_coordinates = []
- for mat in multiplication_matrices:
- updated_coor = frac_coordinates + mat
- updated_coordinates.append(updated_coor)
- supercell_frac_coordinates = np.concatenate(updated_coordinates, axis=0)
- supercell_coordinates = frac2cart_all(supercell_frac_coordinates, matrix)
- # Now for each new cell in the supercell we need to repeat the
- # elements array so that it maches
- new_elements = deepcopy(system['elements'])
- new_ids = deepcopy(system['atom_ids'])
- for i in range(len(updated_coordinates) - 1):
- new_elements = np.concatenate((new_elements, system['elements']))
- new_ids = np.concatenate((new_ids, system['atom_ids']))
- cryst = lattice_array_to_unit_cell(matrix)
- supercell_system = {
- 'elements': new_elements,
- 'atom_ids': new_ids,
- 'coordinates': supercell_coordinates,
- 'unit_cell': cryst,
- 'lattice': matrix,
- }
- return supercell_system
-
-
-[docs]def is_inside_polyhedron(point, polyhedron):
- if polyhedron.shape == (1, 6):
- matrix = unit_cell_to_lattice_array(polyhedron)
- if polyhedron.shape == (3, 3):
- matrix = polyhedron
-
- frac_coord = pw.utilities.fractional_from_cartesian(point, matrix)[0]
-
- if 0 <= frac_coord[0] <= 1.000 and 0 <= frac_coord[
- 1] <= 1.000 and 0 <= frac_coord[2] <= 1.000:
- return True
- else:
- return False
-
-
-[docs]def normal_vector(origin, vectors):
- """Return normal vector for two vectors with same origin."""
- return np.cross(vectors[0] - origin, vectors[1] - origin)
-
-
-[docs]def discrete_molecules(system, rebuild=None, tol=0.4):
- """
- Decompose molecular system into individual discreet molecules.
-
- Note
- ----
- New formula for bonds: (26/07/17)
- The two atoms, x and y, are considered bonded if the distance between
- them, calculated with distance matrix, is within the ranges:
- .. :math:
-
- Rcov(x) + Rcov(y) - t < R(x,y) < Rcov(x) + Rcov(y) + t
-
- where Rcov is the covalent radius and the tolarenace (t) is set to
- 0.4 Angstrom.
-
- """
- # First we check which operation mode we use.
- # 1) Non-periodic MolecularSystem.
- # 2) Periodic MolecularSystem without rebuilding.
- # 3) Periodic Molecular system with rebuilding (supercell provided).
- if rebuild is not None:
- mode = 3
- else:
- if 'unit_cell' in system.keys():
- if system['unit_cell'].shape == (6,):
- mode = 2
- else:
- mode = 1
- elif 'lattice' in system.keys():
- if system['lattice'].shape == (3, 3):
- mode = 2
- else:
- mode = 1
- else:
- mode = 1
- # We create a list containing all atoms, theirs periodic elements and
- # coordinates. As this process is quite complicated, we need a list
- # which we will gradually be reducing.
- try:
- elements = system['elements']
- coordinates = system['coordinates']
- except KeyError:
- raise _FunctionError(
- "The 'elements' key is missing in the 'system' dictionary "
- "attribute of the MolecularSystem object. Which means, you need to"
- " decipher the forcefield based atom keys first (see manual)."
- )
- coordinates = system['coordinates']
- args = (elements, coordinates)
- adj = 0
- # If there are forcefield 'atom ids' as well we will retain them.
- if 'atom_ids' in system.keys():
- atom_ids = system['atom_ids']
- args = (elements, atom_ids, coordinates)
- adj = 1
- atom_list = compose_atom_list(*args)
- atom_coor = decompose_atom_list(atom_list)[1 + adj]
- # Scenario 1: We load a non-periodic MolecularSystem.
- # We will not have 'unit_cell' nor 'lattice' keywords in the dictionary
- # and also we do not do any re-building.
- # Scenario 2: We load a periodic MolecularSystem. We want to only Extract
- # complete molecules that do not have been affected by the periodic
- # boundary.
- # Scenario 3: We load a periodic Molecular System. We want it to be rebuild
- # therefore, we also provide a supercell.
- # Scenarios 2 and 3 require a lattice and also their origin is at origin.
- # Scenario 1 should have the origin at the center of mass of the system.
- # EDIT 09-04-18: All origins/pseudo_origin had to be skewed towards some
- # direction (x + 0.01) so that there would be no ambiguity in periodic
- # ang highly symmetric systems where the choice of the closest atom would
- # be random from a set of equally far choices - bug found in the testing
- # this way rebuild system should always look the same from the same input
- # and on different machines.
- if mode == 2 or mode == 3:
- # Scenarios 2 or 3.
- origin = np.array([0.01, 0., 0.])
- if 'lattice' not in system.keys():
- matrix = unit_cell_to_lattice_array(system['unit_cell'])
- else:
- matrix = system['lattice']
- pseudo_origin_frac = np.array([0.26, 0.25, 0.25])
- pseudo_origin = cartisian_from_fractional(pseudo_origin_frac, matrix)
- # If a supercell is also provided that encloses the unit cell for the
- # reconstruction of the molecules through the periodic boundary.
- if rebuild is not None:
- selements = rebuild['elements']
- sids = rebuild['atom_ids']
- scoordinates = rebuild['coordinates']
- satom_list = compose_atom_list(selements, sids, scoordinates)
- satom_coor = decompose_atom_list(satom_list)[1 + adj]
- # There is one more step. We need to sort out for all the
- # reconstructed molecules, which are the ones that belong to the
- # unit cell. As we did the reconstruction to every chunk in the unit
- # cell we have now some molecules that belong to neighbouring cells.
- # The screening is simple. If the COM of a molecule translated to
- # fractional coordinates (so that it works for parallelpiped) is
- # within the unit cell boundaries <0, 1> then it's it. There is
- # an exception, for the trajectories, very often the unit cell
- # is centered at origin. Therefore we need to use <-0.5, 0.5>
- # boundary. We will simply decide which is the case by calculating
- # the centre of mass of the whole system.
- system_com = center_of_mass(elements, coordinates)
- if np.allclose(system_com, origin, atol=1e-00):
- boundary = np.array([-0.5, 0.5])
- else:
- boundary = np.array([0., 1.])
- else:
- # Scenario 1.
- pseudo_origin = center_of_mass(
- elements, coordinates) + np.array([0.01, 0., 0.])
- # Here the final discrete molecules will be stored.
- molecules = []
- # Exceptions. Usually end-point atoms that create single bonds or
- # just a separate atoms in the system.
- exceptions = ['H', 'CL', 'BR', 'F', 'HE', 'AR', 'NE', 'KR', 'XE', 'RN']
- # The upper limit for distances analysed for bonds will be assigned for
- # a given system (to save time). We take set('elements') and then find
- # the largest R(cov) in the system and set the max_dist as a double
- # of it plus the 150% tolerance (tol).
- set_of_elements = set(system['elements'])
- max_r_cov = max([
- atomic_covalent_radius[i.upper()] for i in set_of_elements])
- max_dist = 2 * max_r_cov + tol
- # We continue untill all items in the list have been analysed and popped.
- while atom_list:
- inside_atoms_heavy = [
- i for i in atom_list if i[0].upper() not in exceptions
- ]
- if inside_atoms_heavy:
- # Now we create an array of atom coordinates. It does seem
- # somehow counter-intuitive as this is what we started with
- # and made it into a list. But, in my opinion it's the only
- # way to do it. It's hard to control and delete items in two
- # separate arrays that we started with and we don't want
- # atoms already assigned in our array for distance matrix.
- inside_atoms_coord_heavy = decompose_atom_list(inside_atoms_heavy)[
- 1 + adj]
- dist_matrix = euclidean_distances(inside_atoms_coord_heavy,
- pseudo_origin.reshape(1, -1))
- atom_index_x, _ = np.unravel_index(dist_matrix.argmin(),
- dist_matrix.shape)
- # Added this so that lone atoms (even if heavy) close to the
- # periodic boundary are not analysed, as they surely have matching
- # symmetry equivalence that bind to a bigger atom cluster inside
- # the unit_cell.
- potential_starting_point = inside_atoms_heavy[atom_index_x]
- pot_arr = np.array(potential_starting_point[1 + adj:])
- dist_matrix = euclidean_distances(
- atom_coor, pot_arr.reshape(1, -1)
- )
- idx = (dist_matrix > 0.1) * (dist_matrix < max_dist)
- if len(idx) < 1:
- pass
- else:
- working_list = [potential_starting_point]
- else:
- # Safety check.
- break
- final_molecule = []
- while working_list:
- working_list_temp = []
- try:
- atom_coor = decompose_atom_list(atom_list)[1 + adj]
- except _FunctionError:
- atom_coor = None
- for i in working_list:
- if i[0].upper() not in exceptions:
- # It's of GREATEST importance that the i_arr variable
- # is assigned here before entering the atom_coor loop.!
- # Otherwise it will not be re-asigned when the satom_list
- # still iterates, but the atom_list is already empty...
- i_arr = np.array(i[1 + adj:])
- if atom_coor is not None:
- dist_matrix = euclidean_distances(
- atom_coor, i_arr.reshape(1, -1)
- )
- idx = (dist_matrix > 0.1) * (dist_matrix < max_dist)
- neighbours_indexes = np.where(idx)[0]
- for j in neighbours_indexes:
- j_arr = np.array(atom_coor[j])
- r_i_j = distance(i_arr, j_arr)
- r_cov_i_j = atomic_covalent_radius[
- i[0].upper()] + atomic_covalent_radius[
- atom_list[j][0].upper()]
- if r_cov_i_j - tol < r_i_j < r_cov_i_j + tol:
- working_list_temp.append(atom_list[j])
- if rebuild is not None:
- sdist_matrix = euclidean_distances(
- satom_coor, i_arr.reshape(1, -1))
- sidx = (sdist_matrix > 0.1) * (sdist_matrix < max_dist)
- sneighbours_indexes = np.where(sidx)[0]
- for j in sneighbours_indexes:
- if satom_list[j] in atom_list:
- pass
- else:
- j_arr = np.array(satom_coor[j])
- r_i_j = distance(i_arr, j_arr)
- r_cov_i_j = atomic_covalent_radius[
- i[0].upper()
- ] + atomic_covalent_radius[
- satom_list[j][0].upper()]
- if r_cov_i_j - tol < r_i_j < r_cov_i_j + tol:
- working_list_temp.append(satom_list[j])
- final_molecule.append(i)
- else:
- final_molecule.append(i)
- for i in working_list:
- try:
- atom_list.remove(i)
- except ValueError:
- pass
- # We empty the working list as all the items were analysed
- # and moved to the final_molecule list.
- working_list = []
- # We make sure there are no duplicates in the working_list_temp.
- working_list_temp = unique(working_list_temp)
- # Now we move the entries from the temporary working list
- # to the working list for looping analysys.
- for i in working_list_temp:
- # We make sure that only new and unassigned atoms are
- # being transfered.
- if i not in final_molecule:
- working_list.append(i)
- final_molecule_dict = {}
- final_molecule_dict['elements'] = np.array(
- [x[0] for x in final_molecule], dtype='str')
- final_molecule_dict['coordinates'] = np.array(
- [[*xyz[1 + adj:]] for xyz in final_molecule])
- if adj == 1:
- final_molecule_dict['atom_ids'] = np.array(
- [x[1] for x in final_molecule], dtype='str')
- # In general we always want the molecule so the initial bool_ is True.
- bool_ = True
- # But, for periodic only if the molecule is in the initial unit cell.
- if rebuild is not None:
- com = center_of_mass(final_molecule_dict['elements'],
- final_molecule_dict['coordinates'])
- com_frac = fractional_from_cartesian(com, matrix)[0]
- # If we don't round the numerical errors will come up.
- com_frac_round = np.around(com_frac, decimals=8)
- bool_ = np.all(np.logical_and(com_frac_round >= boundary[0],
- com_frac_round < boundary[1]),
- axis=0)
- if bool(bool_) is True:
- molecules.append(final_molecule_dict)
- return molecules
-
-
-[docs]def angle_between_vectors(x, y):
- """Calculate the angle between two vectors x and y."""
- first_step = abs(x[0] * y[0] + x[1] * y[1] + x[2] * y[2]) / (
- np.sqrt(x[0]**2 + x[1]**2 + x[2]**2) *
- np.sqrt(y[0]**2 + y[1]**2 + y[2]**2))
- second_step = np.arccos(first_step)
- return (second_step)
-
-
-[docs]def vector_analysis(vector, coordinates, elements_vdw, increment=1.0):
- """Analyse a sampling vector's path for window analysis purpose."""
- # Calculate number of chunks if vector length is divided by increment.
- chunks = int(np.linalg.norm(vector) // increment)
- # Create a single chunk.
- chunk = vector / chunks
- # Calculate set of points on vector's path every increment.
- vector_pathway = np.array([chunk * i for i in range(chunks + 1)])
- analysed_vector = np.array([
- np.amin(
- euclidean_distances(coordinates, i.reshape(1, -1)) - elements_vdw)
- for i in vector_pathway
- ])
- if all(i > 0 for i in analysed_vector):
- pos = np.argmin(analysed_vector)
- # As first argument we need to give the distance from the origin.
- dist = np.linalg.norm(chunk * pos)
- return np.array(
- [dist, analysed_vector[pos] * 2, *chunk * pos, *vector])
-
-
-[docs]def vector_preanalysis(vector, coordinates, elements_vdw, increment=1.0):
- norm_vec = vector/np.linalg.norm(vector)
- intersections = []
- origin = center_of_coor(coordinates)
- L = coordinates - origin
- t_ca = np.dot(L, norm_vec)
- d = np.sqrt(np.einsum('ij,ij->i', L, L) - t_ca**2)
- under_sqrt = elements_vdw**2 - d**2
- diag = under_sqrt.diagonal()
- positions = np.argwhere(diag > 0)
- for pos in positions:
- t_hc = np.sqrt(diag[pos[0]])
- t_0 = t_ca[pos][0] - t_hc
- t_1 = t_ca[pos][0] + t_hc
-
- P_0 = origin + np.dot(t_0, norm_vec)
- P_1 = origin + np.dot(t_1, norm_vec)
- # print(np.linalg.norm(P_0), np.linalg.norm(P_1))
- if np.linalg.norm(P_0) < np.linalg.norm(P_1):
- intersections.append(1)
- else:
- intersections.append(0)
- if sum(intersections) == 0:
- return vector_analysis(vector, coordinates, elements_vdw, increment)
-
-
-[docs]def optimise_xy(xy, *args):
- """Return negative pore diameter for x and y coordinates optimisation."""
- z, elements, coordinates = args
- window_com = np.array([xy[0], xy[1], z])
- return -pore_diameter(elements, coordinates, com=window_com)[0]
-
-
-[docs]def optimise_z(z, *args):
- """Return pore diameter for coordinates optimisation in z direction."""
- x, y, elements, coordinates = args
- window_com = np.array([x, y, z])
- return pore_diameter(elements, coordinates, com=window_com)[0]
-
-
-[docs]def window_analysis(window,
- elements,
- coordinates,
- elements_vdw,
- increment2=0.1,
- z_bounds=[None, None],
- lb_z=True,
- z_second_mini=False,
- **kwargs):
- """
- Return window diameter and window's centre.
-
- Parameters
- ----------
- widnow: list
-
- elements: numpy.array
-
- coordinates: numpy.array
-
- elements_vdw: numpy.array
-
- step: float
-
- """
- # Copy the coordinates as we will manipulate them.
- coordinates = deepcopy(coordinates)
- # Find the vector with the largest window sampling diameter from the pool.
- vector_ = window[window.argmax(axis=0)[1]][5:8]
- vector_analysed = vector_analysis(
- vector_, coordinates, elements_vdw, increment=increment2)
- # A safety check, if the refined analysis give None we end the function.
- if vector_analysed is not None:
- pass
- else:
- return None
- vector = vector_analysed[5:8]
- # Unit vectors.
- vec_a = [1, 0, 0]
- vec_b = [0, 1, 0]
- vec_c = [0, 0, 1]
- # Angles needed for rotation (in radians) to rotate and translate the
- # molecule for the vector to become the Z-axis.
- angle_1 = angle_between_vectors(np.array([vector[0], vector[1], 0]), vec_a)
- angle_2 = angle_between_vectors(vector, vec_c)
- # Depending in which cartesian coordinate system area the vector is
- # We need a rotation into a different direction and by different value.
- if vector[0] >= 0 and vector[1] >= 0 and vector[2] >= 0:
- angle_1 = -angle_1
- angle_2 = -angle_2
- if vector[0] < 0 and vector[1] >= 0 and vector[2] >= 0:
- angle_1 = np.pi * 2 + angle_1
- angle_2 = angle_2
- if vector[0] >= 0 and vector[1] < 0 and vector[2] >= 0:
- angle_1 = angle_1
- angle_2 = -angle_2
- if vector[0] < 0 and vector[1] < 0 and vector[2] >= 0:
- angle_1 = np.pi * 2 - angle_1
- if vector[0] >= 0 and vector[1] >= 0 and vector[2] < 0:
- angle_1 = -angle_1
- angle_2 = np.pi + angle_2
- if vector[0] < 0 and vector[1] >= 0 and vector[2] < 0:
- angle_2 = np.pi - angle_2
- if vector[0] >= 0 and vector[1] < 0 and vector[2] < 0:
- angle_2 = angle_2 + np.pi
- if vector[0] < 0 and vector[1] < 0 and vector[2] < 0:
- angle_1 = -angle_1
- angle_2 = np.pi - angle_2
- # Rotation matrix for rotation around Z-axis with angle_1.
- rotation_around_z = np.array([[np.cos(angle_1), -np.sin(angle_1), 0],
- [np.sin(angle_1), np.cos(angle_1), 0],
- [0, 0, 1]])
- # Rotate the whole molecule around with rotation_around_z.
- coordinates = np.array([np.dot(rotation_around_z, i) for i in coordinates])
- # Rotation matrix for rotation around Y-axis with angle_2
- rotation_around_y = np.array([[np.cos(angle_2), 0, np.sin(angle_2)],
- [0, 1, 0],
- [-np.sin(angle_2), 0, np.cos(angle_2)]])
- # Rotate the whole molecule around with rotation_around_y.
- coordinates = np.array([np.dot(rotation_around_y, i) for i in coordinates])
- # Third step is translation. We are now at [0, 0, -z].
- # We shift the molecule so that center of the window is at the origin.
- # The `z` is from original vector analysis. It is the point on the vector
- # where the largest sampling sphere was (vector_analysed[0]).
- new_z = vector_analysed[0]
- # Translate the whole molecule to shift window's center to origin.
- coordinates = coordinates - np.array([[0, 0, new_z]] *
- coordinates.shape[0])
- # !!!Here the window center (xy and z) optimisation take place!!!
- window_com = np.array([0, 0, 0], dtype=float)
- # The lb_z parameter is 'lower bound equal to z' which means,
- # that we set the lower bound for the z optimisation to be equal
- # to the -new_z as in some cages it's the COM - pore that is the
- # limiting diameter. But, no lower than new_z because we don't want to
- # move into the other direction.
- if lb_z:
- z_bounds[0] = -new_z
- window_diameter, _ = pore_diameter(elements, coordinates, com=window_com)
- # SciPy minimisation on z coordinate.
- z_args = (window_com[0], window_com[1], elements, coordinates)
- z_optimisation = minimize(
- optimise_z, x0=window_com[2], args=z_args, bounds=[z_bounds])
- # Substitute the z coordinate for a minimised one.
- window_com[2] = z_optimisation.x[0]
- # SciPy brute optimisation on x and y coordinates in window plane.
- xy_args = (window_com[2], elements, coordinates)
- xy_bounds = ((-window_diameter / 2, window_diameter / 2),
- (-window_diameter / 2, window_diameter / 2))
- xy_optimisation = brute(
- optimise_xy, xy_bounds, args=xy_args, full_output=True, finish=fmin)
- # Substitute the x and y coordinates for the optimised ones.
- window_com[0] = xy_optimisation[0][0]
- window_com[1] = xy_optimisation[0][1]
- # Additional SciPy minimisation on z coordinate. Added on 18 May 2017.
- # We can argue which aproach is best. Whether z opt and then xy opt
- # or like now z opt -> xy opt -> additional z opt etc. I have also tested
- # a loop of optimisations until some convergence and optimisation of
- # xyz coordinates at the same time by optimising these two optimisations.
- # In the end. I think this approach is best for cages.
- # Update 20 October 2017: I made this optional and turned off by default
- # In many cases that worsen the quality of the results and should be used
- # with caution.
- if z_second_mini is not False:
- z_args = (window_com[0], window_com[1], elements, coordinates)
- # The z_bounds should be passed in kwargs.
- z_optimisation = minimize(
- optimise_z, x0=window_com[2], args=z_args, bounds=[z_bounds])
- # Substitute the z coordinate for a minimised one.
- window_com[2] = z_optimisation.x[0]
- # Calculate the new window diameter.
- window_diameter, _ = pore_diameter(elements, coordinates, com=window_com)
- # To get the window true centre of mass we need to revere the rotation and
- # translation operations on the window com.
- # Reverse the translation by substracting the new_z.
- window_com[2] = window_com[2] + new_z
- angle_2_1 = -angle_2
- reverse_around_y = np.array([[np.cos(angle_2_1), 0, np.sin(angle_2_1)],
- [0, 1, 0],
- [-np.sin(angle_2_1), 0, np.cos(angle_2_1)]])
- # Reversing the second rotation around Y-axis.
- window_com = np.dot(reverse_around_y, window_com)
- angle_1_1 = -angle_1
- reverse_around_z = np.array([[np.cos(angle_1_1), -np.sin(angle_1_1), 0],
- [np.sin(angle_1_1), np.cos(angle_1_1), 0],
- [0, 0, 1]])
- # Reversing the first rotation around Z-axis.
- window_com = np.dot(reverse_around_z, window_com)
- return (window_diameter, window_com)
-
-
-[docs]def find_windows(elements,
- coordinates,
- processes=None,
- mol_size=None,
- adjust=1,
- pore_opt=True,
- increment=1.0,
- **kwargs):
- """Return windows diameters and center of masses for a molecule."""
- # Copy the coordinates as will perform many opertaions on them
- coordinates = deepcopy(coordinates)
- # Center of our cartesian system is always at origin
- origin = np.array([0, 0, 0])
- # Initial center of mass to reverse translation at the end
- initial_com = center_of_mass(elements, coordinates)
- # Shift the cage to the origin using either the standard center of mass
- # or if pore_opt flag is True, the optimised pore center as center of mass
- if pore_opt is True:
- # Normally the pore is calculated from the COM of a molecule.
- # So, esentially the molecule's COM is the pore center.
- # To shift the molecule so that the center of the optimised pore
- # is at the origin of the system and not the center of the not
- # optimised one, we need to adjust the shift. We also have to update
- # the initial com.
- com_adjust = initial_com - opt_pore_diameter(elements, coordinates, **
- kwargs)[2]
- initial_com = initial_com - com_adjust
- coordinates = shift_com(elements, coordinates, com_adjust=com_adjust)
- else:
- # Otherwise, we just shift the cage to the origin.
- coordinates = shift_com(elements, coordinates)
- # We create an array of vdw radii of elements.
- elements_vdw = np.array([[atomic_vdw_radius[x.upper()]] for x in elements])
- # We calculate maximum diameter of a molecule to determine the radius
- # of a sampling sphere neccessary to enclose the whole molecule.
- shpere_radius = max_dim(elements, coordinates)[2] / 2
- sphere_surface_area = 4 * np.pi * shpere_radius**2
- # Here we determine the number of sampling points necessary for a fine
- # sampling. Smaller molecules require more finner density of sampling
- # points on the sampling sphere's surface, whereas largen require less.
- # This formula was created so that larger molecule do not take much longer
- # to analyse, as number_sampling_points*length_of_sampling_vectors
- # results in quadratic increase of sampling time. The 250 factor was
- # specificly determined to produce close to 1 sampling point /Angstrom^2
- # for a sphere of radius ~ 24 Angstrom. We can adjust how fine is the
- # sampling by changing the adjust factor.
- number_of_points = int(np.log10(sphere_surface_area) * 250 * adjust)
- # Here I use code by Alexandre Devert for spreading points on a sphere:
- # http://blog.marmakoide.org/?p=1
- golden_angle = np.pi * (3 - np.sqrt(5))
- theta = golden_angle * np.arange(number_of_points)
- z = np.linspace(1 - 1.0 / number_of_points, 1.0 / number_of_points - 1.0,
- number_of_points)
- radius = np.sqrt(1 - z * z)
- points = np.zeros((number_of_points, 3))
- points[:, 0] = radius * np.cos(theta) * shpere_radius
- points[:, 1] = radius * np.sin(theta) * shpere_radius
- points[:, 2] = z * shpere_radius
- # Here we will compute the eps parameter for the sklearn.cluster.DBSCAN
- # (3-dimensional spatial clustering algorithm) which is the mean distance
- # to the closest point of all points.
- values = []
- tree = KDTree(points)
- for i in points:
- dist, ind = tree.query(i.reshape(1, -1), k=10)
- values.extend(dist)
- mean_distance = np.mean(values)
- # The best eps is parametrized when adding the mean distance and it's root.
- eps = mean_distance + mean_distance**0.5
- # Here we either run the sampling points vectors analysis in serial
- # or parallel. The vectors that go through molecular pores return
- # as analysed list with the increment at vector's path with largest
- # included sphere, coordinates for this narrow channel point. vectors
- # that find molecule on theirs path are return as NoneType object.
- # Parralel analysis on user's defined number of CPUs.
- if processes:
- pool = Pool(processes=processes)
- parallel = [
- pool.apply_async(
- vector_preanalysis,
- args=(
- point,
- coordinates,
- elements_vdw, ),
- kwds={'increment': increment}) for point in points
- ]
- results = [p.get() for p in parallel if p.get() is not None]
- pool.terminate()
- # Dataset is an array of sampling points coordinates.
- dataset = np.array([x[5:8] for x in results])
- else:
- results = [
- vector_preanalysis(
- point, coordinates, elements_vdw, increment=increment)
- for point in points
- ]
- results = [x for x in results if x is not None]
- dataset = np.array([x[5:8] for x in results])
- # If not a single vector was returned from the analysis it mean that
- # no molecular channels (what we call windows here) connects the
- # molecule's interior with the surroungsings (exterior space).
- # The number of windows in that case equals zero and zero is returned.
- # Otherwise we continue our search for windows.
- if len(results) == 0:
- return None
- else:
- # Perfomr DBSCAN to cluster the sampling points vectors.
- # the n_jobs will be developed later.
- # db = DBSCAN(eps=eps, n_jobs=_ncpus).fit(dataset)
- db = DBSCAN(eps=eps).fit(dataset)
- core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
- core_samples_mask[db.core_sample_indices_] = True
- labels = set(db.labels_)
- # Assing cluster label to a sampling point.
- clusters = [[i, j] for i, j in zip(results, db.labels_)]
- clustered_results = {label: [] for label in labels}
- # Create a dictionary of clusters with points listed.
- [clustered_results[i[1]].append(i[0]) for i in clusters]
- # No for the sampling point vector in each cluster that had
- # the widest channel's 'neck' is assumed to pass the closest
- # to the window's center and therefore will be passed to
- # window analysis function.
- # We also pass user defined settings for window analysis.
- # Again either in serlia or in parallel.
- # Noisy points get a cluster label -1, therefore we have to exclude it.
- if processes:
- pool = Pool(processes=processes)
- parallel = [
- pool.apply_async(
- window_analysis,
- args=(np.array(clustered_results[cluster]), elements,
- coordinates, elements_vdw),
- kwds=kwargs) for cluster in clustered_results
- if cluster != -1
- ]
- window_results = [p.get() for p in parallel if p.get() is not None]
- pool.terminate()
- else:
- window_results = [
- window_analysis(
- np.array(clustered_results[cluster]), elements,
- coordinates, elements_vdw, **kwargs)
- for cluster in clustered_results if cluster != -1
- ]
- # The function returns two numpy arrays, one with windows diameters
- # in Angstrom, second with corresponding windows center's coordinates
- windows = np.array([result[0] for result in window_results
- if result is not None])
- windows_coms = np.array(
- [np.add(result[1], initial_com) for result in window_results
- if result is not None])
- # Safety measures, if one of the windows is None or negative a warning
- # should be raised.
- for result in window_results:
- if result is None:
- msg_ = " ".join(
- ['Warning. One of the analysed windows has',
- 'returned as None. See manual.']
- )
- # print(msg_)
- elif result[0] < 0:
- msg_ = " ".join(
- ['Warning. One of the analysed windows has a vdW',
- 'corrected diameter smaller than 0. See manual.']
- )
- # print(msg_)
- return (windows, windows_coms)
-
-
-[docs]def window_shape(window,
- elements,
- coordinates,
- increment2=0.1,
- z_bounds=[None, None],
- lb_z=True,
- z_second_mini=False,
- **kwargs):
- """
- Return window diameter and window's centre.
-
- Parameters
- ----------
- widnow: list
-
- elements: numpy.array
-
- coordinates: numpy.array
-
- elements_vdw: numpy.array
-
- step: float
-
- """
- # Copy the coordinates as we will manipulate them.
- coordinates = deepcopy(coordinates)
- # We create an array of vdw radii of elements.
- elements_vdw = np.array([[atomic_vdw_radius[x.upper()]] for x in elements])
- # Find the vector with the largest window sampling diameter from the pool.
- vector_ = window[window.argmax(axis=0)[1]][5:8]
- vector_analysed = vector_analysis(
- vector_, coordinates, elements_vdw, increment=increment2)
- # A safety check, if the refined analysis give None we end the function.
- if vector_analysed is not None:
- pass
- else:
- return None
- vector = vector_analysed[5:8]
- # Unit vectors.
- vec_a = [1, 0, 0]
- vec_b = [0, 1, 0]
- vec_c = [0, 0, 1]
- # Angles needed for rotation (in radians) to rotate and translate the
- # molecule for the vector to become the Z-axis.
- angle_1 = angle_between_vectors(np.array([vector[0], vector[1], 0]), vec_a)
- angle_2 = angle_between_vectors(vector, vec_c)
- # Depending in which cartesian coordinate system area the vector is
- # We need a rotation into a different direction and by different value.
- if vector[0] >= 0 and vector[1] >= 0 and vector[2] >= 0:
- angle_1 = -angle_1
- angle_2 = -angle_2
- if vector[0] < 0 and vector[1] >= 0 and vector[2] >= 0:
- angle_1 = np.pi * 2 + angle_1
- angle_2 = angle_2
- if vector[0] >= 0 and vector[1] < 0 and vector[2] >= 0:
- angle_1 = angle_1
- angle_2 = -angle_2
- if vector[0] < 0 and vector[1] < 0 and vector[2] >= 0:
- angle_1 = np.pi * 2 - angle_1
- if vector[0] >= 0 and vector[1] >= 0 and vector[2] < 0:
- angle_1 = -angle_1
- angle_2 = np.pi + angle_2
- if vector[0] < 0 and vector[1] >= 0 and vector[2] < 0:
- angle_2 = np.pi - angle_2
- if vector[0] >= 0 and vector[1] < 0 and vector[2] < 0:
- angle_2 = angle_2 + np.pi
- if vector[0] < 0 and vector[1] < 0 and vector[2] < 0:
- angle_1 = -angle_1
- angle_2 = np.pi - angle_2
- # Rotation matrix for rotation around Z-axis with angle_1.
- rotation_around_z = np.array([[np.cos(angle_1), -np.sin(angle_1), 0],
- [np.sin(angle_1), np.cos(angle_1), 0],
- [0, 0, 1]])
- # Rotate the whole molecule around with rotation_around_z.
- coordinates = np.array([np.dot(rotation_around_z, i) for i in coordinates])
- # Rotation matrix for rotation around Y-axis with angle_2
- rotation_around_y = np.array([[np.cos(angle_2), 0, np.sin(angle_2)],
- [0, 1, 0],
- [-np.sin(angle_2), 0, np.cos(angle_2)]])
- # Rotate the whole molecule around with rotation_around_y.
- coordinates = np.array([np.dot(rotation_around_y, i) for i in coordinates])
- # Third step is translation. We are now at [0, 0, -z].
- # We shift the molecule so that center of the window is at the origin.
- # The `z` is from original vector analysis. It is the point on the vector
- # where the largest sampling sphere was (vector_analysed[0]).
- new_z = vector_analysed[0]
- # Translate the whole molecule to shift window's center to origin.
- coordinates = coordinates - np.array([[0, 0, new_z]] *
- coordinates.shape[0])
- # !!!Here the window center (xy and z) optimisation take place!!!
- window_com = np.array([0, 0, 0], dtype=float)
- # The lb_z parameter is 'lower bound equal to z' which means,
- # that we set the lower bound for the z optimisation to be equal
- # to the -new_z as in some cages it's the COM - pore that is the
- # limiting diameter. But, no lower than new_z because we don't want to
- # move into the other direction.
- if lb_z:
- z_bounds[0] = -new_z
- window_diameter, _ = pore_diameter(elements, coordinates, com=window_com)
- # SciPy minimisation on z coordinate.
- z_args = (window_com[0], window_com[1], elements, coordinates)
- z_optimisation = minimize(
- optimise_z, x0=window_com[2], args=z_args, bounds=[z_bounds])
- # Substitute the z coordinate for a minimised one.
- window_com[2] = z_optimisation.x[0]
- # SciPy brute optimisation on x and y coordinates in window plane.
- xy_args = (window_com[2], elements, coordinates)
- xy_bounds = ((-window_diameter / 2, window_diameter / 2),
- (-window_diameter / 2, window_diameter / 2))
- xy_optimisation = brute(
- optimise_xy, xy_bounds, args=xy_args, full_output=True, finish=fmin)
- # Substitute the x and y coordinates for the optimised ones.
- window_com[0] = xy_optimisation[0][0]
- window_com[1] = xy_optimisation[0][1]
- # Additional SciPy minimisation on z coordinate. Added on 18 May 2017.
- # We can argue which aproach is best. Whether z opt and then xy opt
- # or like now z opt -> xy opt -> additional z opt etc. I have also tested
- # a loop of optimisations until some convergence and optimisation of
- # xyz coordinates at the same time by optimising these two optimisations.
- # In the end. I think this approach is best for cages.
- # Update 20 October 2017: I made this optional and turned off by default
- # In many cases that worsen the quality of the results and should be used
- # with caution.
- if z_second_mini is not False:
- z_args = (window_com[0], window_com[1], elements, coordinates)
- # The z_bounds should be passed in kwargs.
- z_optimisation = minimize(
- optimise_z, x0=window_com[2], args=z_args, bounds=[z_bounds])
- # Substitute the z coordinate for a minimised one.
- window_com[2] = z_optimisation.x[0]
- # Getting the 2D plane crosssection of a window in XY plain. (10-04-18)
- # First translation around Z axis.
- vectors_translated = [
- [
- np.dot(rotation_around_z, i[5:])[0],
- np.dot(rotation_around_z, i[5:])[1],
- np.dot(rotation_around_z, i[5:])[2],
- ] for i in window
- ]
- # Second rotation around Y axis.
- vectors_translated = [
- [
- np.dot(rotation_around_y, i)[0],
- np.dot(rotation_around_y, i)[1],
- np.dot(rotation_around_y, i)[2]
- ] for i in vectors_translated
- ]
- ref_distance = (new_z - window_com[2]) / np.linalg.norm(vector)
- # Cutting the XY plane.
- XY_plane = np.array(
- [
- [i[0] * ref_distance, i[1] * ref_distance]
- for i in vectors_translated
- ]
- )
- return XY_plane
-
-
-[docs]def find_windows_new(elements,
- coordinates,
- processes=None,
- mol_size=None,
- adjust=1,
- pore_opt=True,
- increment=1.0,
- **kwargs):
- """Return windows diameters and center of masses for a molecule."""
- # Copy the coordinates as will perform many opertaions on them
- coordinates = deepcopy(coordinates)
- # Center of our cartesian system is always at origin
- origin = np.array([0, 0, 0])
- # Initial center of mass to reverse translation at the end
- initial_com = center_of_mass(elements, coordinates)
- # Shift the cage to the origin using either the standard center of mass
- # or if pore_opt flag is True, the optimised pore center as center of mass
- if pore_opt is True:
- # Normally the pore is calculated from the COM of a molecule.
- # So, esentially the molecule's COM is the pore center.
- # To shift the molecule so that the center of the optimised pore
- # is at the origin of the system and not the center of the not
- # optimised one, we need to adjust the shift. We also have to update
- # the initial com.
- com_adjust = initial_com - opt_pore_diameter(elements, coordinates, **
- kwargs)[2]
- initial_com = initial_com - com_adjust
- coordinates = shift_com(elements, coordinates, com_adjust=com_adjust)
- else:
- # Otherwise, we just shift the cage to the origin.
- coordinates = shift_com(elements, coordinates)
- # We create an array of vdw radii of elements.
- elements_vdw = np.array([[atomic_vdw_radius[x.upper()]] for x in elements])
- # We calculate maximum diameter of a molecule to determine the radius
- # of a sampling sphere neccessary to enclose the whole molecule.
- shpere_radius = max_dim(elements, coordinates)[2] / 2
- sphere_surface_area = 4 * np.pi * shpere_radius**2
- # Here we determine the number of sampling points necessary for a fine
- # sampling. Smaller molecules require more finner density of sampling
- # points on the sampling sphere's surface, whereas largen require less.
- # This formula was created so that larger molecule do not take much longer
- # to analyse, as number_sampling_points*length_of_sampling_vectors
- # results in quadratic increase of sampling time. The 250 factor was
- # specificly determined to produce close to 1 sampling point /Angstrom^2
- # for a sphere of radius ~ 24 Angstrom. We can adjust how fine is the
- # sampling by changing the adjust factor.
- number_of_points = int(np.log10(sphere_surface_area) * 250 * adjust)
- # Here I use code by Alexandre Devert for spreading points on a sphere:
- # http://blog.marmakoide.org/?p=1
- golden_angle = np.pi * (3 - np.sqrt(5))
- theta = golden_angle * np.arange(number_of_points)
- z = np.linspace(1 - 1.0 / number_of_points, 1.0 / number_of_points - 1.0,
- number_of_points)
- radius = np.sqrt(1 - z * z)
- points = np.zeros((number_of_points, 3))
- points[:, 0] = radius * np.cos(theta) * shpere_radius
- points[:, 1] = radius * np.sin(theta) * shpere_radius
- points[:, 2] = z * shpere_radius
- # Here we will compute the eps parameter for the sklearn.cluster.DBSCAN
- # (3-dimensional spatial clustering algorithm) which is the mean distance
- # to the closest point of all points.
- values = []
- tree = KDTree(points)
- for i in points:
- dist, ind = tree.query(i.reshape(1, -1), k=10)
- values.extend(dist)
- mean_distance = np.mean(values)
- # The best eps is parametrized when adding the mean distance and it's root.
- eps = mean_distance + mean_distance**0.5
- # Here we either run the sampling points vectors analysis in serial
- # or parallel. The vectors that go through molecular pores return
- # as analysed list with the increment at vector's path with largest
- # included sphere, coordinates for this narrow channel point. vectors
- # that find molecule on theirs path are return as NoneType object.
- # Parralel analysis on user's defined number of CPUs.
- if processes:
- pool = Pool(processes=processes)
- parallel = [
- pool.apply_async(
- vector_preanalysis,
- args=(
- point,
- coordinates,
- elements_vdw, ),
- kwds={'increment': increment}) for point in points
- ]
- results = [p.get() for p in parallel if p.get() is not None]
- pool.terminate()
- # Dataset is an array of sampling points coordinates.
- dataset = np.array([x[5:8] for x in results])
- else:
- results = [
- vector_preanalysis(
- point, coordinates, elements_vdw, increment=increment)
- for point in points
- ]
- results = [x for x in results if x is not None]
- dataset = np.array([x[5:8] for x in results])
- # If not a single vector was returned from the analysis it mean that
- # no molecular channels (what we call windows here) connects the
- # molecule's interior with the surroungsings (exterior space).
- # The number of windows in that case equals zero and zero is returned.
- # Otherwise we continue our search for windows.
- if len(results) == 0:
- return None
- else:
- # Perfomr DBSCAN to cluster the sampling points vectors.
- # the n_jobs will be developed later.
- # db = DBSCAN(eps=eps, n_jobs=_ncpus).fit(dataset)
- db = DBSCAN(eps=eps).fit(dataset)
- core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
- core_samples_mask[db.core_sample_indices_] = True
- labels = set(db.labels_)
- # Assing cluster label to a sampling point.
- clusters = [[i, j] for i, j in zip(results, db.labels_)]
- clustered_results = {label: [] for label in labels}
- # Create a dictionary of clusters with points listed.
- [clustered_results[i[1]].append(i[0]) for i in clusters]
- return clustered_results, elements, coordinates, initial_com
-
-
-[docs]def calculate_window_diameter(window, elements, coordinates, **kwargs):
- elements_vdw = np.array(
- [[atomic_vdw_radius[x.upper()]] for x in elements]
- )
- window_results = window_analysis(
- np.array(window), elements, coordinates, elements_vdw, **kwargs
- )
- # The function returns two numpy arrays, one with windows diameters
- # in Angstrom, second with corresponding windows center's coordinates
- if window_results:
- return window_results[0]
- else:
- return None
-
-
-[docs]def get_window_com(window, elements, coordinates, initial_com, **kwargs):
- elements_vdw = np.array(
- [[atomic_vdw_radius[x.upper()]] for x in elements]
- )
- window_results = window_analysis(
- np.array(window), elements, coordinates, elements_vdw, **kwargs
- )
- # The function returns two numpy arrays, one with windows diameters
- # in Angstrom, second with corresponding windows center's coordinates
- if window_results:
- # I correct the COM of window for the initial COM of the cage
- return np.add(window_results[1], initial_com)
- else:
- return None
-
-
-[docs]def vector_analysis_reversed(vector, coordinates, elements_vdw):
- norm_vec = vector/np.linalg.norm(vector)
- intersections = []
- origin = center_of_coor(coordinates)
- L = coordinates - origin
- t_ca = np.dot(L, norm_vec)
- d = np.sqrt(np.einsum('ij,ij->i', L, L) - t_ca**2)
- under_sqrt = elements_vdw**2 - d**2
- diag = under_sqrt.diagonal()
- positions = np.argwhere(diag > 0)
- for pos in positions:
- t_hc = np.sqrt(diag[pos[0]])
- t_0 = t_ca[pos][0] - t_hc
- t_1 = t_ca[pos][0] + t_hc
-
- P_0 = origin + np.dot(t_0, norm_vec)
- P_1 = origin + np.dot(t_1, norm_vec)
- if np.linalg.norm(P_0) < np.linalg.norm(P_1):
- intersections.append([np.linalg.norm(P_1), P_1])
- if intersections:
- intersection = sorted(intersections, reverse=True)[0][1]
- dist_origin = np.linalg.norm(intersection)
- return [dist_origin, intersection]
-
-
-[docs]def find_average_diameter(elements, coordinates, adjust=1, increment=0.1,
- processes=None, **kwargs):
- """Return average diameter for a molecule."""
- # Copy the coordinates as will perform many opertaions on them
- coordinates = deepcopy(coordinates)
- # Center of our cartesian system is always at origin
- origin = np.array([0, 0, 0])
- # Initial center of mass to reverse translation at the end
- initial_com = center_of_mass(elements, coordinates)
- # We just shift the cage to the origin.
- coordinates = shift_com(elements, coordinates)
- # We create an array of vdw radii of elements.
- elements_vdw = np.array([[atomic_vdw_radius[x.upper()]] for x in elements])
- # We calculate maximum diameter of a molecule to determine the radius
- # of a sampling sphere neccessary to enclose the whole molecule.
- shpere_radius = max_dim(elements, coordinates)[2]
- sphere_surface_area = 4 * np.pi * shpere_radius**2
- # Here we determine the number of sampling points necessary for a fine
- # sampling. Smaller molecules require more finner density of sampling
- # points on the sampling sphere's surface, whereas largen require less.
- # This formula was created so that larger molecule do not take much longer
- # to analyse, as number_sampling_points*length_of_sampling_vectors
- # results in quadratic increase of sampling time. The 250 factor was
- # specificly determined to produce close to 1 sampling point /Angstrom^2
- # for a sphere of radius ~ 24 Angstrom. We can adjust how fine is the
- # sampling by changing the adjust factor.
- number_of_points = int(np.log10(sphere_surface_area) * 250 * adjust)
- # Here I use code by Alexandre Devert for spreading points on a sphere:
- # http://blog.marmakoide.org/?p=1
- golden_angle = np.pi * (3 - np.sqrt(5))
- theta = golden_angle * np.arange(number_of_points)
- z = np.linspace(1 - 1.0 / number_of_points, 1.0 / number_of_points - 1.0,
- number_of_points)
- radius = np.sqrt(1 - z * z)
- points = np.zeros((number_of_points, 3))
- points[:, 0] = radius * np.cos(theta) * shpere_radius
- points[:, 1] = radius * np.sin(theta) * shpere_radius
- points[:, 2] = z * shpere_radius
- # Here we analyse the vectors and retain the ones that create the molecule
- # outline.
- if processes:
- pool = Pool(processes=processes)
- parallel = [
- pool.apply_async(
- vector_analysis_reversed,
- args=(
- point, coordinates, elements_vdw)
- ) for point in points
- ]
- results = [p.get() for p in parallel if p.get() is not None]
- pool.terminate()
- else:
- results = [
- vector_analysis_reversed(
- point, coordinates, elements_vdw)
- for point in points
- ]
- results_cleaned = [x[0] for x in results if x is not None]
- return np.mean(results_cleaned)*2
-
-
-[docs]def vector_analysis_pore_shape(vector, coordinates, elements_vdw):
- norm_vec = vector/np.linalg.norm(vector)
- intersections = []
- origin = center_of_coor(coordinates)
- L = coordinates - origin
- t_ca = np.dot(L, norm_vec)
- d = np.sqrt(np.einsum('ij,ij->i', L, L) - t_ca**2)
- under_sqrt = elements_vdw**2 - d**2
- diag = under_sqrt.diagonal()
- positions = np.argwhere(diag > 0)
- for pos in positions:
- t_hc = np.sqrt(diag[pos[0]])
- t_0 = t_ca[pos][0] - t_hc
- t_1 = t_ca[pos][0] + t_hc
-
- P_0 = origin + np.dot(t_0, norm_vec)
- P_1 = origin + np.dot(t_1, norm_vec)
- # print(np.linalg.norm(P_0), np.linalg.norm(P_1))
- if np.linalg.norm(P_0) < np.linalg.norm(P_1):
- intersections.append([np.linalg.norm(P_0), P_0])
- if intersections:
- return sorted(intersections)[0][1]
-
-
-[docs]def calculate_pore_shape(elements, coordinates, adjust=1, increment=0.1,
- **kwargs):
- """Return average diameter for a molecule."""
- # Copy the coordinates as will perform many opertaions on them
- coordinates = deepcopy(coordinates)
- # Center of our cartesian system is always at origin
- origin = np.array([0, 0, 0])
- # Initial center of mass to reverse translation at the end
- initial_com = center_of_mass(elements, coordinates)
- # We just shift the cage to the origin.
- coordinates = shift_com(elements, coordinates)
- # We create an array of vdw radii of elements.
- elements_vdw = np.array([[atomic_vdw_radius[x.upper()]] for x in elements])
- # We calculate maximum diameter of a molecule to determine the radius
- # of a sampling sphere neccessary to enclose the whole molecule.
- shpere_radius = max_dim(elements, coordinates)[2]/2
- sphere_surface_area = 4 * np.pi * shpere_radius**2
- # Here we determine the number of sampling points necessary for a fine
- # sampling. Smaller molecules require more finner density of sampling
- # points on the sampling sphere's surface, whereas largen require less.
- # This formula was created so that larger molecule do not take much longer
- # to analyse, as number_sampling_points*length_of_sampling_vectors
- # results in quadratic increase of sampling time. The 250 factor was
- # specificly determined to produce close to 1 sampling point /Angstrom^2
- # for a sphere of radius ~ 24 Angstrom. We can adjust how fine is the
- # sampling by changing the adjust factor.
- number_of_points = int(np.log10(sphere_surface_area) * 250 * adjust)
- # Here I use code by Alexandre Devert for spreading points on a sphere:
- # http://blog.marmakoide.org/?p=1
- golden_angle = np.pi * (3 - np.sqrt(5))
- theta = golden_angle * np.arange(number_of_points)
- z = np.linspace(1 - 1.0 / number_of_points, 1.0 / number_of_points - 1.0,
- number_of_points)
- radius = np.sqrt(1 - z * z)
- points = np.zeros((number_of_points, 3))
- points[:, 0] = radius * np.cos(theta) * shpere_radius
- points[:, 1] = radius * np.sin(theta) * shpere_radius
- points[:, 2] = z * shpere_radius
- # Here we will compute the eps parameter for the sklearn.cluster.DBSCAN
- # (3-dimensional spatial clustering algorithm) which is the mean distance
- # to the closest point of all points.
- values = []
- tree = KDTree(points)
- for i in points:
- dist, ind = tree.query(i.reshape(1, -1), k=10)
- values.extend(dist)
- mean_distance = np.mean(values)
- # The best eps is parametrized when adding the mean distance and it's root.
- eps = mean_distance + mean_distance**0.5
- # Here we either run the sampling points vectors analysis in serial
- # or parallel. The vectors that go through molecular voids return
- # as analysed list with the increment at vector's path with largest
- # included sphere, coordinates for this narrow channel point. vectors
- # that find molecule on theirs path are return as NoneType object.
- results = [
- vector_analysis_pore_shape(point, coordinates, elements_vdw)
- for point in points
- ]
- results_cleaned = [x for x in results if x is not None]
- ele = np.array(['X'] * len(results_cleaned))
- coor = np.array(results_cleaned)
- return coor
-
-
-[docs]def circumcircle_window(coordinates, atom_set):
- # Calculating circumcircle
- A = np.array(coordinates[int(atom_set[0])])
- B = np.array(coordinates[int(atom_set[1])])
- C = np.array(coordinates[int(atom_set[2])])
- a = np.linalg.norm(C - B)
- b = np.linalg.norm(C - A)
- c = np.linalg.norm(B - A)
- s = (a + b + c) / 2
- # Holden et al. method is intended to only work with triads of carbons,
- # therefore I substract the vdW radii for a carbon.
- # These equation calculaties the window's radius.
- R = a*b*c / 4 / np.sqrt(s * (s - a) * (s - b) * (s - c)) - 1.70
- # This steps are used to calculate the window's COM.
- b1 = a*a * (b*b + c*c - a*a)
- b2 = b*b * (a*a + c*c - b*b)
- b3 = c*c * (a*a + b*b - c*c)
- COM = np.column_stack((A, B, C)).dot(np.hstack((b1, b2, b3)))
- # The window's COM.
- COM /= b1 + b2 + b3
- return R, COM
-
-
-[docs]def circumcircle(coordinates, atom_sets):
- pld_diameter_list = []
- pld_com_list = []
- iter_ = 0
- while iter_ < len(atom_sets):
- R, COM = circumcircle_window(coordinates, atom_sets[iter_])
- pld_diameter_list.append(R*2)
- pld_com_list.append(COM)
- iter_ += 1
- return pld_diameter_list, pld_com_list
-