Source code for CodeEntropy.levels.neighbors

"""Neighbours info for orientational entropy.

This module finds the average number of neighbors, symmetry numbers, and
and linearity for each group.
These are used downstream to compute the orientational entropy.
"""

import logging

import numpy as np
from rdkit import Chem

from CodeEntropy.levels.search import Search

logger = logging.getLogger(__name__)


[docs] class Neighbors: """ Class to find the neighbors and any related information needed for the calculation of orientational entropy. """ def __init__(self): """ Initializes the Neighbors class with placeholders for data, including the system trajectory, groups, and levels. """ self._universe = None self._groups = None self._levels = None self._search = Search()
[docs] def get_neighbors(self, universe, levels, groups, frame_source, search_type): """Find average neighbour counts for each molecule group. Args: universe: MDAnalysis universe object for the active analysis system. levels: Level list for each molecule. groups: Mapping of group id to molecule ids. frame_source: FrameSource controlling selected trajectory access. search_type: Neighbour search method, either ``"RAD"`` or ``"grid"``. Returns: Dict mapping group id to average number of neighbours. Raises: ValueError: If ``search_type`` is unknown. """ frame_indices = [ int(frame_index) for frame_index in frame_source.iter_indices() ] n_frames = len(frame_indices) if n_frames <= 0: return {group_id: 0.0 for group_id in groups.keys()} number_neighbors = {} average_number_neighbors = {} for group_id in groups.keys(): molecules = groups[group_id] highest_level = levels[molecules[0]][-1] for mol_id in molecules: for frame_index in frame_indices: if search_type == "RAD": neighbors = self._search.get_RAD_neighbors( universe=universe, mol_id=mol_id, frame_source=frame_source, frame_index=frame_index, ) elif search_type == "grid": neighbors = self._search.get_grid_neighbors( universe=universe, mol_id=mol_id, highest_level=highest_level, frame_source=frame_source, frame_index=frame_index, ) else: raise ValueError(f"unknown search_type {search_type}") number_neighbors.setdefault(group_id, []).append(len(neighbors)) number = np.sum(number_neighbors[group_id]) average_number_neighbors[group_id] = number / (len(molecules) * n_frames) logger.debug( "group: %s number neighbors %s", group_id, average_number_neighbors[group_id], ) return average_number_neighbors
[docs] def get_symmetry(self, universe, groups): """ Calculate symmetry number for the molecule. This function converts the MDAnalysis instance of a molecule into an RDKit object and then calculates the symmetry number and determines if the molecule is linear. Args: universe: MDAnalysis object mol_id: index of the molecule of interest Returns: symmetry_number: dict, symmetry number of each group linear: dict, linear for each group """ symmetry_number = {} linear = {} for group_id in groups.keys(): molecules = groups[group_id] rdkit_mol, number_heavy, number_hydrogen = self._get_rdkit_mol( universe, molecules[0] ) symmetry_number[group_id] = self._get_symmetry_number( rdkit_mol, number_heavy, number_hydrogen ) linear[group_id] = self._get_linear(rdkit_mol, number_heavy) logger.debug( f"group: {group_id}, symmetry: {symmetry_number}, linear: {linear}" ) return symmetry_number, linear
def _get_rdkit_mol(self, universe, mol_id): """ Convert molecule to rdkit object. MDAnalysis convert_to(RDKIT) needs elements. We are removing dummy atoms and converting to rkdit format. If there are dummy atoms you need inferrer=None otherwise you get errors from it getting the valence wrong. If possible it is better to use the inferrer to get the bonds and hybridization correct. The convert_to argument force=True forces it to continue even when it cannot find hydrogens, this is needed to avoid errors for molecules like carbon dioxide which do not have hydrogens. Args: universe: MDAnalysis object mol_id: index of the molecule of interest Returns: rdkit_mol number_heavy number_hydrogen """ if not hasattr(universe.atoms, "elements"): universe.guess_TopologyAttrs(to_guess=["elements"]) molecule = universe.atoms.fragments[mol_id] dummy = molecule.select_atoms("prop mass < 0.1") if len(dummy) > 0: frag = molecule.select_atoms("prop mass > 0.1") rdkit_mol = frag.convert_to("RDKIT", force=True, inferrer=None) logger.debug("Warning: Dummy atoms found") else: try: rdkit_mol = molecule.convert_to("RDKIT", force=True) except Exception: logger.debug("Warning: Constraint bonds to H atoms found") rdkit_mol = molecule.convert_to("RDKIT", force=True, inferrer=None) number_heavy = rdkit_mol.GetNumHeavyAtoms() number_hydrogen = rdkit_mol.GetNumAtoms() - number_heavy return rdkit_mol, number_heavy, number_hydrogen def _get_symmetry_number(self, rdkit_mol, number_heavy, number_hydrogen): """ Calculate symmetry number for the molecule. For larger molecules, removing the hydrogens reduces the over counting of symmetry states. When there is only one heavy atom the hydrogens are important to the symmetry. If there is a single heavy atom with no hydrogens then the molecule is spherically symmetric. Using the RDKit GetSubstructMatches function often works well as a guess for the symmetry number, but it occasionally returns a symmetry number 2x the expected value (for example, cyclohexane). Args: rdkit_mol: rdkit object for molecule of interest number_heavy (int): number of heavy atoms number_hydrogen (int): number of hydrogen atoms Returns: symmetry_number (int): symmetry number of molecule """ if number_heavy > 1: rdkit_heavy = Chem.RemoveHs(rdkit_mol) matches = rdkit_mol.GetSubstructMatches( rdkit_heavy, uniquify=False, useChirality=True ) symmetry_number = len(matches) elif number_hydrogen > 0: matches = rdkit_mol.GetSubstructMatches( rdkit_mol, uniquify=False, useChirality=True ) symmetry_number = len(matches) else: symmetry_number = 0 return symmetry_number def _get_linear(self, rdkit_mol, number_heavy): """ Determine if the molecule is linear. We are not considering the hydrogens, just the united atom beads. So, molecules like methanol are treated as linear since they have only two united atoms. Args: rkdit_mol: rdkit object for molecule of interest number_heavy (int): number of heavy atoms Returns: linear (bool): True if molecule linear """ linear = False if number_heavy == 1: linear = False elif number_heavy == 2: linear = True else: rdkit_heavy = Chem.RemoveHs(rdkit_mol) sp_count = 0 for x in rdkit_heavy.GetAtoms(): if x.GetHybridization() == Chem.HybridizationType.SP: sp_count += 1 if sp_count >= (number_heavy - 2): linear = True return linear