"""
MDAnalysis universe utilities.
This module contains helpers for creating reduced MDAnalysis `Universe` objects by
sub-selecting frames and/or atoms, and for building a `Universe` that combines
coordinates from one trajectory with forces sourced from a second trajectory.
"""
from __future__ import annotations
import logging
import MDAnalysis as mda
from MDAnalysis.analysis.base import AnalysisFromFunction
from MDAnalysis.coordinates.memory import MemoryReader
from MDAnalysis.exceptions import NoDataError
logger = logging.getLogger(__name__)
[docs]
class UniverseOperations:
"""Functions to create and manipulate MDAnalysis Universe objects.
This helper provides methods to:
- Build reduced universes by selecting subsets of frames or atoms.
- Extract a single fragment (molecule) into a standalone universe.
- Merge coordinates from one trajectory with forces from another trajectory.
"""
def __init__(self) -> None:
"""Initialise the operations helper."""
self._universe = None
[docs]
def select_frames(
self,
u: mda.Universe,
start: int | None = None,
end: int | None = None,
step: int = 1,
) -> mda.Universe:
"""Create a reduced universe by dropping frames according to user selection.
Args:
u: A Universe object with topology, coordinates and (optionally) forces.
start: Frame index to start analysis. If None, defaults to 0.
end: Frame index to stop analysis (Python slicing semantics). If None,
defaults to the full trajectory length.
step: Step size between frames.
Returns:
A reduced universe containing the selected frames, with coordinates,
forces (if present) and unit cell dimensions loaded into memory.
"""
if start is None:
start = 0
if end is None:
end = len(u.trajectory)
select_atom = u.select_atoms("all", updating=True)
coordinates = self._extract_timeseries(select_atom, kind="positions")[
start:end:step
]
forces = self._extract_timeseries(select_atom, kind="forces")[start:end:step]
dimensions = self._extract_timeseries(select_atom, kind="dimensions")[
start:end:step
]
u2 = mda.Merge(select_atom)
u2.load_new(
coordinates,
format=MemoryReader,
forces=forces,
dimensions=dimensions,
)
logger.debug(f"MDAnalysis.Universe - reduced universe (frame-selected): {u2}")
return u2
[docs]
def select_atoms(self, u: mda.Universe, select_string: str = "all") -> mda.Universe:
"""Create a reduced universe by dropping atoms according to user selection.
Args:
u: A Universe object with topology, coordinates and (optionally) forces.
select_string: MDAnalysis `select_atoms` selection string.
Returns:
A reduced universe containing only the selected atoms. Coordinates,
forces (if present) and dimensions are loaded into memory.
"""
select_atom = u.select_atoms(select_string, updating=True)
coordinates = self._extract_timeseries(select_atom, kind="positions")
forces = self._extract_timeseries(select_atom, kind="forces")
dimensions = self._extract_timeseries(select_atom, kind="dimensions")
u2 = mda.Merge(select_atom)
u2.load_new(
coordinates,
format=MemoryReader,
forces=forces,
dimensions=dimensions,
)
logger.debug(f"MDAnalysis.Universe - reduced universe (atom-selected): {u2}")
return u2
[docs]
def convert_lammps(
self,
tprfile: str,
trrfile,
fileformat: str | None = None,
) -> mda.Universe:
"""Update the units produced from the universe produced from LAMMPS
format topology and trajectory files. MDA currently has a bug that
results in forces not being converted to the correct units
(see issue for more details:
https://github.com/MDAnalysis/mdanalysis/issues/5115
)
The method currently expects the following additional columns in the
lammps dump file: fx fy fz c_5 c_7
where c_5 and c_7 are the atom potential and kinetic energies respectively.
This method loads:
- Coordinates and dimensions from the coordinate trajectory
(``tprfile`` + ``trrfile``).
Args:
tprfile: Topology input file.
trrfile: Coordinate trajectory file(s). This can be a single path or a
list, as accepted by MDAnalysis.
fileformat: Optional file format for the coordinate trajectory, as
recognised by MDAnalysis.
Returns:
MDAnalysis.Universe: A new Universe containing coordinates, forces and
dimensions loaded into memory.
Raises:
ValueError: If fileformat is not one of the supported values.
"""
def _convert_lammps_forces_energies(ts):
"""
Convert lammps forces from kcal/mol/Ang to kJ/mol/Ang.
Assumes columns for per-atom potential (c_5) and kinetic energies (c_7)
are provided and converts these too.
Args:
ts: MDAnalysis timeseries from the trajectory.
Returns:
A converted time series.
"""
ts.forces *= 4.184
ts.data["c_5"] *= 4.184
ts.data["c_7"] *= 4.184
return ts
def _convert_lammps_forces(ts):
"""
Convert lammps forces from kcal/mol/Ang to kJ/mol/Ang.
Args:
ts: MDAnalysis timeseries from the trajectory.
Returns:
A converted time series.
"""
ts.forces *= 4.184
return ts
if fileformat == "LAMMPSDUMP":
try:
return mda.Universe(
tprfile,
trrfile,
format=fileformat,
additional_columns=["fx", "fy", "fz", "c_5", "c_7"],
transformations=[_convert_lammps_forces_energies],
)
except KeyError:
logger.debug(
f"Warning: Energy columns not found in LAMMPSDUMP: {trrfile}"
)
return mda.Universe(
tprfile,
trrfile,
format=fileformat,
additional_columns=["fx", "fy", "fz"],
transformations=[_convert_lammps_forces],
)
else:
raise ValueError(
f"Incorrect file format: {fileformat}, LAMMPSDUMP expected"
)
[docs]
def merge_forces(
self,
tprfile: str,
trrfile,
forcefile: str,
fileformat: str | None = None,
kcal: bool = False,
*,
force_format: str | None = None,
fallback_to_positions_if_no_forces: bool = True,
) -> mda.Universe:
"""Create a universe by merging coordinates and forces from different files.
This method loads:
- Coordinates and dimensions from the coordinate trajectory
(``tprfile`` + ``trrfile``).
- Forces from the force trajectory (``tprfile`` + ``forcefile``).
If the force trajectory does not expose forces in MDAnalysis (e.g., the file
does not contain forces, or the reader does not provide them), then:
- If ``fallback_to_positions_if_no_forces`` is True, positions from the
force trajectory are used as the "forces" array (backwards-compatible
behaviour with earlier implementations).
- Otherwise, the underlying ``NoDataError`` is raised.
Args:
tprfile: Topology input file.
trrfile: Coordinate trajectory file(s). This can be a single path or a
list, as accepted by MDAnalysis.
forcefile: Trajectory containing forces.
fileformat: Optional file format for the coordinate trajectory, as
recognised by MDAnalysis.
kcal: If True, scale the force array by 4.184 to convert from kcal to kJ.
force_format: Optional file format for the force trajectory. If not
provided, uses ``fileformat``.
fallback_to_positions_if_no_forces: If True, and the force trajectory has
no accessible forces, use positions from the force trajectory as a
fallback (legacy behaviour).
Returns:
MDAnalysis.Universe: A new Universe containing coordinates, forces and
dimensions loaded into memory.
"""
logger.debug(f"Loading coordinate Universe with {trrfile}")
u = mda.Universe(tprfile, trrfile, format=fileformat)
ff = force_format if force_format is not None else fileformat
logger.debug(f"Loading force Universe with {forcefile}")
u_force = mda.Universe(tprfile, forcefile, format=ff)
select_atom = u.select_atoms("all")
select_atom_force = u_force.select_atoms("all")
coordinates = self._extract_timeseries(select_atom, kind="positions")
dimensions = self._extract_timeseries(select_atom, kind="dimensions")
forces = self._extract_force_timeseries_with_fallback(
select_atom_force,
fallback_to_positions_if_no_forces=fallback_to_positions_if_no_forces,
)
if kcal:
forces *= 4.184
logger.debug("Merging forces with coordinates universe.")
new_universe = mda.Merge(select_atom)
new_universe.load_new(
coordinates,
forces=forces,
dimensions=dimensions,
)
return new_universe
def _extract_timeseries(self, atomgroup, *, kind: str):
"""Extract a time series array for the requested kind from an AtomGroup.
Args:
atomgroup: MDAnalysis AtomGroup (may be updating).
kind: One of {"positions", "forces", "dimensions"}.
Returns:
Time series with shape:
- positions: (n_frames, n_atoms, 3)
- forces: (n_frames, n_atoms, 3) if available, else raises NoDataError
- dimensions: (n_frames, 6) or (n_frames, 3) depending on reader
Raises:
ValueError: If kind is not one of the supported values.
NoDataError: If kind is "forces" and the trajectory does not provide
forces via the configured reader.
"""
if kind == "positions":
func = self._positions_copy
elif kind == "forces":
func = self._forces_copy
elif kind == "dimensions":
func = self._dimensions_copy
else:
raise ValueError(f"Unknown timeseries kind: {kind}")
return AnalysisFromFunction(func, atomgroup).run().results["timeseries"]
def _positions_copy(self, ag):
"""Return a copy of positions for AnalysisFromFunction.
Args:
ag: MDAnalysis AtomGroup.
Returns:
Copy of ag.positions.
"""
return ag.positions.copy()
def _forces_copy(self, ag):
"""Return a copy of forces for AnalysisFromFunction.
Args:
ag: MDAnalysis AtomGroup.
Returns:
Copy of ag.forces.
"""
return ag.forces.copy()
def _dimensions_copy(self, ag):
"""Return a copy of box dimensions for AnalysisFromFunction.
Args:
ag: MDAnalysis AtomGroup.
Returns:
Copy of ag.dimensions.
"""
return ag.dimensions.copy()
def _extract_force_timeseries_with_fallback(
self,
atomgroup_force,
*,
fallback_to_positions_if_no_forces: bool,
):
"""Extract force timeseries, optionally falling back to positions.
This isolates the behaviour that changed your runtime outcome: older code
used positions from the force trajectory, which never triggered `NoDataError`.
This method keeps that behaviour available for backwards compatibility.
Args:
atomgroup_force: MDAnalysis AtomGroup sourced from the force trajectory.
fallback_to_positions_if_no_forces: If True, fall back to extracting
positions when forces are unavailable; otherwise re-raise NoDataError.
Returns:
A time series array of shape (n_frames, n_atoms, 3). The returned array
contains forces when available, otherwise positions if fallback is enabled.
Raises:
NoDataError: If forces are unavailable and
fallback_to_positions_if_no_forces is False.
"""
try:
return self._extract_timeseries(atomgroup_force, kind="forces")
except NoDataError:
if not fallback_to_positions_if_no_forces:
raise
return self._extract_timeseries(atomgroup_force, kind="positions")