Source code for CodeEntropy.levels.dihedrals.conformational_state_builder
"""Public conformational-state builder for dihedral analysis.
This module keeps the stable ``ConformationStateBuilder`` entry point used by
``ConformationDAG`` while the implementation is split across domain-specific
helpers for topology discovery, angle observation, peak detection, and state
assignment.
"""
from __future__ import annotations
import logging
from typing import Any
from rich.progress import TaskID
from CodeEntropy.levels.dihedrals.peak_detection import ConformationPeakDetector
from CodeEntropy.levels.dihedrals.state_assignment import (
ConformationStateAssigner,
UAKey,
)
from CodeEntropy.results.reporter import _RichProgressSink
from CodeEntropy.trajectory.frames import FrameSelection
logger = logging.getLogger(__name__)
[docs]
class ConformationStateBuilder(ConformationPeakDetector, ConformationStateAssigner):
    """Turn dihedral angles of the selected frames into conformational states.

    The class combines ``ConformationPeakDetector`` and
    ``ConformationStateAssigner``; the private helpers invoked below
    (topology discovery, chunk-task construction, angle collection, peak
    reduction, state assignment and merging) are not defined in this class
    and are presumably supplied by those bases or their parents.
    """

    def __init__(self, universe_operations: Any) -> None:
        """Remember the universe helper object for later use.

        Args:
            universe_operations: Object expected to expose the helpers
                ``extract_fragment(data_container, molecule_id)`` and
                ``select_atoms(atomgroup, selection_string)``.
        """
        self._universe_operations = universe_operations

    def build_conformational_states(
        self,
        data_container: Any,
        levels: dict[Any, list[str]],
        groups: dict[int, list[Any]],
        bin_width: float,
        frame_selection: FrameSelection,
        progress: _RichProgressSink | None = None,
        chunk_size: int | None = None,
    ) -> tuple[dict[UAKey, list[str]], list[list[str]], dict[UAKey, int], list[int]]:
        """Compute conformational state labels for the selected frames.

        This is the public entry point; it resolves the chunk size and hands
        all work to ``_build_conformational_states_serial_chunked``.

        Args:
            data_container: MDAnalysis Universe (or compatible container) from
                which fragments and dihedral time series are obtained.
            levels: Enabled level names keyed by molecule id.
            groups: Molecule ids keyed by group id.
            bin_width: Histogram bin width, in degrees, used to locate peaks
                in the dihedral populations.
            frame_selection: FrameSelection describing which absolute frames
                take part in the analysis.
            progress: Progress sink to report to, or ``None`` for no reporting.
            chunk_size: Number of selected frames per internal chunk. When
                ``None``, ``max(1, frame_selection.n_frames)`` is used so the
                whole selection forms a single chunk.

        Returns:
            Tuple ``(states_ua, states_res, flexible_ua, flexible_res)``.

        Raises:
            ValueError: If an explicitly supplied ``chunk_size`` is below one.
        """
        if chunk_size is not None:
            frames_per_chunk = chunk_size
        else:
            # No explicit size: one chunk spanning every selected frame, but
            # never smaller than 1 so the delegate's validation passes.
            frames_per_chunk = max(1, int(frame_selection.n_frames))

        return self._build_conformational_states_serial_chunked(
            data_container=data_container,
            levels=levels,
            groups=groups,
            bin_width=bin_width,
            frame_selection=frame_selection,
            chunk_size=frames_per_chunk,
            progress=progress,
        )

    def _build_conformational_states_serial_chunked(
        self,
        data_container: Any,
        levels: dict[Any, list[str]],
        groups: dict[int, list[Any]],
        bin_width: float,
        frame_selection: FrameSelection,
        chunk_size: int,
        progress: _RichProgressSink | None = None,
    ) -> tuple[dict[UAKey, list[str]], list[list[str]], dict[UAKey, int], list[int]]:
        """Process every group serially, chunk by chunk, and merge the results.

        For each non-empty group the method discovers the dihedral topology,
        builds per-chunk tasks, collects the angle observables, reduces them
        to peak data, assigns state partials against those peaks, reduces the
        partials and merges them into the shared output containers.

        Args:
            data_container: MDAnalysis universe.
            levels: Enabled level names keyed by molecule id.
            groups: Molecule ids keyed by group id.
            bin_width: Histogram bin width in degrees.
            frame_selection: Selected absolute trajectory frames.
            chunk_size: Number of selected frames per chunk.
            progress: Progress sink to report to, or ``None``.

        Returns:
            Tuple ``(states_ua, states_res, flexible_ua, flexible_res)``.

        Raises:
            ValueError: If ``chunk_size`` is less than one.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be >= 1")

        group_count = len(groups)

        # Output containers, filled in place by ``_merge_group_state_data``.
        states_ua: dict[UAKey, list[str]] = {}
        states_res: list[list[str]] = [[] for _ in range(group_count)]
        flexible_ua: dict[UAKey, int] = {}
        flexible_res: list[int] = []

        progress_task: TaskID | None = None
        if progress is not None:
            progress_task = progress.add_task(
                "[green]Conformational states",
                # Keep the total at one or more so the "no groups" path can
                # still advance the bar once.
                total=max(1, group_count),
                title="Initializing",
            )
        # Neither name is rebound below, so the guard can be evaluated once.
        reporting = progress is not None and progress_task is not None

        if not groups:
            if reporting:
                progress.update(progress_task, title="No groups")
                progress.advance(progress_task)
            return states_ua, states_res, flexible_ua, flexible_res

        for group_id, molecules in groups.items():
            if not molecules:
                # Nothing to analyse for this group; just account for it.
                if reporting:
                    progress.update(progress_task, title=f"Group {group_id} (empty)")
                    progress.advance(progress_task)
                continue

            if reporting:
                progress.update(progress_task, title=f"Group {group_id}")

            # The first molecule's enabled levels are used for the whole group.
            level_list = levels[molecules[0]]

            group_topologies = self._discover_group_dihedral_topology(
                data_container=data_container,
                group_id=group_id,
                molecules=molecules,
                level_list=level_list,
            )
            chunk_tasks = self._build_conformation_chunk_tasks(
                topologies=group_topologies,
                frame_selection=frame_selection,
                chunk_size=chunk_size,
            )
            # Lookup from molecule order to topology, used to pair each chunk
            # task / observable with the topology it belongs to.
            topology_by_order = {
                entry.molecule_order: entry for entry in group_topologies
            }

            # Map step: one angle observable per chunk task.
            observables = [
                self._collect_angle_observable(
                    topology=topology_by_order[chunk_task.molecule_order],
                    task=chunk_task,
                    level_list=level_list,
                )
                for chunk_task in chunk_tasks
            ]

            # Reduce step: peaks are derived from all observables of the group.
            peak_data = self._reduce_angle_observables_to_peak_data(
                observables=observables,
                level_list=level_list,
                bin_width=bin_width,
            )

            # Second map step: label each observable against the group peaks.
            partials = [
                self._assign_state_partial_from_observable(
                    observable=angle_observable,
                    topology=topology_by_order[angle_observable.task.molecule_order],
                    level_list=level_list,
                    peaks_ua=peak_data.peaks_ua,
                    peaks_res=peak_data.peaks_res,
                )
                for angle_observable in observables
            ]

            group_state_data = self._reduce_state_partials(partials)
            self._merge_group_state_data(
                state_data=group_state_data,
                states_ua=states_ua,
                states_res=states_res,
                flexible_ua=flexible_ua,
                flexible_res=flexible_res,
            )

            if reporting:
                progress.advance(progress_task)

        logger.debug("States UA: %s", states_ua)
        logger.debug("Number of flexible dihedrals UA: %s", flexible_ua)
        logger.debug("States Res: %s", states_res)
        logger.debug("Number of flexible dihedrals Res: %s", flexible_res)

        return states_ua, states_res, flexible_ua, flexible_res