Source code for meld.system.indexing

"""
Look up atom and residue indices.

The :class:`Indexer` class is the main object for performing
indexing look ups.

An :class:`AtomIndex` is a zero-based absolute atom index.
A :class:`ResidueIndex` is a zero-based absolute residue index.
"""

from typing import Dict, List, NamedTuple, Optional, Tuple


class _ChainInfo(NamedTuple):
    residues: Dict[int, int]
    "maps from index_within_chain to global_index"


class _SubSystemInfo(NamedTuple):
    n_residues: int
    "number of residues in this subsystem"
    chains: List[_ChainInfo]
    "a list of chains in the subsystem"


#
# We create AtomIndex and ResidueIndex classes that just
# wrap int. This allows us to check elsewhere in the code
# that we're getting the right type of index.
[docs]class AtomIndex(int):
    """
    Zero-based absolute atom index.

    Usually, you should get this through `system.atom_index`,
    but if you are _sure_ you know what you are doing,
    you can construct directly via `AtomIndex(index)`.
    """

    pass


[docs]class ResidueIndex(int):
    """
    Zero-based absolute residue index.

    Usually, you should get this through `system.residue_index`,
    but if you are _sure_ you know what you are doing,
    you can construct directly via `ResidueIndex(index)`.
    """

    pass


#
# Dictionary to cannonicalize residue names
#
cannonicalize = {
    "HID": "HIS",
    "HIE": "HIS",
    "HIP": "HIS",
    "ASH": "ASP",
    "GLH": "GLU",
    "LYN": "LYS",
    "CYX": "CYS",
}


[docs]class Indexer:
    """
    An object for performing atom and residue index lookups.
    """

[docs]    def __init__(
        self,
        abs_atom_index: Dict[Tuple[int, str], int],
        rel_residue_index: Dict[Tuple[int, int], int],
        residue_names: List[str],
        abs_resid_to_resname: Dict[int, str],
    ):
        """
        Initialize an Indexer object.

        Args:
            abs_atom_index: maps (resid, atom_name) to atomid
            rel_residue_index: maps (chainid, rel_resid) to resid
            residue_names: residue name for each atom
            abs_resid_to_resname: maps resid to resname
        """
        self.abs_atom_index = abs_atom_index
        self.rel_residue_index = rel_residue_index
        self.residue_names = residue_names
        self.abs_resid_to_resname = abs_resid_to_resname

[docs]    def residue(
        self,
        resid: int,
        expected_resname: Optional[str] = None,
        chainid: Optional[int] = None,
        one_based: bool = False,
    ) -> ResidueIndex:
        """
        Find the :class:`ResidueIndex`

        The indexing can be either absolute (if `chainid` is `None`),
        or relative to a chain (if `chainid` is set).

        Both `resid` and `chainid` are one-based if `one_based` is `True`,
        or both are zero-based if `one_based=False` (the default).

        If `expected_resname` is specified, error checking will be performed to
        ensure that the returned atom has the expected residue name. Note
        that the residue names are those after processing with `tleap`,
        so some residue names may not match their value in an input pdb file.

        Args:
            resid: the residue index to lookup
            expected_resname: the expected residue name, usually three all-caps characters,
                e.g. "ALA".
            chainid: the chain id to lookup
            one_based: use one-based indexing

        Returns:
            zero-based absolute residue index
        """
        if chainid is None:
            if one_based:
                res_index = resid - 1
            else:
                res_index = resid
        else:
            if one_based:
                res_index = self.rel_residue_index[(chainid - 1, resid - 1)]
            else:
                res_index = self.rel_residue_index[(chainid, resid)]

        if expected_resname is not None:
            actual_resname = self.abs_resid_to_resname[res_index]
            if actual_resname in cannonicalize:
                actual_resname = cannonicalize[actual_resname]
            if actual_resname != expected_resname:
                raise KeyError(
                    f"expected_resname={expected_resname}, but found res_name={actual_resname}."
                )

        return ResidueIndex(res_index)

[docs]    def atom(
        self,
        resid: int,
        atom_name: str,
        expected_resname: Optional[str] = None,
        chainid: Optional[int] = None,
        one_based: bool = False,
    ) -> AtomIndex:
        """
        Find the :class:`AtomIndex`

        The indexing can be either absolute (if `chainid` is `None`),
        or relative to a chain (if `chainid` is set).

        Both `resid` and `chainid` are one-based if `one_based` is `True`,
        or both are zero-based if `one_based=False` (the default).

        If `expected_resname` is specified, error checking will be performed to
        ensure that the returned atom has the expected residue name. Note
        that the residue names are those after processing with `tleap`,
        so some residue names may not match their value in an input pdb file.

        Args:
            resid: the residue index to lookup
            atom_name: the name of the atom to lookup
            expected_resname: the expected residue name, usually three all-caps characters,
                e.g. "ALA".
            chainid: the chain id to lookup
            one_based: use one-based indexing

        Returns:
            zero-based absolute atom index
        """
        resindex = self.residue(resid, expected_resname, chainid, one_based)
        return AtomIndex(self.abs_atom_index[resindex, atom_name])


[docs]def setup_indexing(topology):
    n_atoms = topology.getNumAtoms()

    atom_names = [atom.name for atom in topology.atoms()]
    assert len(atom_names) == n_atoms

    atom_numbers = [atom.index for atom in topology.atoms()]
    assert len(atom_numbers) == n_atoms

    residue_names = [atom.residue.name for atom in topology.atoms()]
    assert len(residue_names) == n_atoms

    residue_numbers = [atom.residue.index for atom in topology.atoms()]
    assert len(residue_numbers) == n_atoms

    # Setup mapping of resid to resname
    resid_to_resname = {}
    for resid, resname in zip(residue_numbers, residue_names):
        if resid in resid_to_resname:
            if resid_to_resname[resid] != resname:
                raise RuntimeError("Inconsistient residue names")
        else:
            resid_to_resname[resid] = resname

    # First setup absolute indexing.
    # This maps from (abs_resid, name): atom_index
    # Everything is zero-based
    abs_atom_index = {
        (res_num, atom_name): atom_index
        for res_num, atom_name, atom_index in zip(
            residue_numbers, atom_names, atom_numbers
        )
    }

    # Now setup relative residue indexing. This maps from
    # (chainid, rel_resid): resid.
    rel_residue_index = {}
    for i, chain in enumerate(topology.chains()):
        residues = list(chain.residues())
        if not residues:
            continue
        min_index = min(res.index for res in residues)
        for res in residues:
            rel_residue_index[(i, res.index - min_index)] = res.index

    return Indexer(abs_atom_index, rel_residue_index, residue_names, resid_to_resname)