"""
Look up atom and residue indices.
The :class:`Indexer` class is the main object for performing
indexing look ups.
An :class:`AtomIndex` is a zero-based absolute atom index.
A :class:`ResidueIndex` is a zero-based absolute residue index.
"""
from typing import Dict, List, NamedTuple, Optional, Tuple
class _ChainInfo(NamedTuple):
residues: Dict[int, int]
"maps from index_within_chain to global_index"
class _SubSystemInfo(NamedTuple):
n_residues: int
"number of residues in this subsystem"
chains: List[_ChainInfo]
"a list of chains in the subsystem"
#
# We create AtomIndex and ResidueIndex classes that just
# wrap int. This allows us to check elsewhere in the code
# that we're getting the right type of index.
[docs]class AtomIndex(int):
"""
Zero-based absolute atom index.
Usually, you should get this through `system.atom_index`,
but if you are _sure_ you know what you are doing,
you can construct directly via `AtomIndex(index)`.
"""
pass
[docs]class ResidueIndex(int):
"""
Zero-based absolute residue index.
Usually, you should get this through `system.residue_index`,
but if you are _sure_ you know what you are doing,
you can construct directly via `ResidueIndex(index)`.
"""
pass
#
# Dictionary to cannonicalize residue names
#
cannonicalize = {
"HID": "HIS",
"HIE": "HIS",
"HIP": "HIS",
"ASH": "ASP",
"GLH": "GLU",
"LYN": "LYS",
"CYX": "CYS",
}
[docs]class Indexer:
"""
An object for performing atom and residue index lookups.
"""
[docs] def __init__(
self,
abs_atom_index: Dict[Tuple[int, str], int],
rel_residue_index: Dict[Tuple[int, int], int],
residue_names: List[str],
abs_resid_to_resname: Dict[int, str],
):
"""
Initialize an Indexer object.
Args:
abs_atom_index: maps (resid, atom_name) to atomid
rel_residue_index: maps (chainid, rel_resid) to resid
residue_names: residue name for each atom
abs_resid_to_resname: maps resid to resname
"""
self.abs_atom_index = abs_atom_index
self.rel_residue_index = rel_residue_index
self.residue_names = residue_names
self.abs_resid_to_resname = abs_resid_to_resname
[docs] def residue(
self,
resid: int,
expected_resname: Optional[str] = None,
chainid: Optional[int] = None,
one_based: bool = False,
) -> ResidueIndex:
"""
Find the :class:`ResidueIndex`
The indexing can be either absolute (if `chainid` is `None`),
or relative to a chain (if `chainid` is set).
Both `resid` and `chainid` are one-based if `one_based` is `True`,
or both are zero-based if `one_based=False` (the default).
If `expected_resname` is specified, error checking will be performed to
ensure that the returned atom has the expected residue name. Note
that the residue names are those after processing with `tleap`,
so some residue names may not match their value in an input pdb file.
Args:
resid: the residue index to lookup
expected_resname: the expected residue name, usually three all-caps characters,
e.g. "ALA".
chainid: the chain id to lookup
one_based: use one-based indexing
Returns:
zero-based absolute residue index
"""
if chainid is None:
if one_based:
res_index = resid - 1
else:
res_index = resid
else:
if one_based:
res_index = self.rel_residue_index[(chainid - 1, resid - 1)]
else:
res_index = self.rel_residue_index[(chainid, resid)]
if expected_resname is not None:
actual_resname = self.abs_resid_to_resname[res_index]
if actual_resname in cannonicalize:
actual_resname = cannonicalize[actual_resname]
if actual_resname != expected_resname:
raise KeyError(
f"expected_resname={expected_resname}, but found res_name={actual_resname}."
)
return ResidueIndex(res_index)
[docs] def atom(
self,
resid: int,
atom_name: str,
expected_resname: Optional[str] = None,
chainid: Optional[int] = None,
one_based: bool = False,
) -> AtomIndex:
"""
Find the :class:`AtomIndex`
The indexing can be either absolute (if `chainid` is `None`),
or relative to a chain (if `chainid` is set).
Both `resid` and `chainid` are one-based if `one_based` is `True`,
or both are zero-based if `one_based=False` (the default).
If `expected_resname` is specified, error checking will be performed to
ensure that the returned atom has the expected residue name. Note
that the residue names are those after processing with `tleap`,
so some residue names may not match their value in an input pdb file.
Args:
resid: the residue index to lookup
atom_name: the name of the atom to lookup
expected_resname: the expected residue name, usually three all-caps characters,
e.g. "ALA".
chainid: the chain id to lookup
one_based: use one-based indexing
Returns:
zero-based absolute atom index
"""
resindex = self.residue(resid, expected_resname, chainid, one_based)
return AtomIndex(self.abs_atom_index[resindex, atom_name])
[docs]def setup_indexing(topology):
n_atoms = topology.getNumAtoms()
atom_names = [atom.name for atom in topology.atoms()]
assert len(atom_names) == n_atoms
atom_numbers = [atom.index for atom in topology.atoms()]
assert len(atom_numbers) == n_atoms
residue_names = [atom.residue.name for atom in topology.atoms()]
assert len(residue_names) == n_atoms
residue_numbers = [atom.residue.index for atom in topology.atoms()]
assert len(residue_numbers) == n_atoms
# Setup mapping of resid to resname
resid_to_resname = {}
for resid, resname in zip(residue_numbers, residue_names):
if resid in resid_to_resname:
if resid_to_resname[resid] != resname:
raise RuntimeError("Inconsistient residue names")
else:
resid_to_resname[resid] = resname
# First setup absolute indexing.
# This maps from (abs_resid, name): atom_index
# Everything is zero-based
abs_atom_index = {
(res_num, atom_name): atom_index
for res_num, atom_name, atom_index in zip(
residue_numbers, atom_names, atom_numbers
)
}
# Now setup relative residue indexing. This maps from
# (chainid, rel_resid): resid.
rel_residue_index = {}
for i, chain in enumerate(topology.chains()):
residues = list(chain.residues())
if not residues:
continue
min_index = min(res.index for res in residues)
for res in residues:
rel_residue_index[(i, res.index - min_index)] = res.index
return Indexer(abs_atom_index, rel_residue_index, residue_names, resid_to_resname)