Source code for meld.system.amber

"""
A module to read Amber parmtop and crd files
"""

from typing import Dict, List, Set, Tuple

import numpy as np


[docs]class CrdReader:
    """
    Class to read in coordinates from mdcrd string
    """

    _coords: np.ndarray
    _box_vectors: np.ndarray

[docs]    def __init__(self, crd_string: str):
        """
        Initialize CrdReader

        Args:
            crd_string: mdcrd string from tleap
        """
        self.crd_string = crd_string
        self._read()

[docs]    def get_coordinates(self) -> np.ndarray:
        """
        Get the coordiantes

        Returns:
            coordinates, shape(n_atoms, 3)
        """
        return self._coords

[docs]    def get_box_vectors(self) -> np.ndarray:
        """
        Get the box_vectors

        Returns:
            box vectors, shape(3, 3)
        """
        return self._box_vectors

    def _read(self):
        def split_len(seq, length):
            return [seq[i : i + length] for i in range(0, len(seq), length)]

        lines = self.crd_string.splitlines()
        n_atoms = int(lines[1].split()[0])
        coords = []
        box_vectors = None

        for line in lines[2:]:
            cols = split_len(line, 12)
            cols = [float(c) for c in cols]
            coords.extend(cols)

        # check for box vectors
        if len(coords) == 3 * n_atoms + 6:
            coords, box_vectors = coords[:-6], coords[-6:]
            for bv in box_vectors[-3:]:
                if not bv == 90.0:
                    raise RuntimeError("box angle != 90.0 degrees")
            box_vectors = np.array(box_vectors[:-3])
        elif not len(coords) == 3 * n_atoms:
            raise RuntimeError("len(coords) != 3 * n_atoms")

        coords = np.array(coords)
        coords = coords.reshape((n_atoms, 3)) / 10.0  # angstrom -> nm
        self._coords = coords

        if box_vectors is not None:
            self._box_vectors = box_vectors / 10.0
        else:
            self._box_vectors = None


[docs]class ParmTopReader:
    """
    Read in information from parmtop file
    """

[docs]    def __init__(self, top_string: str):
        """
        Initialize ParmTopReader

        Args:
            top_string: topology string from tleap
        """
        self._top_string = top_string

[docs]    def get_atom_names(self) -> List[str]:
        """
        Get the atom names

        Returns:
            the name for each atom
        """
        return self._get_parameter_block("%FLAG ATOM_NAME", chunksize=4)

[docs]    def get_residue_names(self) -> List[str]:
        """
        Get the residue names

        Returns:
            the residue name for each atom
        """
        res_names = self._get_parameter_block("%FLAG RESIDUE_LABEL", chunksize=4)
        res_numbers = self.get_residue_numbers()
        return [res_names[i - 1] for i in res_numbers]

[docs]    def get_residue_numbers(self) -> List[int]:
        """
        Get the residue numbers

        Returns:
            the residue number for each atom
        """
        n_atoms = int(self._get_parameter_block("%FLAG POINTERS", chunksize=8)[0])
        res_pointers_str = self._get_parameter_block(
            "%FLAG RESIDUE_POINTER", chunksize=8
        )
        res_pointers = [int(p) for p in res_pointers_str]
        res_pointers.append(n_atoms + 1)
        residue_numbers: List[int] = []
        for res_number, (start, end) in enumerate(
            zip(res_pointers[:-1], res_pointers[1:])
        ):
            residue_numbers.extend([res_number + 1] * (int(end) - int(start)))
        return residue_numbers

    def _get_parameter_block(self, flag: str, chunksize: int) -> List[str]:
        lines = self._top_string.splitlines()

        # find the line with our flag
        index_start = [i for (i, line) in enumerate(lines) if line.startswith(flag)][
            0
        ] + 2

        # find the index of the next flag
        index_end = [
            i for (i, line) in enumerate(lines[index_start:]) if line and line[0] == "%"
        ][0] + index_start

        # do something useful with the data
        def chunks(l, n):
            """Yield successive n-sized chunks from l."""
            for i in range(0, len(l), n):
                yield l[i : i + n]

        data = []
        for line in lines[index_start:index_end]:
            for chunk in chunks(line, chunksize):
                data.append(chunk.strip())
        return data

[docs]    def get_bonds(self) -> Set[Tuple[int, int]]:
        """
        Get the bonded atoms from topology

        Returns:
            The set of bonds from the topology

        Note:
           the indices are zero-based
        """
        # the amber bonds section contains a triple of integers for each bond:
        # i, j, type_index. We need i, j, but will end up ignoring type_index
        bond_item_str = self._get_parameter_block(
            "%FLAG BONDS_WITHOUT_HYDROGEN", chunksize=8
        )
        bond_item_str += self._get_parameter_block(
            "%FLAG BONDS_INC_HYDROGEN", chunksize=8
        )
        # the bonds section of the amber file is indexed by coordinate
        # to get the atom index we divide by three and add one
        bond_items = [int(item) // 3 + 1 for item in bond_item_str]

        bonds = set()
        # take the items 3 at a time, ignoring the type_index
        for i, j, _ in zip(bond_items[::3], bond_items[1::3], bond_items[2::3]):
            # Add both orders to make life easy for callers.
            # Amber is 1-based, but we are 0-based.
            bonds.add((i - 1, j - 1))
            bonds.add((j - 1, i - 1))
        return bonds

[docs]    def get_atom_map(self) -> Dict[Tuple[int, str], int]:
        """
        Get the atom map

        Returns:
            the mapping from (resid, atom_name) to atom_index

        Note:
           both resid and atom_index are zero-based
        """
        residue_numbers = [r - 1 for r in self.get_residue_numbers()]
        atom_names = self.get_atom_names()
        atom_numbers = range(len(atom_names))
        return {
            (res_num, atom_name): atom_index
            for res_num, atom_name, atom_index in zip(
                residue_numbers, atom_names, atom_numbers
            )
        }