Source code for perses.rjmc.topology_proposal

"""
This file contains the base classes for topology proposals
"""

import simtk.openmm as openmm
import simtk.openmm.app as app
from collections import namedtuple
import copy
import warnings
import logging
import itertools
import json
import os
import openeye.oechem as oechem
import numpy as np
import openeye.oeomega as oeomega
import tempfile
import networkx as nx
from openmoltools import forcefield_generators
import openeye.oegraphsim as oegraphsim
from perses.rjmc.geometry import FFAllAngleGeometryEngine
from perses.storage import NetCDFStorageView
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
import openmoltools
import base64
import logging
import time
import progressbar
from typing import List, Dict
try:
    from subprocess import getoutput  # If python 3
except ImportError:
    from commands import getoutput  # If python 2

################################################################################
# CONSTANTS
################################################################################

OESMILES_OPTIONS = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_ISOMERIC | oechem.OESMILESFlag_Hydrogens

#DEFAULT_ATOM_EXPRESSION = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_Hybridization #| oechem.OEExprOpts_EqAromatic | oechem.OEExprOpts_EqHalogen | oechem.OEExprOpts_RingMember | oechem.OEExprOpts_EqCAliphaticONS
#DEFAULT_BOND_EXPRESSION = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_RingMember

DEFAULT_ATOM_EXPRESSION = oechem.OEExprOpts_DefaultAtoms
DEFAULT_BOND_EXPRESSION = oechem.OEExprOpts_DefaultBonds
################################################################################
# LOGGER
################################################################################

_logger = logging.getLogger("proposal_engine")

################################################################################
# UTILITIES
################################################################################

def append_topology(destination_topology, source_topology, exclude_residue_name=None):
    """
    Add the source OpenMM Topology to the destination Topology.

    Parameters
    ----------
    destination_topology : simtk.openmm.app.Topology
        The Topology to which the contents of `source_topology` are to be added.
    source_topology : simtk.openmm.app.Topology
        The Topology to be added.
    exclude_residue_name : str, optional, default=None
        If specified, any residues matching this name are excluded.

    """
    if exclude_residue_name is None:
        exclude_residue_name = "   " #something with 3 characters that is never a residue name
    newAtoms = {}
    for chain in source_topology.chains():
        newChain = destination_topology.addChain(chain.id)
        for residue in chain.residues():
            if (residue.name[:3] == exclude_residue_name[:3]):
                continue
            newResidue = destination_topology.addResidue(residue.name, newChain, residue.id)
            for atom in residue.atoms():
                newAtom = destination_topology.addAtom(atom.name, atom.element, newResidue, atom.id)
                newAtoms[atom] = newAtom
    for bond in source_topology.bonds():
        if (bond[0].residue.name[:3]==exclude_residue_name[:3]) or (bond[1].residue.name[:3]==exclude_residue_name[:3]):
            continue
        # TODO: Preserve bond order info using extended OpenMM API
        destination_topology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])

def deepcopy_topology(source_topology):
    """
    Drop-in replacement for copy.deepcopy(topology) that fixes backpointer issues.

    Parameters
    ----------
    source_topology : simtk.openmm.app.Topology
        The Topology to be added.

    """
    topology = app.Topology()
    append_topology(topology, source_topology)
    return topology

def has_h_mapped(atommap, mola: oechem.OEMol, molb: oechem.OEMol):
    for a_atom, b_atom in atommap.items():
        if mola.GetAtom(oechem.OEHasAtomIdx(a_atom)).GetAtomicNum() == 1 or molb.GetAtom(oechem.OEHasAtomIdx(b_atom)).GetAtomicNum() == 1:
            return True

    return False

[docs]class SmallMoleculeAtomMapper(object):
    """
    This is a utility class for generating and retrieving sets of atom maps between molecules using OpenEye.
    It additionally verifies that all atom maps lead to valid proposals, as well as checking that the graph of
    proposals is not disconnected.
    """

[docs]    def __init__(self, list_of_smiles: List[str], atom_match_expression: int=None, bond_match_expression: int=None, prohibit_hydrogen_mapping: bool=True):

        self._unique_noncanonical_smiles_list = list(set(list_of_smiles))
        self._oemol_dictionary = self._initialize_oemols(self._unique_noncanonical_smiles_list)
        self._unique_smiles_list = list(self._oemol_dictionary.keys())

        self._n_molecules = len(self._unique_smiles_list)

        self._prohibit_hydrogen_mapping = prohibit_hydrogen_mapping

        if atom_match_expression is None:
            self._atom_expr = DEFAULT_ATOM_EXPRESSION
        else:
            self._atom_expr = atom_match_expression
        
        if bond_match_expression is None:
            self._bond_expr = DEFAULT_BOND_EXPRESSION
        else:
            self._bond_expr = bond_match_expression
        
        self._molecules_mapped = False
        self._molecule_maps = {}
        self._failed_molecule_maps = {}

        self._proposal_matrix = np.zeros([self._n_molecules, self._n_molecules])
        self._proposal_matrix_generated = False
        self._constraints_checked = False

[docs]    def map_all_molecules(self):
        """
        Run the atom mapping routines to get all atom maps. This automatically preserves only maps that contain enough torsions to propose.
        It does not ensure that constraints do not change--use verify_constraints to check that property. This method is idempotent--running it a second
        time will have no effect.
        """
        
        if self._molecules_mapped:
            _logger.info("The molecules have already been mapped. Returning.")
            return

        with progressbar.ProgressBar(max_value=self._n_molecules*(self._n_molecules-1)/2.0) as bar:

            current_index = 0

            for molecule_smiles_pair in itertools.combinations(self._oemol_dictionary.keys(), 2):
                molecule_pair = tuple(self._oemol_dictionary[molecule] for molecule in molecule_smiles_pair)

                self._molecule_maps[molecule_smiles_pair] = []
                self._failed_molecule_maps[molecule_smiles_pair] = []

                atom_matches, failed_atom_matches = self._map_atoms(molecule_pair[0], molecule_pair[1])

                for atom_match in atom_matches:
                    self._molecule_maps[molecule_smiles_pair].append(atom_match)

                for failed_atom_match in failed_atom_matches:
                    self._failed_molecule_maps[molecule_smiles_pair].append(failed_atom_match)

                current_index += 1
                bar.update(current_index)
            
        self._molecules_mapped = True
    
[docs]    def get_atom_maps(self, smiles_A: str, smiles_B: str) -> List[Dict]:
        """
        Given two canonical smiles strings, get the atom maps.

        Arguments
        ---------
        smiles_A : str
            Canonical smiles for the first molecule (keys)
        smiles_B : str
            Canonical smiles for the second molecule (values)

        Returns
        -------
        atom_maps : list of dict
            List of map of molecule_A_atom : molecule_B_atom
        """
        try:
            atom_maps = self._molecule_maps[(smiles_A, smiles_B)]
            return atom_maps
        except KeyError:
            try:
                atom_maps = self._molecule_maps[(smiles_B, smiles_A)]
                output_atom_maps = []
                for atom_map in atom_maps:
                    reversed_map = {value: key for key, value in atom_map.items()}
                    output_atom_maps.append(reversed_map)
                return output_atom_maps
            except KeyError as e:
                print("The requested pair was not found. Ensure you are using canonicalized smiles.")
                raise e

[docs]    def generate_and_check_proposal_matrix(self):
        """
        Generate a proposal matrix and check it for connectivity. Note that if constraints have not been checked, this may produce
        a proposal matrix that makes proposals changing constraint lengths.
        """

        if not self._constraints_checked:
            _logger.warn("Constraints have not been checked. Building proposal matrix, but it might result in error.")
            _logger.warn("Call constraint_check() with an appropriate system generator to ensure this does not happen.")

        proposal_matrix = self._create_proposal_matrix()
        adjacency_matrix = proposal_matrix > 0.0
        graph = nx.from_numpy_array(adjacency_matrix)
        
        if not nx.is_connected(graph):
            _logger.warn("The graph of proposals is not connected! Some molecules will be unreachable.")
        
        self._proposal_matrix = proposal_matrix

    def _create_proposal_matrix(self) -> np.array:
        """
        In RJ calculations, we propose based on how many atoms are in common between molecules. This routine checks that the graph of proposals cannot
        be separated. In calculating the proposal matrix, we use the min(n_atom_mapped) when there are multiple maps.

        Returns
        -------
        normalized_proposal_matrix : np.array of float
            The proposal matrix
        """
        proposal_matrix = np.zeros([self._n_molecules, self._n_molecules])
        
        for smiles_pair, atom_maps in self._molecule_maps.items():

            #retrieve the smiles strings of these molecules
            molecule_A = smiles_pair[0]
            molecule_B = smiles_pair[1]
            
            #retrieve the indices of these molecules from the list of smiles
            molecule_A_idx = self._unique_smiles_list.index(molecule_A)
            molecule_B_idx = self._unique_smiles_list.index(molecule_B)
            
            #if there are no maps, we can't propose
            if len(atom_maps) == 0:
                proposal_matrix[molecule_A_idx, molecule_B_idx] = 0.0
                proposal_matrix[molecule_B_idx, molecule_A_idx] = 0.0
                continue

            #get a list of the number of atoms mapped for each map
            number_of_atoms_in_maps = [len(atom_map.keys()) for atom_map in atom_maps]

            unnormalized_proposal_probability = float(min(number_of_atoms_in_maps))


            proposal_matrix[molecule_A_idx, molecule_B_idx] = unnormalized_proposal_probability
            proposal_matrix[molecule_B_idx, molecule_A_idx] = unnormalized_proposal_probability

        #normalize the proposal_matrix:

        #First compute the normalizing constants by summing the rows
        normalizing_constants = np.sum(proposal_matrix, axis=1)

        #If any normalizing constants are zero, that means that the molecule is completely unproposable:
        if np.any(normalizing_constants==0.0):
            where_zero = np.where(normalizing_constants==0.0)[0]
            failed_molecules = []
            for zero_idx in where_zero:
                failed_molecules.append(self._unique_smiles_list[zero_idx])

            print("The following molecules are unproposable:\n")
            print(failed_molecules)
            raise ValueError("Some molecules could not be proposed. Make sure the atom mapping criteria do not completely exclude a molecule.")
        
        normalized_proposal_matrix = proposal_matrix / normalizing_constants[:, np.newaxis]

        return normalized_proposal_matrix
    
    @staticmethod
    def _canonicalize_smiles(mol: oechem.OEMol) -> str:
        """
        Convert an oemol into canonical isomeric smiles

        Parameters
        ----------
        mol : oechem.OEmol
            OEMol for molecule
        Returns
        -------
        iso_can_smiles : str
            OpenEye isomeric canonical smiles corresponding to the input
        """
        iso_can_smiles = oechem.OECreateSmiString(mol, OESMILES_OPTIONS)
        return iso_can_smiles
    
    def _map_atoms(self, moleculeA: oechem.OEMol, moleculeB: oechem.OEMol, exhaustive: bool=True) -> List[Dict]:
        """
        Run the mapping on the two input molecules. This will return a list of atom maps. 
        This is an internal method that is only intended to be used by other methods of this class.

        Arguments
        ---------
        moleculeA : oechem.OEMol
            The first oemol of the pair
        moleculeB : oechem.OEMol
            The second oemol of the pair
        exhaustive: bool, default True
            Whether to use an exhaustive procedure for enumerating MCS matches. Default True, but for large molecules,
            may be prohibitively slow. 

        Returns
        -------
        atom_matches: list of dict
            This returns a list of dictionaries, where each dictionary is a map of the form {molA_atom: molB_atom}.
            Atom maps with less than 3 mapped atoms, or where the map is not sufficient to begin a geometry proposal
            will be returned separately.
        failed_atom_matches : list of dict
            This is a list of atom maps that cannot be used for geometry proposals. It is returned for debugging purposes.
        """    

        oegraphmol_current = oechem.OEGraphMol(moleculeA) # pattern molecule
        oegraphmol_proposed = oechem.OEGraphMol(moleculeB) # target molecule
        
        if exhaustive:
            mcs = oechem.OEMCSSearch(oechem.OEMCSType_Exhaustive)
        else:
            mcs = oechem.OEMCSSearch(oechem.OEMCSType_Approximate)
           
        mcs.Init(oegraphmol_current, self._atom_expr, self._bond_expr)

        mcs.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles())

        #only use unique matches
        unique = True

        matches = [m for m in mcs.Match(oegraphmol_proposed, unique)]

        #if there are no matches at all, we return two empty lists.
        if not matches:
            return [], []

        atom_matches = []
        failed_atom_matches = []

        for match in matches:
            #if there are less than 3 mapped atoms, it can't be used for geometry proposals.
            #Continue without recording it.
            if match.NumAtoms() < 3:
                continue

            #extract the match as a dictionary.
            a_to_b_atom_map = {}
            for matchpair in match.GetAtoms():
                a_index = matchpair.pattern.GetIdx()
                b_index = matchpair.target.GetIdx()

                #if we aren't allowing hydrogen maps, we need to ensure that neither mapped atom is a hydrogen
                if self._prohibit_hydrogen_mapping:
                    if matchpair.pattern.GetAtomicNum() == 1 or matchpair.target.GetAtomicNum() == 1:
                        continue
                    else:
                        a_to_b_atom_map[a_index] = b_index
                else:
                    a_to_b_atom_map[a_index] = b_index
            #Even if there are at least three atoms mapped, it is possible that the geometry proposal still cannot proceed
            #An example of this would be mapping H-C-H -- There will be no topological torsions to use for the proposal.
            if self._valid_match(moleculeA, moleculeB, a_to_b_atom_map):
                atom_matches.append(a_to_b_atom_map)
            else:
                failed_atom_matches.append(a_to_b_atom_map)
        
        return atom_matches, failed_atom_matches

    def _valid_match(self, moleculeA: oechem.OEMol, moleculeB: oechem.OEMol, a_to_b_mapping: Dict[int, int]) -> bool:
        """
        Check that the map can allow for a geometry proposal. Essentially, this amounts to ensuring that there exists
        a starting topological torsion. Examples of cases where this would not exist would include:
        H-C-H, Cl-C-Cl, CH3, etc.

        Arguments
        ---------
        moleculeA : oechem.OEMol
            The first molecule in the mapping
        moleculeB: oechem.OEMol
            The second molecule used in the mapping
        b_to_a_mapping : dict
            The mapping from molecule B to molecule A

        Returns
        -------
        is_valid : bool
            Whether the mapping can be used to generate a geometry proposal
        """
        graphA = self._mol_to_graph(moleculeA)
        graphB = self._mol_to_graph(moleculeB)

        #if both of these are good to make a map, we can make the map
        return self._can_make_proposal(graphA, a_to_b_mapping.keys()) and self._can_make_proposal(graphB, a_to_b_mapping.values())

    def _can_make_proposal(self, graph: nx.Graph, mapped_atoms: List) -> bool:
        """
        Check whether a given setup (molecule graph along with mapped atoms) can be proposed.
        
        Arguments
        ---------
        graph: nx.Graph
            The molecule represented as a NetworkX graph
        mapped_atoms : list
            The list of atoms that have been mapped
        
        Returns
        -------
        can_make_proposal : bool
            Whether this map permits the GeometryEngine to make a proposal
        """

        proposable = False
        total_atoms = set(range(graph.number_of_nodes()))
        unmapped_atoms = total_atoms - set(mapped_atoms)
        mapped_atoms_set = set(mapped_atoms)
         
        #find the set of atoms that are unmapped, but on the boundary with those that are mapped
        boundary_atoms = nx.algorithms.node_boundary(graph, unmapped_atoms, mapped_atoms)

        #now check if there is atom 3 hops away and has a position. Since we are starting with boundary
        #atoms, there will never be a case where there is an atom with positions 3 hops away but no torsion
        #A ring might cause this artifact, but there would be a torsion in that case.
        for atom in boundary_atoms:
            shortest_paths = nx.algorithms.shortest_path_length(graph, source=atom)
            for other_atom, distance in shortest_paths.items():
                if distance == 3 and other_atom in mapped_atoms:
                    #find all shortest paths to the other atom. if any of them have all atoms with positions, it can be proposed
                    shortest_path = nx.shortest_path(graph, source=atom, target=other_atom)
                    if len(mapped_atoms_set.intersection(shortest_path)) == 3:
                        proposable = True
        
        return proposable

    def _mol_to_graph(self, molecule: oechem.OEMol) -> nx.Graph:
        """
        Convert an OEMol to a networkx graph for analysis

        Arguments
        ---------
        molecule : oechem.OEMol
            Molecule to convert to a graph
        
        Returns
        -------
        g : nx.Graph
            NetworkX graph representing the molecule
        """
        g = nx.Graph()
        for atom in molecule.GetAtoms():
            g.add_node(atom.GetIdx())
        for bond in molecule.GetBonds():
            g.add_edge(bond.GetBgnIdx(), bond.GetEndIdx(), bond=bond)
        
        return g

    def _initialize_oemols(self, list_of_smiles: List[str]) -> Dict[str, oechem.OEMol]:
        """
        Initialize the set of OEMols that we will use to construct the atom map

        Arguments
        ---------
        list_of_smiles : list of str
            list of smiles strings to use
        
        Returns
        -------
        dict_of_oemol : dict of oechem.OEmol
            dict of canonical_smiles : oechem.OEMol
        """
        list_of_mols = []
        for smiles in list_of_smiles:
            mol = oechem.OEMol()
            oechem.OESmilesToMol(mol, smiles)
            oechem.OEAddExplicitHydrogens(mol)
            list_of_mols.append(mol)

        #now write out all molecules and read them back in:
        molecule_string = SmallMoleculeAtomMapper.molecule_library_to_string(list_of_mols)
        dict_of_oemol = SmallMoleculeAtomMapper.molecule_library_from_string(molecule_string)

        return dict_of_oemol

[docs]    def get_oemol_from_smiles(self, smiles_string: str) -> oechem.OEMol:
        """
        Get the OEMol corresponding to the smiles string requested. This method exists
        to avoid having atom order rearranged by regeneration from smiles.

        Arguments
        ---------
        smiles_string : str
            The smiles string for which to retrieve the OEMol. Only pre-existing OEMols are allowed
        
        Returns
        -------
        mol : oechem.OEMol
            The OEMol corresponding to the requested smiles string.
        """
        return self._oemol_dictionary[smiles_string]

[docs]    def get_smiles_index(self, smiles: str) -> int:
        """
        Get the index of the smiles in question

        Arguments
        ---------
        smiles : str
            Canonicalized smiles string to retrieve molecule
        
        Returns
        -------
        mol_index : int
            Index of molecule in list
        """
        mol_index = self._unique_smiles_list.index(smiles)
        return mol_index

[docs]    @staticmethod
    def molecule_library_from_string(molecule_string: str) -> Dict[str, oechem.OEMol]:
        """
        Given a library of molecules in a mol2-format string, return a dictionary that maps
        the respective canonical smiles to the oemol object.

        Parameters
        ----------
        molecule_string : str
            The string containing the molecule data

        Returns
        -------
        molecule_dictionary : dict of str: oemol
            Dictionary mapping smiles strings to OEMols

        """

        ifs = oechem.oemolistream()
        ifs.SetFormat(oechem.OEFormat_OEB)
        ifs.openstring(molecule_string)

        molecule_dictionary = {}
        for mol in ifs.GetOEMols():
            copied_mol = oechem.OEMol(mol)
            canonical_smiles = SmallMoleculeAtomMapper._canonicalize_smiles(copied_mol)
            molecule_dictionary[canonical_smiles] = copied_mol

        ifs.close()

        return molecule_dictionary

[docs]    @staticmethod
    def molecule_library_to_string(molecule_library: List[oechem.OEMol]) -> str:
        """
        Given a list of oechem.OEMol objects, write all of them in mol2 format to a string

        Parameters
        ----------
        molecule_library : list of oechem.OEMol
            molecules to write to string

        Returns
        -------
        molecule_string : str
            String containing molecules in mol2 format.
        """

        ofs = oechem.oemolostream()
        ofs.SetFormat(oechem.OEFormat_OEB)
        ofs.openstring()

        for mol in molecule_library:
            oechem.OEWriteMolecule(ofs, mol)

        molecule_string = ofs.GetString()

        ofs.close()

        return molecule_string

[docs]    def to_json(self) -> str:
        """
        Write out this class to JSON. This saves all information (including built molecules and maps, if present)

        Returns
        -------
        json_str : str
            JSON string representing this class
        """
        json_dict = {}
        
        #first, save all the things that are not too difficult to put into JSON
        json_dict['molecule_maps'] = {"_".join(smiles_names): maps for smiles_names, maps in self._molecule_maps.items()}
        json_dict['molecules_mapped'] = self._molecules_mapped
        json_dict['failed_molecule_maps'] = {"_".join(smiles_names): maps for smiles_names, maps in self._failed_molecule_maps.items()}
        json_dict['constraints_checked'] = self._constraints_checked
        json_dict['bond_expr'] = self._bond_expr
        json_dict['atom_expr'] = self._atom_expr
        json_dict['proposal_matrix'] = self._proposal_matrix.tolist()
        json_dict['proposal_matrix_generated'] = self._proposal_matrix_generated
        json_dict['unique_smiles_list'] = self._unique_smiles_list

        #now we have to convert the OEMols to a string format that preserves the atom ordering in order to save them
        #We will use the multi-molecule mol2 scheme, but instead of saving to a file, we will save to a string.

        molecule_bytes = SmallMoleculeAtomMapper.molecule_library_to_string(self._oemol_dictionary.values())

        base64_molecule_bytes = base64.b64encode(molecule_bytes)

        json_dict['molecules'] = base64_molecule_bytes.decode()

        return json.dumps(json_dict)

[docs]    @classmethod
    def from_json(cls, json_string: str):
        """
        Restore this class from a saved JSON file.

        Arguments
        ---------
        json_string : str
            The JSON string representing the serialized class
        
        Returns
        -------
        atom_mapper : SmallMoleculeAtomMapper
            An instance of the SmallMoleculeAtomMapper
        """
        json_dict = json.loads(json_string)

        #first let's read in all the molecules that were saved as a string:
        molecule_bytes = json_dict['molecules'].encode()
        molecule_string = base64.b64decode(molecule_bytes)
        molecule_dictionary = SmallMoleculeAtomMapper.molecule_library_from_string(molecule_string)

        bond_expr = json_dict['bond_expr']
        atom_expr = json_dict['atom_expr']
        smiles_list = json_dict['unique_smiles_list']

        map_to_ints = lambda maps: [{int(key): int(value) for key, value in map.items()} for map in maps]

        mapper = cls(smiles_list, atom_match_expression=atom_expr, bond_match_expression=bond_expr)

        mapper._molecule_maps = {tuple(smiles for smiles in key.split("_")) : map_to_ints(maps) for key, maps in json_dict['molecule_maps'].items()}
        mapper._molecules_mapped = json_dict['molecules_mapped']
        mapper._failed_molecule_maps = {tuple(smiles for smiles in key.split("_")) : maps for key, maps in json_dict['failed_molecule_maps'].items()}
        mapper._constraints_checked = json_dict['constraints_checked']
        mapper._proposal_matrix = np.array(json_dict['proposal_matrix'])
        mapper._proposal_matrix_generated = json_dict['proposal_matrix_generated']
        mapper._oemol_dictionary = molecule_dictionary

        return mapper

    @property
    def proposal_matrix(self):
        return self._proposal_matrix
    
    @property
    def n_molecules(self):
        return self._n_molecules
    
    @property
    def smiles_list(self):
        return self._unique_smiles_list



from perses.rjmc.geometry import NoTorsionError
[docs]class TopologyProposal(object):
    """
    This is a container class with convenience methods to access various objects needed
    for a topology proposal

    Arguments
    ---------
    new_topology : simtk.openmm.Topology object
        openmm Topology representing the proposed new system
    new_system : simtk.openmm.System object
        openmm System of the newly proposed state
    old_topology : simtk.openmm.Topology object
        openmm Topology of the current system
    old_system : simtk.openmm.System object
        openm System of the current state
    logp_proposal : float
        contribution from the chemical proposal to the log probability of acceptance (Eq. 36 for hybrid; Eq. 53 for two-stage)
    new_to_old_atom_map : dict
        {new_atom_idx : old_atom_idx} map for the two systems
    old_alchemical_atoms : list, optional, default=None
        List of all atoms in old system that are being transformed.
        If None, all atoms are assumed to be part of the alchemical region.
    chemical_state_key : str
        The current chemical state (unique)
    metadata : dict
        additional information of interest about the state

    Properties
    ----------
    new_topology : simtk.openmm.Topology object
        openmm Topology representing the proposed new system
    new_system : simtk.openmm.System object
        openmm System of the newly proposed state
    old_topology : simtk.openmm.Topology object
        openmm Topology of the current system
    old_system : simtk.openmm.System object
        openm System of the current state
    old_positions : [n, 3] np.array, Quantity
        positions of the old system
    logp_proposal : float
        contribution from the chemical proposal to the log probability of acceptance (Eq. 36 for hybrid; Eq. 53 for two-stage)
    new_to_old_atom_map : dict
        {new_atom_idx : old_atom_idx} map for the two systems
    old_to_new_atom_map : dict
        {old_atom_idx : new_atom_idx} map for the two systems
    new_alchemical_atoms : list
        List of all atoms in new system that are being transformed
    new_environment_atoms : list
        List of all atoms in new system that are not transformed, just mapped
    old_alchemical_atoms : list
        List of all atoms in old system that are being transformed
    old_environment_atoms : list
        List of all atoms in old system that are not transformed, just mapped
    unique_new_atoms : list of int
        List of indices of the unique new atoms
    unique_old_atoms : list of int
        List of indices of the unique old atoms
    natoms_new : int
        Number of atoms in the new system
    natoms_old : int
        Number of atoms in the old system
    old_chemical_state_key : str
        The previous chemical state key
    new_chemical_state_key : str
        The proposed chemical state key
    metadata : dict
        additional information of interest about the state
    """

[docs]    def __init__(self,
                 new_topology=None, new_system=None,
                 old_topology=None, old_system=None,
                 logp_proposal=None,
                 new_to_old_atom_map=None, old_alchemical_atoms=None,
                 old_chemical_state_key=None, new_chemical_state_key=None,
                 metadata=None):

        if new_chemical_state_key is None or old_chemical_state_key is None:
            raise ValueError("chemical_state_keys must be set.")
        self._new_topology = new_topology
        self._new_system = new_system
        self._old_topology = old_topology
        self._old_system = old_system
        self._logp_proposal = logp_proposal
        self._new_chemical_state_key = new_chemical_state_key
        self._old_chemical_state_key = old_chemical_state_key
        self._new_to_old_atom_map = new_to_old_atom_map
        self._old_to_new_atom_map = {old_atom : new_atom for new_atom, old_atom in new_to_old_atom_map.items()}
        self._unique_new_atoms = list(set(range(self._new_topology._numAtoms))-set(self._new_to_old_atom_map.keys()))
        self._unique_old_atoms = list(set(range(self._old_topology._numAtoms))-set(self._new_to_old_atom_map.values()))
        self._old_alchemical_atoms = set(old_alchemical_atoms) if (old_alchemical_atoms is not None) else {atom for atom in range(old_system.getNumParticles())}
        self._new_alchemical_atoms = set(self._old_to_new_atom_map.values()).union(self._unique_new_atoms)
        self._old_environment_atoms = set(range(old_system.getNumParticles())) - self._old_alchemical_atoms
        self._new_environment_atoms = set(range(new_system.getNumParticles())) - self._new_alchemical_atoms
        self._metadata = metadata

    @property
    def new_topology(self):
        return self._new_topology
    @property
    def new_system(self):
        return self._new_system
    @property
    def old_topology(self):
        return self._old_topology
    @property
    def old_system(self):
        return self._old_system
    @property
    def logp_proposal(self):
        return self._logp_proposal
    @property
    def new_to_old_atom_map(self):
        return self._new_to_old_atom_map
    @property
    def old_to_new_atom_map(self):
        return self._old_to_new_atom_map
    @property
    def unique_new_atoms(self):
        return self._unique_new_atoms
    @property
    def unique_old_atoms(self):
        return self._unique_old_atoms
    @property
    def new_alchemical_atoms(self):
        return list(self._new_alchemical_atoms)
    @property
    def old_alchemical_atoms(self):
        return list(self._old_alchemical_atoms)
    @property
    def new_environment_atoms(self):
        return list(self._new_environment_atoms)
    @property
    def old_environment_atoms(self):
        return list(self._old_environment_atoms)
    @property
    def n_atoms_new(self):
        return self._new_system.getNumParticles()
    @property
    def n_atoms_old(self):
        return self._old_system.getNumParticles()
    @property
    def new_chemical_state_key(self):
        return self._new_chemical_state_key
    @property
    def old_chemical_state_key(self):
        return self._old_chemical_state_key
    @property
    def metadata(self):
        return self._metadata

[docs]class ProposalEngine(object):
    """
    This defines a type which, given the requisite metadata, can produce Proposals (namedtuple)
    of new topologies.

    Arguments
    --------
    system_generator : SystemGenerator
        The SystemGenerator to use to generate new System objects for proposed Topology objects
    proposal_metadata : dict
        Contains information necessary to initialize proposal engine
    verbose : bool, optional, default=False
        If True, print verbose debugging output

    Properties
    ----------
    chemical_state_list : list of str
         a list of all the chemical states that this proposal engine may visit.
    """

[docs]    def __init__(self, system_generator, proposal_metadata=None, always_change=True, verbose=False):
        self._system_generator = system_generator
        self.verbose = verbose
        self._always_change = always_change

[docs]    def propose(self, current_system, current_topology, current_metadata=None):
        """
        Base interface for proposal method.

        Arguments
        ---------
        current_system : simtk.openmm.System object
            The current system object
        current_topology : simtk.openmm.app.Topology object
            The current topology
        current_metadata : dict
            Additional metadata about the state
        Returns
        -------
        proposal : TopologyProposal
            NamedTuple of type TopologyProposal containing forward and reverse
            probabilities, as well as old and new topologies and atom
            mapping
        """
        return TopologyProposal(new_topology=app.Topology(), old_topology=app.Topology(), old_system=current_system, old_chemical_state_key="C", new_chemical_state_key="C", logp_proposal=0.0, new_to_old_atom_map={0 : 0}, metadata={'molecule_smiles' : 'CC'})

[docs]    def compute_state_key(self, topology):
        """
        Compute the corresponding state key of a given topology,
        according to this proposal engine's scheme.

        Parameters
        ----------
        topology : app.Topology
            the topology in question

        Returns
        -------
        chemical_state_key : str
            The chemical_state_key
        """
        pass

    @property
    def chemical_state_list(self):
        raise NotImplementedError("This ProposalEngine does not expose a list of possible chemical states.")

[docs]class PolymerProposalEngine(ProposalEngine):
[docs]    def __init__(self, system_generator, chain_id, proposal_metadata=None, verbose=False, always_change=True):
        super(PolymerProposalEngine,self).__init__(system_generator, proposal_metadata=proposal_metadata, verbose=verbose, always_change=always_change)
        self._chain_id = chain_id

[docs]    def propose(self, current_system, current_topology, current_metadata=None):
        """

        Arguments
        ---------
        current_system : simtk.openmm.System object
            The current system object
        current_topology : simtk.openmm.app.Topology object
            The current topology
        current_metadata : dict -- OPTIONAL
        Returns
        -------
        proposal : TopologyProposal
            NamedTuple of type TopologyProposal containing forward and reverse
            probabilities, as well as old and new topologies and atom
            mapping
        """

        # old_topology : simtk.openmm.app.Topology
        old_topology = app.Topology()
        append_topology(old_topology, current_topology)

        # new_topology : simtk.openmm.app.Topology
        new_topology = app.Topology()
        append_topology(new_topology, current_topology)

        # Check that old_topology and old_system have same number of atoms.
        old_system = current_system
        old_topology_natoms = sum([1 for atom in old_topology.atoms()]) # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        # metadata : dict, key = 'chain_id' , value : str
        metadata = current_metadata
        if metadata == None:
            metadata = dict()
        # old_chemical_state_key : str
        old_chemical_state_key = self.compute_state_key(old_topology)

        # chain_id : str
        chain_id = self._chain_id
        # save old indices for mapping -- could just directly save positions instead

        # EDITING new_topology
        # atom : simtk.openmm.app.topology.Atom
        for atom in new_topology.atoms():
            # atom.old_index : int
            atom.old_index = atom.index

        index_to_new_residues, metadata = self._choose_mutant(new_topology, metadata)
        # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue))
        residue_map = self._generate_residue_map(new_topology, index_to_new_residues)
        for (res, new_name) in residue_map:
            if res.name == new_name:
                del(index_to_new_residues[res.index])
        if len(index_to_new_residues) == 0:
            atom_map = dict()
            for atom in new_topology.atoms():
                atom_map[atom.index] = atom.index
            if self.verbose: print('PolymerProposalEngine: No changes to topology proposed, returning old system and topology')
            topology_proposal = TopologyProposal(new_topology=old_topology, new_system=old_system, old_topology=old_topology, old_system=old_system, old_chemical_state_key=old_chemical_state_key, new_chemical_state_key=old_chemical_state_key, logp_proposal=0.0, new_to_old_atom_map=atom_map)
            return topology_proposal


        # new_topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        # missing_atoms : dict, key : simtk.openmm.app.topology.Residue, value : list(simtk.openmm.app.topology._TemplateAtomData)
        new_topology, missing_atoms = self._delete_excess_atoms(new_topology, residue_map)
        # new_topology : simtk.openmm.app.Topology new residue has all correct atoms for desired mutation
        new_topology = self._add_new_atoms(new_topology, missing_atoms, residue_map)

        atom_map = self._construct_atom_map(residue_map, old_topology, index_to_new_residues, new_topology)

        # new_chemical_state_key : str
        new_chemical_state_key = self.compute_state_key(new_topology)
        # new_system : simtk.openmm.System
        new_system = self._system_generator.build_system(new_topology)

        # Create TopologyProposal.
        topology_proposal = TopologyProposal(new_topology=new_topology, new_system=new_system, old_topology=old_topology, old_system=old_system, old_chemical_state_key=old_chemical_state_key, new_chemical_state_key=new_chemical_state_key, logp_proposal=0.0, new_to_old_atom_map=atom_map)

        # Check that old_topology and old_system have same number of atoms.
#        old_system = current_system
        old_topology_natoms = sum([1 for atom in old_topology.atoms()]) # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        # Check that new_topology and new_system have same number of atoms.
        new_topology_natoms = sum([1 for atom in new_topology.atoms()]) # number of topology atoms
        new_system_natoms = new_system.getNumParticles()
        if new_topology_natoms != new_system_natoms:
            msg = 'PolymerProposalEngine: new_topology has %d atoms, while new_system has %d atoms' % (new_topology_natoms, new_system_natoms)
            raise Exception(msg)
                    # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map
        natoms_old = topology_proposal.old_system.getNumParticles()
        natoms_new = topology_proposal.new_system.getNumParticles()
        if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)):
            msg = "Some old atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_old
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)
        if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)):
            msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_new
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)

        return topology_proposal

    def _choose_mutant(self, topology, metadata):
        index_to_new_residues = dict()
        return index_to_new_residues, metadata

    def _generate_residue_map(self, topology, index_to_new_residues):
        """
        generates list to reference residue instance to be edited, because topology.residues() cannot be referenced directly by index

        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        index_to_new_residues : dict
            key : int (index, zero-indexed in chain)
            value : str (three letter residue name)
        Returns
        -------
        residue_map : list(tuples)
            simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue)
        """
        # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue))
        # r : simtk.openmm.app.topology.Residue, r.index : int, 0-indexed
        residue_map = [(r, index_to_new_residues[r.index]) for r in topology.residues() if r.index in index_to_new_residues]
        return residue_map

    def _delete_excess_atoms(self, topology, residue_map):
        """
        delete excess atoms from old residues and identify new atoms for new residues

        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        residue_map : list(tuples)
            simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue)
        Returns
        -------
        topology : simtk.openmm.app.Topology
            extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        missing_atoms : dict
            key : simtk.openmm.app.topology.Residue
            value : list(simtk.openmm.app.topology._TemplateAtomData)
        """
        # delete excess atoms from old residues and identify new atoms for new residues
        # delete_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue
        delete_atoms = list()
        # missing_atoms : dict, key : simtk.openmm.app.topology.Residue, value : list(simtk.openmm.app.topology._TemplateAtomData)
        missing_atoms = dict()
        # residue : simtk.openmm.app.topology.Residue (existing residue)
        # replace_with : str (three letter residue name of proposed residue)
        for k, (residue, replace_with) in enumerate(residue_map):
            # chain_residues : list(simtk.openmm.app.topology.Residue) all residues in chain ==> why
            chain_residues = list(residue.chain.residues())
            if residue == chain_residues[0]:
                replace_with = 'N'+replace_with
                residue_map[k] = (residue, replace_with)
            if residue == chain_residues[-1]:
                replace_with = 'C'+replace_with
                residue_map[k] = (residue, replace_with)
            # template : simtk.openmm.app.topology._TemplateData
            template = self._templates[replace_with]
            # standard_atoms : set of unique atom names within new residue : str
            standard_atoms = set(atom.name for atom in template.atoms)
            # template_atoms : list(simtk.openmm.app.topology._TemplateAtomData) atoms in new residue
            template_atoms = list(template.atoms)
            # atom_names : set of unique atom names within existing residue : str
            atom_names = set(atom.name for atom in residue.atoms())
            # atom : simtk.openmm.app.topology.Atom in existing residue
            for atom in residue.atoms(): # shouldn't remove hydrogen
                if atom.name not in standard_atoms:
                    delete_atoms.append(atom)
#            if residue == chain_residues[0]: # this doesn't apply?
#                template_atoms = [atom for atom in template_atoms if atom.name not in ('P', 'OP1', 'OP2')]
            # missing : list(simtk.openmm.app.topology._TemplateAtomData) atoms in new residue not found in existing residue
            missing = list()
            # atom : simtk.openmm.app.topology._TemplateAtomData atoms in new residue
            for atom in template_atoms:
                if atom.name not in atom_names:
                    missing.append(atom)
            # BUG : error if missing = 0?
            if len(missing) > 0:
                missing_atoms[residue] = missing
        # topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        topology = self._to_delete(topology, delete_atoms)
        topology = self._to_delete_bonds(topology, residue_map)
        topology._numAtoms = len(list(topology.atoms()))

        return(topology, missing_atoms)

    def _to_delete(self, topology, delete_atoms):
        """
        remove instances of atoms and corresponding bonds from topology

        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        delete_atoms : list(simtk.openmm.app.topology.Atom)
            atoms from existing residue not in new residue
        Returns
        -------
        topology : simtk.openmm.app.Topology
            extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        """
        # delete_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue
        # atom : simtk.openmm.app.topology.Atom
        for atom in delete_atoms:
            atom.residue._atoms.remove(atom)
            for bond in topology._bonds:
                if atom in bond:
                    topology._bonds = list(filter(lambda a: a != bond, topology._bonds))
        # topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        return topology

    def _to_delete_bonds(self, topology, residue_map):
        """
        Remove any bonds between atoms in both new and old residue that do not belong in new residue
        (e.g. breaking the ring in PRO)
        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        residue_map : list(tuples)
            simtk.openmm.app.topology.Residue (existing residue), str (three letter residue name of proposed residue)
        Returns
        -------
        topology : simtk.openmm.app.Topology
            extra atoms and bonds from old residue have been deleted, missing atoms in new residue not yet added
        """

        for residue, replace_with in residue_map:
            # template : simtk.openmm.app.topology._TemplateData
            template = self._templates[replace_with]

            old_res_bonds = list()
            # bond : tuple(simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)
            for atom1, atom2 in topology._bonds:
                if atom1.residue == residue or atom2.residue == residue:
                    old_res_bonds.append((atom1.name, atom2.name))
            # make a list of bonds that should exist in new residue
            # template_bonds : list(tuple(str (atom name), str (atom name))) bonds in template
            template_bonds = [(template.atoms[bond[0]].name, template.atoms[bond[1]].name) for bond in template.bonds]
            # add any bonds that exist in template but not in new residue
            for bond in old_res_bonds:
                if bond not in template_bonds and (bond[1],bond[0]) not in template_bonds:
                    topology._bonds = list(filter(lambda a: a != bond, topology._bonds))
                    topology._bonds = list(filter(lambda a: a != (bond[1],bond[0]), topology._bonds))
        return topology

    def _add_new_atoms(self, topology, missing_atoms, residue_map):
        """
        add new atoms (and corresponding bonds) to new residues

        Arguments
        ---------
        topology : simtk.openmm.app.Topology
            extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        missing_atoms : dict
            key : simtk.openmm.app.topology.Residue
            value : list(simtk.openmm.app.topology._TemplateAtomData)
        residue_map : list(tuples)
            simtk.openmm.app.topology.Residue, str (three letter residue name of new residue)
        Returns
        -------
        topology : simtk.openmm.app.Topology
            new residue has all correct atoms for desired mutation
        """
        # add new atoms to new residues
        # topology : simtk.openmm.app.Topology extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        # missing_atoms : dict, key : simtk.openmm.app.topology.Residue, value : list(simtk.openmm.app.topology._TemplateAtomData)
        # residue_map : list(tuples : simtk.openmm.app.topology.Residue (old residue), str (three letter residue name of new residue))

        # new_atoms : list(simtk.openmm.app.topology.Atom) atoms that have been added to new residue
        new_atoms = list()
        # k : int
        # residue_ent : tuple(simtk.openmm.app.topology.Residue (old residue), str (three letter residue name of new residue))
        for k, residue_ent in enumerate(residue_map):
            # residue : simtk.openmm.app.topology.Residue (old residue) BUG : wasn't this editing the residue in place what is old and new map
            residue = residue_ent[0]
            # replace_with : str (three letter residue name of new residue)
            replace_with = residue_ent[1]
            # directly edit the simtk.openmm.app.topology.Residue instance
            residue.name = replace_with
            # load template to compare bonds
            # template : simtk.openmm.app.topology._TemplateData
            template = self._templates[replace_with]
            # add each missing atom
            # atom : simtk.openmm.app.topology._TemplateAtomData
            try:
                for atom in missing_atoms[residue]:
                    # new_atom : simtk.openmm.app.topology.Atom
                    new_atom = topology.addAtom(atom.name, atom.element, residue)
                    # new_atoms : list(simtk.openmm.app.topology.Atom)
                    new_atoms.append(new_atom)
            except KeyError:
                pass
            # make a dictionary to map atom names in new residue to atom object
            # new_res_atoms : dict, key : str (atom name) , value : simtk.openmm.app.topology.Atom
            new_res_atoms = dict()
            # atom : simtk.openmm.app.topology.Atom
            for atom in residue.atoms():
                # atom.name : str
                new_res_atoms[atom.name] = atom
            # make a list of bonds already existing in new residue
            # new_res_bonds : list(tuple(str (atom name), str (atom name))) bonds between atoms both within new residue
            new_res_bonds = list()
            # bond : tuple(simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)
            for bond in topology._bonds:
                if bond[0].residue == residue and bond[1].residue == residue:
                    new_res_bonds.append((bond[0].name, bond[1].name))
            # make a list of bonds that should exist in new residue
            # template_bonds : list(tuple(str (atom name), str (atom name))) bonds in template
            template_bonds = [(template.atoms[bond[0]].name, template.atoms[bond[1]].name) for bond in template.bonds]
            # add any bonds that exist in template but not in new residue
            for bond in new_res_bonds:
                if bond not in template_bonds and (bond[1],bond[0]) not in template_bonds:
                    bonded_0 = new_res_atoms[bond[0]]
                    bonded_1 = new_res_atoms[bond[1]]
                    topology._bonds.remove((bonded_0, bonded_1))
            for bond in template_bonds:
                if bond not in new_res_bonds and (bond[1],bond[0]) not in new_res_bonds:
                    # new_bonded_0 : simtk.openmm.app.topology.Atom
                    new_bonded_0 = new_res_atoms[bond[0]]
                    # new_bonded_1 : simtk.openmm.app.topology.Atom
                    new_bonded_1 = new_res_atoms[bond[1]]
                    topology.addBond(new_bonded_0, new_bonded_1)
        topology._numAtoms = len(list(topology.atoms()))

        # add new bonds to the new residues
        return topology

    def _construct_atom_map(self, residue_map, old_topology, index_to_new_residues, new_topology):
        # atom_map : dict, key : int (index of atom in old topology) , value : int (index of same atom in new topology)
        atom_map = dict()

        # atoms with an old_index attribute should be mapped
        # k : int
        # atom : simtk.openmm.app.topology.Atom
        def match_backbone(old_residue, new_residue, atom_name):
            """
            Forcibly including CA and N in the map even if they don't meet
            matching criteria
            """
            found_old_atom = False
            for atom in old_residue.atoms():
                if atom.name == atom_name:
                    old_atom = atom
                    found_old_atom = True
                    break
            assert found_old_atom
            found_new_atom = False
            for atom in new_residue.atoms():
                if atom.name == atom_name:
                    new_atom = atom
                    found_new_atom = True
                    break
            assert found_new_atom
            return new_atom.index, old_atom.index

        modified_residues = dict()
        old_residues = dict()
        for map_entry in residue_map:
            modified_residues[map_entry[0].index] = map_entry[0]
        for residue in old_topology.residues():
            if residue.index in index_to_new_residues.keys():
                old_residues[residue.index] = residue
        for k, atom in enumerate(new_topology.atoms()):
            atom.index=k
            if atom.residue in modified_residues.values():
                continue
            try:
                atom_map[atom.index] = atom.old_index
            except AttributeError:
                pass
        for index in index_to_new_residues.keys():
            old_oemol_res = FFAllAngleGeometryEngine._oemol_from_residue(old_residues[index])
            new_oemol_res = FFAllAngleGeometryEngine._oemol_from_residue(modified_residues[index])
            _ , local_atom_map = self._get_mol_atom_matches(old_oemol_res, new_oemol_res)

            for backbone_name in ['CA','N']:
                new_index, old_index = match_backbone(old_residues[index], modified_residues[index], backbone_name)
                local_atom_map[new_index] = old_index

            atom_map.update(local_atom_map)

        return atom_map

    def _get_mol_atom_matches(self, current_molecule, proposed_molecule):
        """
        Given two molecules, returns the mapping of atoms between them.

        Arguments
        ---------
        current_molecule : openeye.oechem.oemol object
             The current molecule in the sampler
        proposed_molecule : openeye.oechem.oemol object
             The proposed new molecule

        Returns
        -------
        matches : list of match
            list of the matches between the molecules
        """
        oegraphmol_current = oechem.OEGraphMol(current_molecule)
        oegraphmol_proposed = oechem.OEGraphMol(proposed_molecule)
        mcs = oechem.OEMCSSearch(oechem.OEMCSType_Exhaustive)

        atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_RingMember | oechem.OEExprOpts_Degree | oechem.OEExprOpts_AtomicNumber
        bondexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_RingMember
        mcs.Init(oegraphmol_current, atomexpr, bondexpr)

        def forcibly_matched(mcs, proposed, atom_name):
            old_atom = list(mcs.GetPattern().GetAtoms(atom_name))
            assert len(old_atom) == 1
            old_atom = old_atom[0]

            new_atom = list(proposed.GetAtoms(atom_name))
            assert len(new_atom) == 1
            new_atom = new_atom[0]
            return old_atom, new_atom

        force_matches = list()
        for matched_atom_name in ['C','O']:
            force_matches.append(oechem.OEHasAtomName(matched_atom_name))

        for force_match in force_matches:
            old_atom, new_atom = forcibly_matched(mcs, oegraphmol_proposed, force_match)
            this_match = oechem.OEMatchPairAtom(old_atom, new_atom)
            assert mcs.AddConstraint(this_match)

        mcs.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles())
        unique = True
        matches = [m for m in mcs.Match(oegraphmol_proposed, unique)]
        if len(matches)==0:
            from perses.tests.utils import describe_oemol
            msg = 'No matches found in _get_mol_atom_matches.\n'
            msg += '\n'
            msg += 'oegraphmol_current:\n'
            msg += describe_oemol(oegraphmol_current)
            msg += '\n'
            msg += 'oegraphmol_proposed:\n'
            msg += describe_oemol(oegraphmol_proposed)
            raise Exception(msg)
        match = np.random.choice(matches)
        new_to_old_atom_map = {}
        for matchpair in match.GetAtoms():
            old_index = matchpair.pattern.GetData("topology_index")
            new_index = matchpair.target.GetData("topology_index")
            if old_index < 0 or new_index < 0:
                continue
            new_to_old_atom_map[new_index] = old_index
        return matches, new_to_old_atom_map


[docs]    def compute_state_key(self, topology):
        for chain in topology.chains():
            if chain.id == self._chain_id:
                break
        chemical_state_key = ''
        for (index, res) in enumerate(chain._residues):
            if (index > 0):
                chemical_state_key += '-'
            chemical_state_key += res.name

        return chemical_state_key

[docs]class PointMutationEngine(PolymerProposalEngine):
    """
    Arguments
    --------
    wildtype_topology : openmm.app.Topology
    system_generator : SystemGenerator
    chain_id : str
        id of the chain to mutate
        (using the first chain with the id, if there are multiple)
    proposal_metadata : dict -- OPTIONAL
        Contains information necessary to initialize proposal engine
    max_point_mutants : int -- OPTIONAL
        default = None
    residues_allowed_to_mutate : list(str) -- OPTIONAL
        default = None
    allowed_mutations : list(list(tuple)) -- OPTIONAL
        default = None
        ('residue id to mutate','desired mutant residue name (3-letter code)')
        Example:
            Desired systems are wild type T4 lysozyme, T4 lysozyme L99A, and T4 lysozyme L99A/M102Q
            allowed_mutations = [
                [('99','ALA')],
                [('99','ALA'),('102','GLN')]
            ]
    always_change : Boolean -- OPTIONAL, default True
        Have the proposal engine always propose another mutation
        If allowed_mutations is not specified, always_change will require ALL
        point mutations to be different
        The proposal engine will choose number of locations specified by
        max_point_mutants, and will require all of those residues to change
        eg: if old topology included L99A and M102Q, the new proposal cannot
            include L99A OR M102Q
        (This is only relevant in cases where max_point_mutants > 1)
    """

[docs]    def __init__(self, wildtype_topology, system_generator, chain_id, proposal_metadata=None, max_point_mutants=None, residues_allowed_to_mutate=None, allowed_mutations=None, verbose=False, always_change=True):
        super(PointMutationEngine,self).__init__(system_generator, chain_id, proposal_metadata=proposal_metadata, verbose=verbose, always_change=always_change)

        # Check that provided topology has specified chain.
        chain_ids_in_topology = [ chain.id for chain in wildtype_topology.chains() ]
        if chain_id not in chain_ids_in_topology:
            raise Exception("Specified chain_id '%s' not found in provided wildtype_topology. Choices are: %s" % (chain_id, str(chain_ids_in_topology)))

        self._wildtype = wildtype_topology
        self._max_point_mutants = max_point_mutants
        self._ff = system_generator.forcefield
        self._templates = self._ff._templates
        self._residues_allowed_to_mutate = residues_allowed_to_mutate
        if allowed_mutations is not None:
            for mutation in allowed_mutations:
                mutation.sort()
        self._allowed_mutations = allowed_mutations
        if proposal_metadata is None:
            proposal_metadata = dict()
        self._metadata = proposal_metadata
        if max_point_mutants is None and allowed_mutations is None:
            raise Exception("Must specify either max_point_mutants or allowed_mutations.")
        if max_point_mutants is not None and allowed_mutations is not None:
            warnings.warn("PointMutationEngine: max_point_mutants and allowed_mutations were both specified -- max_point_mutants will be ignored")

    def _choose_mutant(self, topology, metadata):
        chain_id = self._chain_id
        old_key = self.compute_state_key(topology)
        index_to_new_residues = self._undo_old_mutants(topology, chain_id, old_key)
        if self._allowed_mutations is not None:
            allowed_mutations = self._allowed_mutations
            index_to_new_residues = self._choose_mutation_from_allowed(topology, chain_id, allowed_mutations, index_to_new_residues, old_key)
        else:
            # index_to_new_residues : dict, key : int (index) , value : str (three letter residue name)
            index_to_new_residues = self._propose_mutations(topology, chain_id, index_to_new_residues, old_key)
        # metadata['mutations'] : list(str (three letter WT residue name - index - three letter MUT residue name) )
        metadata['mutations'] = self._save_mutations(topology, index_to_new_residues)

        return index_to_new_residues, metadata

    def _undo_old_mutants(self, topology, chain_id, old_key):
        index_to_new_residues = dict()
        if old_key == 'WT':
            return index_to_new_residues
        for chain in topology.chains():
            if chain.id == chain_id:
                break
        residue_id_to_index = {residue.id : residue.index for residue in chain._residues}
        for mutant in old_key.split('-'):
            old_res = mutant[:3]
            residue_id = mutant[3:-3]
            new_res = mutant[-3:]
            index_to_new_residues[residue_id_to_index[residue_id]] = old_res
        return index_to_new_residues

    def _choose_mutation_from_allowed(self, topology, chain_id, allowed_mutations, index_to_new_residues, old_key):
        """
        Used when allowed mutations have been specified
        Assume (for now) uniform probability of selecting each specified mutant

        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        chain_id : str
        allowed_mutations : list(list(tuple))
            list of allowed mutant states; each entry in the list is a list because multiple mutations may be desired
            tuple : (str, str) -- residue id and three-letter amino acid code of desired mutant

        Returns
        -------
        index_to_new_residues : dict
            key : int (index, zero-indexed in chain)
            value : str (three letter residue name)
        """

        # chain : simtk.openmm.app.topology.Chain
        chain_found = False
        for anychain in topology.chains():
            if anychain.id == chain_id:
                chain = anychain
                chain_found = True
                break
        if not chain_found:
            chains = [ chain.id for chain in topology.chains() ]
            raise Exception("Chain '%s' not found in Topology. Chains present are: %s" % (chain_id, str(chains)))
        residue_id_to_index = {residue.id : residue.index for residue in chain._residues}
        # location_prob : np.array, probability value for each residue location (uniform)
        if self._always_change:
            location_prob = np.array([1.0/len(allowed_mutations) for i in range(len(allowed_mutations)+1)])
            if old_key == 'WT':
                location_prob[len(allowed_mutations)] = 0.0
            else:
                current_mutation = list()
                for mutant in old_key.split('-'):
                    residue_id = mutant[3:-3]
                    new_res = mutant[-3:]
                    current_mutation.append((residue_id,new_res))
                current_mutation.sort()
                location_prob[allowed_mutations.index(current_mutation)] = 0.0
        else:
            location_prob = np.array([1.0/(len(allowed_mutations)+1.0) for i in range(len(allowed_mutations)+1)])
        proposed_location = np.random.choice(range(len(allowed_mutations)+1), p=location_prob)
        if proposed_location == len(allowed_mutations):
            # choose WT
            pass
        else:
            for residue_id, residue_name in allowed_mutations[proposed_location]:
                # original_residue : simtk.openmm.app.topology.Residue
                original_residue = chain._residues[residue_id_to_index[residue_id]]
                if original_residue.name in ['HID','HIE']:
                    original_residue.name = 'HIS'
                if original_residue.name == residue_name:
                    continue
                # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
                index_to_new_residues[residue_id_to_index[residue_id]] = residue_name
                if residue_name == 'HIS':
                    his_state = ['HIE','HID']
                    his_prob = np.array([0.5 for i in range(len(his_state))])
                    his_choice = np.random.choice(range(len(his_state)),p=his_prob)
                    index_to_new_residues[residue_id_to_index[residue_id]] = his_state[his_choice]
                # DEBUG
                if self.verbose: print('Proposed mutation: %s %s %s' % (original_residue.name, residue_id, residue_name))

        # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
        return index_to_new_residues

    def _propose_mutations(self, topology, chain_id, index_to_new_residues, old_key):
        """
        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        chain_id : str
        index_to_new_residues : dict
            contains information to mutate back to WT as starting point for new mutants
        old_key : str
            chemical_state_key for old topology

        Returns
        -------
        index_to_new_residues : dict
            key : int (index, zero-indexed in chain)
            value : str (three letter residue name)
        """
        # this shouldn't be here
        aminos = ['ALA','ARG','ASN','ASP','CYS','GLN','GLU','GLY','HIS','ILE','LEU','LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL']
        # chain : simtk.openmm.app.topology.Chain
        chain_found = False
        for anychain in topology.chains():
            if anychain.id == chain_id:
                chain = anychain
                if self._residues_allowed_to_mutate is None:
                    # num_residues : int
                    num_residues = len(chain._residues)
                    chain_residues = chain._residues
                chain_found = True
                residue_id_to_index = {residue.id : residue.index for residue in chain._residues}
                break
        if not chain_found:
            chains = [ chain.id for chain in topology.chains() ]
            raise Exception("Chain '%s' not found in Topology. Chains present are: %s" % (chain_id, str(chains)))
        if self._residues_allowed_to_mutate is not None:
            num_residues = len(self._residues_allowed_to_mutate)
            chain_residues = self._mutable_residues(chain)
        # location_prob : np.array, probability value for each residue location (uniform)
        location_prob = np.array([1.0/num_residues for i in range(num_residues)])
        if self._always_change:
            residue_id_to_index = {residue.id : residue.index for residue in chain._residues}
            current_mutation = dict()
            if old_key != 'WT':
                for mutant in old_key.split('-'):
                    residue_id = mutant[3:-3]
                    new_res = mutant[-3:]
                    current_mutation[residue_id] = new_res

        for i in range(self._max_point_mutants):
            # proposed_location : int, index of chosen entry in location_prob
            proposed_location = np.random.choice(range(num_residues), p=location_prob)
            # original_residue : simtk.openmm.app.topology.Residue
            original_residue = chain_residues[proposed_location]
            if original_residue.name in ['HIE','HID']:
                original_residue.name = 'HIS'
            # amino_prob : np.array, probability value for each amino acid option (uniform)
            if self._always_change:
                amino_prob = np.array([1.0/(len(aminos)-1) for l in range(len(aminos))])
                amino_prob[aminos.index(original_residue.name)] = 0.0
            else:
                amino_prob = np.array([1.0/(len(aminos)) for k in range(len(aminos))])
            # proposed_amino_index : int, index of three letter residue name in aminos list
            proposed_amino_index = np.random.choice(range(len(aminos)), p=amino_prob)
            # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
            if self._residues_allowed_to_mutate is not None:
                proposed_location = residue_id_to_index[self._residues_allowed_to_mutate[proposed_location]]
            index_to_new_residues[proposed_location] = aminos[proposed_amino_index]
            if aminos[proposed_amino_index] == 'HIS':
                his_state = ['HIE','HID']
                his_prob = np.array([0.5 for j in range(len(his_state))])
                his_choice = np.random.choice(range(len(his_state)),p=his_prob)
                index_to_new_residues[proposed_location] = his_state[his_choice]
        return index_to_new_residues

    def _mutable_residues(self, chain):
        chain_residues = [residue for residue in chain._residues if residue.id in self._residues_allowed_to_mutate]
        return chain_residues

    def _save_mutations(self, topology, index_to_new_residues):
        """
        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        index_to_new_residues : dict
            key : int (index, zero-indexed in chain)
            value : str (three letter residue name)
        Returns
        -------
        mutations : list(str)
            XXX-##-XXX
            three letter WT residue name - id - three letter MUT residue name
            id is based on the protein sequence NOT zero-indexed

        """
        return [r.name+'-'+str(r.id)+'-'+index_to_new_residues[r.index] for r in topology.residues() if r.index in index_to_new_residues]

[docs]    def compute_state_key(self, topology):
        chemical_state_key = ''
        wildtype = self._wildtype
        for anychain in topology.chains():
            if anychain.id == self._chain_id:
                chain = anychain
                break
        for anywt_chain in wildtype.chains():
            if anywt_chain.id == self._chain_id:
                wt_chain = anywt_chain
                break
        for wt_res, res in zip(wt_chain._residues, chain._residues):
            if wt_res.name != res.name:
                if chemical_state_key:
                    chemical_state_key+='-'
                chemical_state_key+= str(wt_res.name)+str(res.id)+str(res.name)
        if not chemical_state_key:
            chemical_state_key = 'WT'
        return chemical_state_key

[docs]class PeptideLibraryEngine(PolymerProposalEngine):
    """

    Arguments
    --------
    system_generator : SystemGenerator
    library : list of strings
        each string is a 1-letter-code list of amino acid sequence
    chain_id : str
        id of the chain to mutate
        (using the first chain with the id, if there are multiple)
    proposal_metadata : dict -- OPTIONAL
        Contains information necessary to initialize proposal engine
    """

[docs]    def __init__(self, system_generator, library, chain_id, proposal_metadata=None, verbose=False, always_change=True):
        super(PeptideLibraryEngine,self).__init__(system_generator, chain_id, proposal_metadata=proposal_metadata, verbose=verbose, always_change=always_change)
        self._library = library
        self._ff = system_generator.forcefield
        self._templates = self._ff._templates

    def _choose_mutant(self, topology, metadata):
        """
        Used when library of pepide sequences has been provided
        Assume (for now) uniform probability of selecting each peptide

        Arguments
        ---------
        topology : simtk.openmm.app.Topology
        chain_id : str
        allowed_mutations : list(list(tuple))
            list of allowed mutant states; each entry in the list is a list because multiple mutations may be desired
            tuple : (str, str) -- residue id and three-letter amino acid code of desired mutant

        Returns
        -------
        index_to_new_residues : dict
            key : int (index, zero-indexed in chain)
            value : str (three letter residue name)
        metadata : dict
            has not been altered
        """
        library = self._library

        index_to_new_residues = dict()

        # chain : simtk.openmm.app.topology.Chain
        chain_id = self._chain_id
        chain_found = False
        for chain in topology.chains():
            if chain.id == chain_id:
                chain_found = True
                break
        if not chain_found:
            chains = [ chain.id for chain in topology.chains() ]
            raise Exception("Chain '%s' not found in Topology. Chains present are: %s" % (chain_id, str(chains)))
        # location_prob : np.array, probability value for each residue location (uniform)
        location_prob = np.array([1.0/len(library) for i in range(len(library))])
        proposed_location = np.random.choice(range(len(library)), p=location_prob)
        for residue_index, residue_one_letter in enumerate(library[proposed_location]):
            # original_residue : simtk.openmm.app.topology.Residue
            original_residue = chain._residues[residue_index]
            residue_name = self._one_to_three_letter_code(residue_one_letter)
            if original_residue.name == residue_name:
                continue
            # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
            index_to_new_residues[residue_index] = residue_name
            if residue_name == 'HIS':
                his_state = ['HIE','HID']
                his_prob = np.array([0.5 for i in range(len(his_state))])
                his_choice = np.random.choice(range(len(his_state)),p=his_prob)
                index_to_new_residues[residue_index] = his_state[his_choice]

        # index_to_new_residues : dict, key : int (index of residue, 0-indexed), value : str (three letter residue name)
        return index_to_new_residues, metadata

    def _one_to_three_letter_code(self, residue_one_letter):
        three_letter_code = {
            'A' : 'ALA',
            'C' : 'CYS',
            'D' : 'ASP',
            'E' : 'GLU',
            'F' : 'PHE',
            'G' : 'GLY',
            'H' : 'HIS',
            'I' : 'ILE',
            'K' : 'LYS',
            'L' : 'LEU',
            'M' : 'MET',
            'N' : 'ASN',
            'P' : 'PRO',
            'Q' : 'GLN',
            'R' : 'ARG',
            'S' : 'SER',
            'T' : 'THR',
            'V' : 'VAL',
            'W' : 'TRP',
            'Y' : 'TYR'
        }
        return three_letter_code[residue_one_letter]

[docs]class SystemGenerator(object):
    """
    This is a utility class to generate OpenMM Systems from
    topology objects.

    Parameters
    ----------
    forcefields_to_use : list of string
        List of the names of ffxml files that will be used in system creation.
    forcefield_kwargs : dict of arguments to createSystem, optional
        Allows specification of various aspects of system creation.
    metadata : dict, optional
        Metadata associated with the SystemGenerator.
    use_antechamber : bool, optional, default=True
        If True, will add the GAFF residue template generator.
    barostat : MonteCarloBarostat, optional, default=None
        If provided, a matching barostat will be added to the generated system.
    """

[docs]    def __init__(self, forcefields_to_use, forcefield_kwargs=None, metadata=None, use_antechamber=True, barostat=None):
        self._forcefield_xmls = forcefields_to_use
        self._forcefield_kwargs = forcefield_kwargs if forcefield_kwargs is not None else {}
        self._forcefield = app.ForceField(*self._forcefield_xmls)
        if use_antechamber:
            self._forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
        if 'removeCMMotion' not in self._forcefield_kwargs:
            self._forcefield_kwargs['removeCMMotion'] = False
        self._barostat = None
        if barostat is not None:
            pressure = barostat.getDefaultPressure()
            if hasattr(barostat, 'getDefaultTemperature'):
                temperature = barostat.getDefaultTemperature()
            else:
                temperature = barostat.getTemperature()
            frequency = barostat.getFrequency()
            self._barostat = (pressure, temperature, frequency)

[docs]    def getForceField(self):
        """
        Return the associated ForceField object.

        Returns
        -------
        forcefield : simtk.openmm.app.ForceField
            The current ForceField object.
        """
        return self._forcefield

[docs]    def build_system(self, new_topology):
        """
        Build a system from the new_topology, adding templates
        for the molecules in oemol_list

        Parameters
        ----------
        new_topology : simtk.openmm.app.Topology object
            The topology of the system

        Returns
        -------
        new_system : openmm.System
            A system object generated from the topology
        """
        _logger.info('Generating System...')
        timer_start = time.time()

        try:
            system = self._forcefield.createSystem(new_topology, **self._forcefield_kwargs)
        except Exception as e:
            from simtk import unit
            nparticles = sum([1 for atom in new_topology.atoms()])
            positions = unit.Quantity(np.zeros([nparticles,3], np.float32), unit.angstroms)
            # Write PDB file of failed topology
            from simtk.openmm.app import PDBFile
            outfile = open('BuildSystem-failure.pdb', 'w')
            pdbfile = PDBFile.writeFile(new_topology, positions, outfile)
            outfile.close()
            msg = str(e)
            import traceback
            msg += traceback.format_exc(e)
            msg += "\n"
            msg += "PDB file written as 'BuildSystem-failure.pdb'"
            raise Exception(msg)

        # Add barostat if requested.
        if self._barostat is not None:
            MAXINT = np.iinfo(np.int32).max
            barostat = openmm.MonteCarloBarostat(*self._barostat)
            seed = np.random.randint(MAXINT)
            barostat.setRandomNumberSeed(seed)
            system.addForce(barostat)

        # DEBUG: See if any torsions have duplicate atoms.
        #from perses.tests.utils import check_system
        #check_system(system)

        _logger.info('System generation took %.3f s' % (time.time() - timer_start))

        return system

    @property
    def ffxmls(self):
        return self._forcefield_xmls

    @property
    def forcefield(self):
        return self._forcefield

[docs]class SmallMoleculeSetProposalEngine(ProposalEngine):
    """
    This class proposes new small molecules from a prespecified set. It uses
    uniform proposal probabilities, but can be extended. The user is responsible
    for providing a list of smiles that can be interconverted! The class includes
    extra functionality to assist with that (it is slow).

    Parameters
    ----------
    list_of_smiles : list of string
        list of smiles that will be sampled
    system_generator : SystemGenerator object
        SystemGenerator initialized with the appropriate forcefields
    residue_name : str
        The name that will be used for small molecule residues in the topology
    proposal_metadata : dict
        metadata for the proposal engine
    storage : NetCDFStorageView, optional, default=None
        If specified, write statistics to this storage
    """

[docs]    def __init__(self, list_of_smiles, system_generator, residue_name='MOL',
                 atom_expr=None, bond_expr=None, proposal_metadata=None, storage=None,
                 always_change=True, atom_map=None):

        # Default atom and bond expressions for MCSS
        self.atom_expr = atom_expr or DEFAULT_ATOM_EXPRESSION
        self.bond_expr = bond_expr or DEFAULT_BOND_EXPRESSION
        self._allow_ring_breaking = True # allow ring breaking

        # Canonicalize all SMILES strings
        self._smiles_list = [SmallMoleculeSetProposalEngine.canonicalize_smiles(smiles) for smiles in set(list_of_smiles)]

        self._n_molecules = len(self._smiles_list)

        self._residue_name = residue_name
        self._generated_systems = dict()
        self._generated_topologies = dict()
        self._matches = dict()

        self._storage = None
        if storage is not None:
            self._storage = NetCDFStorageView(storage, modname=self.__class__.__name__)

        self._probability_matrix = self._calculate_probability_matrix(self._smiles_list)

        self._atom_map = atom_map

        super(SmallMoleculeSetProposalEngine, self).__init__(system_generator, proposal_metadata=proposal_metadata, always_change=always_change)

[docs]    def propose(self, current_system, current_topology, current_mol=None, proposed_mol=None, current_metadata=None):
        """
        Propose the next state, given the current state

        Parameters
        ----------
        current_system : openmm.System object
            the system of the current state
        current_topology : app.Topology object
            the topology of the current state
        current_metadata : dict
            dict containing current smiles as a key
        current_mol : OEMol, optional, default=None
            If specified, use this OEMol instead of converting from topology
        proposed_mol : OEMol, optional, default=None
            If specified, use this OEMol instead of converting from topology

        Returns
        -------
        proposal : TopologyProposal object
           topology proposal object
        """
        # Determine SMILES string for current small molecule
        if current_mol is None:
            current_mol_smiles, current_mol = self._topology_to_smiles(current_topology)
        else:
            # TODO: Make sure we're using canonical mol to smiles conversion
            current_mol_smiles = oechem.OEMolToSmiles(current_mol)

        # Remove the small molecule from the current Topology object
        current_receptor_topology = self._remove_small_molecule(current_topology)

        # Find the initial atom index of the small molecule in the current topology
        old_mol_start_index, len_old_mol = self._find_mol_start_index(current_topology)

        # Determine atom indices of the small molecule in the current topology
        old_alchemical_atoms = range(old_mol_start_index, len_old_mol)

        # Select the next molecule SMILES given proposal probabilities
        if proposed_mol is None:
            proposed_mol_smiles, proposed_mol, logp_proposal = self._propose_molecule(current_system, current_topology, current_mol_smiles)
        else:
            # TODO: Make sure we're using canonical mol to smiles conversion
            proposed_mol_smiles = oechem.OEMolToSmiles(current_mol)
            logp_proposal = 0.0

        # Build the new Topology object, including the proposed molecule
        new_topology = self._build_new_topology(current_receptor_topology, proposed_mol)
        new_mol_start_index, len_new_mol = self._find_mol_start_index(new_topology)

        # Generate an OpenMM System from the proposed Topology
        new_system = self._system_generator.build_system(new_topology)

        # Determine atom mapping between old and new molecules
        if not self._atom_map:
            mol_atom_map = self._get_mol_atom_map(current_mol, proposed_mol, atom_expr=self.atom_expr,
                                                  bond_expr=self.bond_expr, verbose=self.verbose,
                                                  allow_ring_breaking=self._allow_ring_breaking)
        else:
            mol_atom_map = self._atom_map

        # Adjust atom mapping indices for the presence of the receptor
        adjusted_atom_map = {}
        for (key, value) in mol_atom_map.items():
            adjusted_atom_map[key+new_mol_start_index] = value + old_mol_start_index

        # Incorporate atom mapping of all environment atoms
        old_mol_offset = len_old_mol
        for i in range(new_mol_start_index):
            if i >= old_mol_start_index:
                old_idx = i + old_mol_offset
            else:
                old_idx = i
            adjusted_atom_map[i] = old_idx

        # Create the TopologyProposal onbject
        proposal = TopologyProposal(logp_proposal=logp_proposal, new_to_old_atom_map=adjusted_atom_map,
            old_topology=current_topology, new_topology=new_topology,
            old_system=current_system, new_system=new_system,
            old_alchemical_atoms=old_alchemical_atoms,
            old_chemical_state_key=current_mol_smiles, new_chemical_state_key=proposed_mol_smiles)

        ndelete = proposal.old_system.getNumParticles() - len(proposal.old_to_new_atom_map.keys())
        ncreate = proposal.new_system.getNumParticles() - len(proposal.old_to_new_atom_map.keys())
        _logger.info('Proposed transformation would delete %d atoms and create %d atoms.' % (ndelete, ncreate))

        return proposal

[docs]    @staticmethod
    def canonicalize_smiles(smiles):
        """
        Convert a SMILES string into canonical isomeric smiles

        Parameters
        ----------
        smiles : str
            Any valid SMILES for a molecule

        Returns
        -------
        iso_can_smiles : str
            OpenEye isomeric canonical smiles corresponding to the input
        """
        mol = oechem.OEMol()
        oechem.OESmilesToMol(mol, smiles)
        oechem.OEAddExplicitHydrogens(mol)
        iso_can_smiles = oechem.OECreateSmiString(mol, OESMILES_OPTIONS)
        return iso_can_smiles

    def _topology_to_smiles(self, topology):
        """
        Get the smiles string corresponding to a specific residue in an
        OpenMM Topology

        Parameters
        ----------
        topology : app.Topology
            The topology containing the molecule of interest

        Returns
        -------
        smiles_string : string
            an isomeric canonicalized SMILES string representing the molecule
        oemol : oechem.OEMol object
            molecule
        """
        molecule_name = self._residue_name

        matching_molecules = [res for res in topology.residues() if res.name[:3] == molecule_name[:3]]  # Find residue in topology by searching for residues with name "MOL"
        if len(matching_molecules) != 1:
            raise ValueError("More than one residue with the same name!")
        mol_res = matching_molecules[0]
        oemol = forcefield_generators.generateOEMolFromTopologyResidue(mol_res)
        smiles_string = oechem.OECreateSmiString(oemol, OESMILES_OPTIONS)
        final_smiles_string = smiles_string
        return final_smiles_string, oemol

[docs]    def compute_state_key(self, topology):
        """
        Given a topology, come up with a state key string.
        For this class, the state key is an isomeric canonical SMILES.

        Parameters
        ----------
        topology : app.Topology object
            The topology object in question.

        Returns
        -------
        chemical_state_key : str
            isomeric canonical SMILES

        """
        chemical_state_key, _ = self._topology_to_smiles(topology)
        return chemical_state_key

    def _find_mol_start_index(self, topology):
        """
        Find the starting index of the molecule in the topology.
        Throws an exception if resname is not present.

        Parameters
        ----------
        topology : app.Topology object
            The topology containing the molecule

        Returns
        -------
        mol_start_idx : int
            start index of the molecule
        mol_length : int
            the number of atoms in the molecule
        """
        resname = self._residue_name
        mol_residues = [res for res in topology.residues() if res.name[:3]==resname[:3]]  # Find the residue by searching for residues with "MOL"
        if len(mol_residues)!=1:
            raise ValueError("There must be exactly one residue with a specific name in the topology. Found %d residues with name '%s'" % (len(mol_residues), resname))
        mol_residue = mol_residues[0]
        atoms = list(mol_residue.atoms())
        mol_start_idx = atoms[0].index
        return mol_start_idx, len(list(atoms))

    def _build_new_topology(self, current_receptor_topology, oemol_proposed):
        """
        Construct a new topology
        Parameters
        ----------
        oemol_proposed : oechem.OEMol object
            the proposed OEMol object
        current_receptor_topology : app.Topology object
            The current topology without the small molecule

        Returns
        -------
        new_topology : app.Topology object
            A topology with the receptor and the proposed oemol
        mol_start_index : int
            The first index of the small molecule
        """
        _logger.info('Building new Topology object...')
        timer_start = time.time()

        oemol_proposed.SetTitle(oemol_proposed.GetTitle())
        mol_topology = forcefield_generators.generateTopologyFromOEMol(oemol_proposed)
        new_topology = app.Topology()
        append_topology(new_topology, current_receptor_topology)
        append_topology(new_topology, mol_topology)
        # Copy periodic box vectors.
        if current_receptor_topology._periodicBoxVectors != None:
            new_topology._periodicBoxVectors = copy.deepcopy(current_receptor_topology._periodicBoxVectors)

        _logger.info('Topology generation took %.3f s' % (time.time() - timer_start))

        return new_topology

    def _remove_small_molecule(self, topology):
        """
        This method removes the small molecule parts of a topology from
        a protein+small molecule complex based on the name of the
        small molecule residue

        Parameters
        ----------
        topology : app.Topology
            Topology with the appropriate residue name.

        Returns
        -------
        receptor_topology : app.Topology
            Topology without small molecule
        """
        receptor_topology = app.Topology()
        append_topology(receptor_topology, topology, exclude_residue_name=self._residue_name)
        # Copy periodic box vectors.
        if topology._periodicBoxVectors != None:
            receptor_topology._periodicBoxVectors = copy.deepcopy(topology._periodicBoxVectors)
        return receptor_topology

    @staticmethod
    def _get_mol_atom_map(current_molecule, proposed_molecule, atom_expr=None, bond_expr=None, verbose=False, allow_ring_breaking=True):
        """
        Given two molecules, returns the mapping of atoms between them using the match with the greatest number of atoms

        Arguments
        ---------
        current_molecule : openeye.oechem.oemol object
             The current molecule in the sampler
        proposed_molecule : openeye.oechem.oemol object
             The proposed new molecule
        allow_ring_breaking : bool, optional, default=True
             If False, will check to make sure rings are not being broken or formed.

        Returns
        -------
        matches : list of match
            list of the matches between the molecules
        """
        _logger.info('Generating atom map...')
        timer_start = time.time()

        atom_expr = atom_expr or DEFAULT_ATOM_EXPRESSION
        bond_expr = bond_expr or DEFAULT_BOND_EXPRESSION

        oegraphmol_current = oechem.OEGraphMol(current_molecule) # pattern molecule
        oegraphmol_proposed = oechem.OEGraphMol(proposed_molecule) # target molecule
        #mcs = oechem.OEMCSSearch(oechem.OEMCSType_Exhaustive)
        mcs = oechem.OEMCSSearch(oechem.OEMCSType_Approximate)
        mcs.Init(oegraphmol_current, atom_expr, bond_expr)
        mcs.SetMCSFunc(oechem.OEMCSMaxBondsCompleteCycles())
        unique = True
        matches = [m for m in mcs.Match(oegraphmol_proposed, unique)]

        def enumerate_cycle_basis(molecule):
            """Enumerate a closed cycle basis of bonds in molecule.

            This uses cycle_basis from NetworkX:
            https://networkx.github.io/documentation/networkx-1.10/reference/generated/networkx.algorithms.cycles.cycle_basis.html#networkx.algorithms.cycles.cycle_basis

            Parameters
            ----------
            molecule : OEMol
                The molecule for a closed cycle basis of Bonds is to be identified

            Returns
            -------
            bond_cycle_basis : list of list of OEBond
                bond_cycle_basis[cycle_index] is a list of OEBond objects that define a cycle in the basis
                You can think of these as the minimal spanning set of ring systems to check.
            """
            import networkx as nx
            g = nx.Graph()
            for atom in molecule.GetAtoms():
                g.add_node(atom.GetIdx())
            for bond in molecule.GetBonds():
                g.add_edge(bond.GetBgnIdx(), bond.GetEndIdx(), bond=bond)
            bond_cycle_basis = list()
            for cycle in nx.cycle_basis(g):
                bond_cycle = list()
                for i in range(len(cycle)):
                    atom_index_1 = cycle[i]
                    atom_index_2 = cycle[(i+1)%len(cycle)]
                    edge = g.edges[atom_index_1,atom_index_2]
                    bond = edge['bond']
                    bond_cycle.append(bond)
                bond_cycle_basis.append(bond_cycle)
            return bond_cycle_basis

        def enumerate_ring_bonds(molecule, ring_membership, ring_index):
            """Enumerate OEBond objects in ring."""
            for bond in molecule.GetBonds():
                if (ring_membership[bond.GetBgnIdx()] == ring_index) and (ring_membership[bond.GetEndIdx()] == ring_index):
                    yield bond

        def breaks_rings_in_transformation(molecule1, molecule2, atom_map):
            """Return True if the transformation from molecule1 to molecule2 breaks rings.

            Parameters
            ----------
            molecule1 : OEMol
                Initial molecule whose rings are to be checked for not being broken
            molecule2 : OEMol
                Final molecule
            atom_map : dict of OEAtom : OEAtom
                atom_map[molecule1_atom] is the corresponding molecule2 atom
            """
            for cycle in enumerate_cycle_basis(molecule1):
                for bond in cycle:
                    # All bonds in this cycle must also be present in molecule2
                    if not ((bond.GetBgn() in atom_map) and (bond.GetEnd() in atom_map)):
                        return True # there are no corresponding atoms in molecule2
                    if not atom_map[bond.GetBgn()].GetBond(atom_map[bond.GetEnd()]):
                        return True # corresponding atoms have no bond in molecule2
            return False # no rings in molecule1 are broken in molecule2

        def preserves_rings(match):
            """Returns True if the transformation allows ring systems to be broken or created."""
            pattern_atoms = { atom.GetIdx() : atom for atom in oegraphmol_current.GetAtoms() }
            target_atoms = { atom.GetIdx() : atom for atom in oegraphmol_proposed.GetAtoms() }

            pattern_to_target_map = { pattern_atoms[matchpair.pattern.GetIdx()] : target_atoms[matchpair.target.GetIdx()] for matchpair in match.GetAtoms() }
            if breaks_rings_in_transformation(oegraphmol_current, oegraphmol_proposed, pattern_to_target_map):
                return False

            target_to_pattern_map = { target_atoms[matchpair.target.GetIdx()] : pattern_atoms[matchpair.pattern.GetIdx()] for matchpair in match.GetAtoms() }
            if breaks_rings_in_transformation(oegraphmol_proposed, oegraphmol_current, target_to_pattern_map):
                return False

            return True

        if allow_ring_breaking is False:
            # Filter the matches to remove any that allow ring breaking
            matches = [m for m in matches if preserves_rings(m)]

        if not matches:
            return {}
        match = max(matches, key=lambda m: m.NumAtoms())
        new_to_old_atom_map = {}
        for matchpair in match.GetAtoms():
            old_index = matchpair.pattern.GetIdx()
            new_index = matchpair.target.GetIdx()

            old_atom = current_molecule.GetAtom(oechem.OEHasAtomIdx(old_index))
            new_atom = proposed_molecule.GetAtom(oechem.OEHasAtomIdx(new_index))

            #Check if a hydrogen was mapped to a non-hydroden (basically the xor of is_h_a and is_h_b)
            if (old_atom.GetAtomicNum() == 1) != (new_atom.GetAtomicNum() == 1):
                continue

            # If the above is not true, then they are either both hydrogens or both not hydrogens
            elif old_atom.GetAtomicNum() == 1:
                bond = list(old_atom.GetBonds())[0] # There is only one for hydrogen
                bgn = bond.GetBgn()
                end = bond.GetEnd()

                # Is this atom the beginning of the bond?
                if bgn.GetIdx() == old_index:
                    other_atom = end
                else:
                    other_atom = bgn

                new_bond = list(new_atom.GetBonds())[0]
                new_bgn = new_bond.GetBgn()
                new_end = new_bond.GetEnd()

                if new_bgn.GetIdx() == new_index:
                    new_other_atom = new_end
                else:
                    new_other_atom = new_bgn

                if new_other_atom.GetAtomicNum() != other_atom.GetAtomicNum():
                    continue


            new_to_old_atom_map[new_index] = old_index

        _logger.info('Atom map took %.3f s' % (time.time() - timer_start))
        return new_to_old_atom_map

    def _propose_molecule(self, system, topology, molecule_smiles, exclude_self=False):
        """
        Propose a new molecule given the current molecule.

        The current scheme uses a probability matrix computed via _calculate_probability_matrix.

        Arguments
        ---------
        system : simtk.openmm.System object
            The current system
        topology : simtk.openmm.app.Topology object
            The current topology
        positions : [n, 3] np.ndarray of floats (Quantity nm)
            The current positions of the system
        molecule_smiles : string
            The current molecule smiles
        exclude_self : bool, optional, default=True
            If True, exclude self-transitions

        Returns
        -------
        proposed_mol_smiles : str
             The SMILES of the proposed molecule
        mol : oechem.OEMol
            The next molecule to simulate
        logp_proposal : float
            contribution from the chemical proposal to the log probability of acceptance (Eq. 36 for hybrid; Eq. 53 for two-stage)
            log [P(Mold | Mnew) / P(Mnew | Mold)]
        """
        # Compute contribution from the chemical proposal to the log probability of acceptance (Eq. 36 for hybrid; Eq. 53 for two-stage)
        # log [P(Mold | Mnew) / P(Mnew | Mold)]

        # Retrieve the current molecule index
        try:
            current_smiles_idx = self._smiles_list.index(molecule_smiles)
        except ValueError as e:
            msg = "Current SMILES string '%s' not found in canonical molecule set.\n"
            msg += "Molecule set: %s" % self._smiles_list
            raise Exception(msg)

        # Propose a new molecule
        molecule_probabilities = self._probability_matrix[current_smiles_idx, :]
        proposed_smiles_idx = np.random.choice(range(len(self._smiles_list)), p=molecule_probabilities)
        reverse_probability = self._probability_matrix[proposed_smiles_idx, current_smiles_idx]
        forward_probability = molecule_probabilities[proposed_smiles_idx]
        proposed_smiles = self._smiles_list[proposed_smiles_idx]
        logp = np.log(reverse_probability) - np.log(forward_probability)
        from perses.tests.utils import smiles_to_oemol
        proposed_mol = smiles_to_oemol(proposed_smiles, "MOL_%d" %proposed_smiles_idx)
        return proposed_smiles, proposed_mol, logp

    def _calculate_probability_matrix(self, molecule_smiles_list):
        """
        Calculate the matrix of probabilities of choosing A | B
        based on normalized MCSS overlap. Does not check for torsions!
        Parameters
        ----------
        molecule_smiles_list : list of str
            list of molecules to be potentially selected

        Returns
        -------
        probability_matrix : [n, n] np.ndarray
            probability_matrix[Mold, Mnew] is the probability of choosing molecule Mnew given the current molecule is Mold

        """
        n_smiles = len(molecule_smiles_list)
        probability_matrix = np.zeros([n_smiles, n_smiles])
        for i in range(n_smiles):
            for j in range(i):
                current_mol = oechem.OEMol()
                proposed_mol = oechem.OEMol()
                oechem.OESmilesToMol(current_mol, molecule_smiles_list[i])
                oechem.OESmilesToMol(proposed_mol, molecule_smiles_list[j])
                atom_map = self._get_mol_atom_map(current_mol, proposed_mol, atom_expr=self.atom_expr, bond_expr=self.bond_expr)
                if not atom_map:
                    n_atoms_matching = 0
                    continue
                n_atoms_matching = len(atom_map.keys())
                probability_matrix[i, j] = n_atoms_matching
                probability_matrix[j, i] = n_atoms_matching
        #normalize the rows:
        for i in range(n_smiles):
            row_sum = np.sum(probability_matrix[i, :])
            try:
                probability_matrix[i, :] /= row_sum
            except ZeroDivisionError:
                print("One molecule is completely disconnected!")
                raise

        if self._storage:
            self._storage.write_object('molecule_smiles_list', molecule_smiles_list)
            self._storage.write_array('probability_matrix', probability_matrix)

        return probability_matrix

    @property
    def chemical_state_list(self):
         return self._smiles_list

[docs]    @staticmethod
    def clean_molecule_list(smiles_list, atom_opts, bond_opts):
        """
        A utility function to determine which molecules can be proposed
        from any other molecule.

        Parameters
        ----------
        smiles_list
        atom_opts
        bond_opts

        Returns
        -------
        safe_smiles
        removed_smiles
        """
        from perses.tests.utils import smiles_to_topology
        import itertools
        from perses.rjmc.geometry import ProposalOrderTools
        from perses.tests.test_geometry_engine import oemol_to_openmm_system
        safe_smiles = set()
        smiles_pairs = set()
        smiles_set = set(smiles_list)

        for mol1, mol2 in itertools.combinations(smiles_list, 2):
            smiles_pairs.add((mol1, mol2))

        for smiles_pair in smiles_pairs:
            topology_1, mol1 = smiles_to_topology(smiles_pair[0])
            topology_2, mol2 = smiles_to_topology(smiles_pair[1])
            try:
                sys1, pos1, top1 = oemol_to_openmm_system(mol1)
                sys2, pos2, top2 = oemol_to_openmm_system(mol2)
            except:
                continue
            new_to_old_atom_map = SmallMoleculeSetProposalEngine._get_mol_atom_map(mol1, mol2, atom_expr=atom_opts, bond_expr=bond_opts)
            if not new_to_old_atom_map:
                continue
            top_proposal = TopologyProposal(new_topology=top2, old_topology=top1, new_system=sys2, old_system=sys1, new_to_old_atom_map=new_to_old_atom_map, new_chemical_state_key='e', old_chemical_state_key='w')
            proposal_order = ProposalOrderTools(top_proposal)
            try:
                forward_order = proposal_order.determine_proposal_order(direction='forward')
                reverse_order = proposal_order.determine_proposal_order(direction='reverse')
                safe_smiles.add(smiles_pair[0])
                safe_smiles.add(smiles_pair[1])
                _logger.info("Adding %s and %s" % (smiles_pair[0], smiles_pair[1]))
            except NoTorsionError:
                pass
        removed_smiles = smiles_set.difference(safe_smiles)
        return safe_smiles, removed_smiles

[docs]class PremappedSmallMoleculeSetProposalEngine(SmallMoleculeSetProposalEngine):
    """
    This proposal engine uses the SmallMoleculeAtomMapper to have all atoms premapped and the proposal distribution pre-formed and checked.
    It is intended to be substantially faster, as well as more robust (having excluded mappings that would not lead to a valid geometry proposal)
    """

[docs]    def __init__(self, atom_mapper: SmallMoleculeAtomMapper, system_generator: SystemGenerator, residue_name: str="MOL", storage: NetCDFStorageView=None):
        self._atom_mapper = atom_mapper
        self._atom_mapper.map_all_molecules()
        self._atom_mapper.generate_and_check_proposal_matrix()
        self._proposal_matrix = self._atom_mapper.proposal_matrix

        self._n_molecules = self._atom_mapper.n_molecules
        
        super(PremappedSmallMoleculeSetProposalEngine, self).__init__(self._atom_mapper.smiles_list, system_generator, residue_name=residue_name,
                 proposal_metadata=None, storage=storage,
                 always_change=True)

[docs]    def propose(self, current_system, current_topology, current_smiles=None, proposed_mol=None, map_index=None, current_metadata=None):
        """
        Propose the next state, given the current state

        Parameters
        ----------
        current_system : openmm.System object
            the system of the current state
        current_topology : app.Topology object
            the topology of the current state
        current_smiles : str, default None
            Specify the current SMILES string to avoid perceiving it from the topology. If None, perceive from topology.
        proposed_mol : oechem.OEMol, optional
            the molecule to propose. If None, choose randomly based on the current molecule
        map_index : int, default None
            The index of the atom map to use. If None, choose randomly. Otherwise, use map idx of map_index mod n_maps
        current_metadata : dict
            dict containing current smiles as a key

        Returns
        -------
        proposal : TopologyProposal object
           topology proposal object
        """
        # Determine SMILES string for current small molecule if the SMILES isn't specified
        if current_smiles is None:
            current_mol_smiles, _ = self._topology_to_smiles(current_topology)
        else:
            current_mol_smiles = current_smiles

        # Remove the small molecule from the current Topology object
        current_receptor_topology = self._remove_small_molecule(current_topology)

        # Find the initial atom index of the small molecule in the current topology
        old_mol_start_index, len_old_mol = self._find_mol_start_index(current_topology)

        # Determine atom indices of the small molecule in the current topology
        old_alchemical_atoms = range(old_mol_start_index, len_old_mol)

        # Select the next molecule SMILES given proposal probabilities
        current_mol_index = self._atom_mapper.get_smiles_index(current_mol_smiles)

        #If we aren't specifying a proposed molecule, then randomly propose one:
        if proposed_mol is None:
            #get probability vector for proposal
            proposal_probability = self._proposal_matrix[current_mol_index, :]

            #propose next index
            proposed_index = np.random.choice(range(self._n_molecules), p=proposal_probability)

            #proposal logp
            proposed_logp = np.log(proposal_probability[proposed_index])

            #reverse proposal logp
            reverse_logp = np.log(self._proposal_matrix[proposed_index, current_mol_index])

            #logp overall of proposal
            logp_proposal = reverse_logp - proposed_logp

            #get the oemol corresponding to the proposed molecule:
            proposed_mol_smiles = self._atom_mapper.smiles_list[proposed_index]
            proposed_mol = self._atom_mapper.get_oemol_from_smiles(proposed_mol_smiles)

        else:
            logp_proposal = 0.0
            proposed_mol_smiles = oechem.OECreateSmiString(proposed_mol, OESMILES_OPTIONS)

        #You will get a weird error if you don't assign atom names.
        oechem.OETriposAtomNames(proposed_mol)

        # Build the new Topology object, including the proposed molecule
        new_topology = self._build_new_topology(current_receptor_topology, proposed_mol)
        new_mol_start_index, len_new_mol = self._find_mol_start_index(new_topology)

        # Generate an OpenMM System from the proposed Topology
        new_system = self._system_generator.build_system(new_topology)

        # Determine atom mapping between old and new molecules
        mol_atom_maps = self._atom_mapper.get_atom_maps(current_mol_smiles, proposed_mol_smiles)

        #If no map index is given, just randomly choose one:
        if map_index is None:
            mol_atom_map = np.random.choice(mol_atom_maps)

        #Otherwise, pick the map whose index is the specified index mod n_maps
        else:
            n_atom_maps = len(mol_atom_maps)
            index_to_choose = map_index % n_atom_maps
            mol_atom_map = mol_atom_maps[index_to_choose]

        # Adjust atom mapping indices for the presence of the receptor
        adjusted_atom_map = {}
        for (key, value) in mol_atom_map.items():
            adjusted_atom_map[key+new_mol_start_index] = value + old_mol_start_index

        # Incorporate atom mapping of all environment atoms
        old_mol_offset = len_old_mol
        for i in range(new_mol_start_index):
            if i >= old_mol_start_index:
                old_idx = i + old_mol_offset
            else:
                old_idx = i
            adjusted_atom_map[i] = old_idx

        # Create the TopologyProposal onbject
        proposal = TopologyProposal(logp_proposal=logp_proposal, new_to_old_atom_map=adjusted_atom_map,
            old_topology=current_topology, new_topology=new_topology,
            old_system=current_system, new_system=new_system,
            old_alchemical_atoms=old_alchemical_atoms,
            old_chemical_state_key=current_mol_smiles, new_chemical_state_key=proposed_mol_smiles)

        ndelete = proposal.old_system.getNumParticles() - len(proposal.old_to_new_atom_map.keys())
        ncreate = proposal.new_system.getNumParticles() - len(proposal.old_to_new_atom_map.keys())
        _logger.info('Proposed transformation would delete %d atoms and create %d atoms.' % (ndelete, ncreate))

        return proposal


class TwoMoleculeSetProposalEngine(SmallMoleculeSetProposalEngine):
    """
    Dummy proposal engine that always chooses the new molecule of the two, but uses the base class's atom mapping
    functionality.
    """

    def __init__(self, old_mol, new_mol, system_generator, residue_name='MOL', atom_expr=None, bond_expr=None, proposal_metadata=None, storage=None, always_change=True, atom_map=None):
        self._old_mol_smiles = oechem.OECreateSmiString(old_mol, OESMILES_OPTIONS)
        self._new_mol_smiles = oechem.OECreateSmiString(new_mol, OESMILES_OPTIONS)
        self._old_mol = old_mol
        self._new_mol = new_mol

        super(TwoMoleculeSetProposalEngine, self).__init__([self._old_mol_smiles, self._new_mol_smiles], system_generator, residue_name=residue_name, atom_expr=atom_expr, bond_expr=bond_expr, atom_map=atom_map)

        self._allow_ring_breaking = False # don't allow ring breaking

    def _propose_molecule(self, system, topology, molecule_smiles, exclude_self=False):
        return self._new_mol_smiles, self._new_mol, 0.0

[docs]class NullProposalEngine(SmallMoleculeSetProposalEngine):
    """
    Base class for NaphthaleneProposalEngine and ButantProposalEngine
    Not intended for use on its own

    """
[docs]    def __init__(self, system_generator, residue_name="MOL", atom_expr=None, bond_expr=None, proposal_metadata=None, storage=None, always_change=True):
        super(NullProposalEngine, self).__init__(list(), system_generator, residue_name=residue_name)
        self._fake_states = ["A","B"]
        self.smiles = ''

[docs]    def propose(self, current_system, current_topology, current_metadata=None):
        """
        Custom proposal for NaphthaleneTestSystem will switch from current naphthalene
        "state" (either 'naphthalene-A' or 'naphthalene-B') to the other

        This proposal engine can only be used with input topology of
        naphthalene, and propose() will first confirm naphthalene is residue "MOL"
        The topology may have water but should not have any other
        carbon-containing residue.

        The "new" system and topology are deep copies of the old, but the atom_map
        is custom defined to only match one of the two rings.

        Arguments:
        ----------
        current_system : openmm.System object
            the system of the current state
        current_topology : app.Topology object
            the topology of the current state
        current_metadata : dict, OPTIONAL
            Not implemented

        Returns:
        -------
        proposal : TopologyProposal object
           topology proposal object

        """
        given_smiles = super(NullProposalEngine, self).compute_state_key(current_topology)
        if self.smiles != given_smiles:
            raise(Exception("{0} can only be used with {1} topology; smiles of given topology is: {2}".format(type(self), self.smiles, given_smiles)))

        old_key = current_topology._state_key
        new_key = [key for key in self._fake_states if key != old_key][0]

        new_topology = app.Topology()
        append_topology(new_topology, current_topology)
        new_topology._state_key = new_key
        new_system = copy.deepcopy(current_system)
        atom_map = self._make_skewed_atom_map(current_topology)
        proposal = TopologyProposal(new_topology=new_topology, new_system=new_system, old_topology=current_topology, old_system=current_system, logp_proposal=0.0,
                                                 new_to_old_atom_map=atom_map, old_chemical_state_key=old_key, new_chemical_state_key=new_key)
        return proposal

    def _make_skewed_atom_map(topology):
        return dict()

[docs]    def compute_state_key(self, topology):
        """
        For this test system, the topologies for the two states are
        identical; therefore a custom attribute `_state_key` has
        been added to the topology itself, to track whether a switch
        has been accepted

        compute_state_key will return topology._state_key rather than
        SMILES, because SMILES are identical in the two states.
        """
        return topology._state_key


[docs]class NaphthaleneProposalEngine(NullProposalEngine):
    """
    Custom ProposalEngine to use with NaphthaleneTestSystem defines two "states"
    of naphthalene, identified 'naphthalene-A' and 'naphthalene-B', which are
    tracked by adding a custom _state_key attribute to the topology

    Generates TopologyProposal from naphthalene to naphthalene, only matching
    one ring such that geometry must rebuild the other

    Can only be used with input topology of naphthalene

    Constructor Arguments:
        system_generator, SystemGenerator object
            SystemGenerator initialized with the appropriate forcefields
        residue_name, OPTIONAL,  str
            Default = "MOL"
            The name that will be used for small molecule residues in the topology
        atom_expr, OPTIONAL, oechem.OEExprOpts
            Default is None
            Currently not implemented -- would dictate how match is defined
        bond_expr, OPTIONAL, oechem.OEExprOpts
            Default is None
            Currently not implemented -- would dictate how match is defined
        proposal_metadata, OPTIONAL, dict
            Default is None
            metadata for the proposal engine
        storage, OPTIONAL, NetCDFStorageView
            Default is None
            If specified, write statistics to this storage layer
        always_change, OPTIONAL, bool
            Default is True
            Currently not implemented -- will always behave as True
            The proposal will always be from the current "state" to the other
            Self proposals will never be made
    """
[docs]    def __init__(self, system_generator, residue_name="MOL", atom_expr=None, bond_expr=None, proposal_metadata=None, storage=None, always_change=True):
        super(NaphthaleneProposalEngine, self).__init__(system_generator, residue_name=residue_name, atom_expr=atom_expr, bond_expr=bond_expr, proposal_metadata=proposal_metadata, storage=storage, always_change=always_change)
        self._fake_states = ["naphthalene-A", "naphthalene-B"]
        self.smiles = 'c1ccc2ccccc2c1'

    def _make_skewed_atom_map(self, topology):
        """
        Custom definition for the atom map between naphthalene and naphthalene

        If a regular atom map was constructed (via oechem.OEMCSSearch), all
        atoms would be matched, and the geometry engine would have nothing
        to add.  This method manually finds one of the two naphthalene rings
        and matches it to itself, rotated 180degrees.  The second ring and
        all hydrogens will have to be repositioned by the geometry engine.

        The rings are identified by finding how many other carbons each
        carbon in the topology is bonded to.  This will be 2 for all carbons
        in naphthalene except the two carbon atoms shared by both rings.
        Starting with these 2 shared atoms, a ring is traced by following
        bonds between adjacent carbons until returning to the shared carbons.

        Arguments:
        ----------
        topology : app.Topology object
            topology of naphthalene
            Only one topology is needed, because current and proposed are
            identical
        Returns:
        --------
        atom_map : dict
            maps the atom indices of carbons from one ring back to themselves
        """
        # make dict of carbons in topology to list of carbons they're bonded to
        carbon_bonds = dict()
        for atom in topology.atoms():
            if atom.element == app.element.carbon:
                carbon_bonds[atom] = set()
        for bond in topology.bonds():
            if bond[0].element == app.element.carbon and bond[1].element == app.element.carbon:
                carbon_bonds[bond[0]].add(bond[1])
                carbon_bonds[bond[1]].add(bond[0])

        # identify the rings by finding 2 middle carbons then tracing their bonds
        find_ring = list()
        for carbon, bounds in carbon_bonds.items():
            if len(bounds) == 3:
                find_ring.append(carbon)
        assert len(find_ring) == 2

        add_next = [carbon for carbon in carbon_bonds[find_ring[1]] if carbon not in find_ring]
        while len(add_next) > 0:
            find_ring.append(add_next[0])
            add_next = [carbon for carbon in carbon_bonds[add_next[0]] if carbon not in find_ring]
        assert len(find_ring) == 6

        # map the ring flipped 180
        atom_map = {find_ring[i].index : find_ring[(i+3)%6].index for i in range(6)}
        return atom_map

[docs]class ButaneProposalEngine(NullProposalEngine):
    """
    Custom ProposalEngine to use with ButaneTestSystem defines two "states"
    of butane, identified 'butane-A' and 'butane-B', which are
    tracked by adding a custom _state_key attribute to the topology

    Generates TopologyProposal from butane to butane, only matching
    two carbons such that geometry must rebuild the others

    Can only be used with input topology of butane

    Constructor Arguments:
        system_generator, SystemGenerator object
            SystemGenerator initialized with the appropriate forcefields
        residue_name, OPTIONAL,  str
            Default = "MOL"
            The name that will be used for small molecule residues in the topology
        atom_expr, OPTIONAL, oechem.OEExprOpts
            Default is None
            Currently not implemented -- would dictate how match is defined
        bond_expr, OPTIONAL, oechem.OEExprOpts
            Default is None
            Currently not implemented -- would dictate how match is defined
        proposal_metadata, OPTIONAL, dict
            Default is None
            metadata for the proposal engine
        storage, OPTIONAL, NetCDFStorageView
            Default is None
            If specified, write statistics to this storage layer
        always_change, OPTIONAL, bool
            Default is True
            Currently not implemented -- will always behave as True
            The proposal will always be from the current "state" to the other
            Self proposals will never be made
    """
[docs]    def __init__(self, system_generator, residue_name="MOL", atom_expr=None, bond_expr=None, proposal_metadata=None, storage=None, always_change=True):
        super(ButaneProposalEngine, self).__init__(system_generator, residue_name=residue_name, atom_expr=atom_expr, bond_expr=bond_expr, proposal_metadata=proposal_metadata, storage=storage, always_change=always_change)
        self._fake_states = ["butane-A", "butane-B"]
        self.smiles = 'CCCC'

    def _make_skewed_atom_map_old(self, topology):
        """
        Custom definition for the atom map between butane and butane

        (disabled)

        MAP:

                   H - C - CH- C - C - H
          H - C - CH - C - CH- H

        If a regular atom map was constructed (via oechem.OEMCSSearch), all
        atoms would be matched, and the geometry engine would have nothing
        to add.  This method manually finds two of the four carbons and
        matches them to each other, rotated 180degrees.  The other two carbons
        and hydrogens will have to be repositioned by the geometry engine.

        The two carbons are chosen by finding the first carbon-carbon bond in
        the topology. Because geometry needs at least 3 atoms in the atom_map,
        one H from each carbon is also found and mapped to the other.

        Arguments:
        ----------
        topology : app.Topology object
            topology of butane
            Only one topology is needed, because current and proposed are
            identical
        Returns:
        --------
        atom_map : dict
            maps the atom indices of 2 carbons to each other
        """
        for bond in topology.bonds():
            if all([atom.element == app.element.carbon for atom in bond]):
                ccbond = bond
                break
        for bond in topology.bonds():
            if ccbond[0] in bond and any([atom.element == app.element.hydrogen for atom in bond]):
                hydrogen0 = [atom for atom in bond if atom.element == app.element.hydrogen][0]
            if ccbond[1] in bond and any([atom.element == app.element.hydrogen for atom in bond]):
                hydrogen1 = [atom for atom in bond if atom.element == app.element.hydrogen][0]

        atom_map = {
            ccbond[0].index : ccbond[1].index,
            ccbond[1].index : ccbond[0].index,
            hydrogen0.index : hydrogen1.index,
            hydrogen1.index : hydrogen0.index,
        }
        return atom_map

    def _make_skewed_atom_map(self, topology):
        """
        Custom definition for the atom map between butane and butane

        MAP:

        C - C - C - C
            C - C - C - C

        Arguments:
        ----------
        topology : app.Topology object
            topology of butane
            Only one topology is needed, because current and proposed are
            identical
        Returns:
        --------
        atom_map : dict
            maps the atom indices of 2 carbons to each other
        """

        carbons = [ atom for atom in topology.atoms() if (atom.element == app.element.carbon) ]
        neighbors = { carbon : set() for carbon in carbons }
        for (atom1,atom2) in topology.bonds():
            if (atom1.element == app.element.carbon) and (atom2.element == app.element.carbon):
                neighbors[atom1].add(atom2)
                neighbors[atom2].add(atom1)
        end_carbons = list()
        for carbon in carbons:
            if len(neighbors[carbon]) == 1:
                end_carbons.append(carbon)

        # Extract linear chain of carbons
        carbon_chain = list()
        last_carbon = end_carbons[0]
        carbon_chain.append(last_carbon)
        finished = False
        while (not finished):
            next_carbons = list(neighbors[last_carbon].difference(carbon_chain))
            if len(next_carbons) == 0:
                finished = True
            else:
                carbon_chain.append(next_carbons[0])
                last_carbon = next_carbons[0]

        atom_map = {
            carbon_chain[0].index : carbon_chain[2].index,
            carbon_chain[1].index : carbon_chain[1].index,
            carbon_chain[2].index : carbon_chain[0].index,
        }
        return atom_map

[docs]class PropaneProposalEngine(NullProposalEngine):
    """
    Custom ProposalEngine to use with PropaneTestSystem defines two "states"
    of butane, identified 'propane-A' and 'propane-B', which are
    tracked by adding a custom _state_key attribute to the topology

    Generates TopologyProposal from propane to propane, only matching
    one CH3 and the middle C such that geometry must rebuild the other

    Can only be used with input topology of propane

    Constructor Arguments:
        system_generator, SystemGenerator object
            SystemGenerator initialized with the appropriate forcefields
        residue_name, OPTIONAL,  str
            Default = "MOL"
            The name that will be used for small molecule residues in the topology
        atom_expr, OPTIONAL, oechem.OEExprOpts
            Default is None
            Currently not implemented -- would dictate how match is defined
        bond_expr, OPTIONAL, oechem.OEExprOpts
            Default is None
            Currently not implemented -- would dictate how match is defined
        proposal_metadata, OPTIONAL, dict
            Default is None
            metadata for the proposal engine
        storage, OPTIONAL, NetCDFStorageView
            Default is None
            If specified, write statistics to this storage layer
        always_change, OPTIONAL, bool
            Default is True
            Currently not implemented -- will always behave as True
            The proposal will always be from the current "state" to the other
            Self proposals will never be made
    """
[docs]    def __init__(self, system_generator, residue_name="MOL", atom_expr=None, bond_expr=None, proposal_metadata=None, storage=None, always_change=True):
        super(PropaneProposalEngine, self).__init__(system_generator, residue_name=residue_name, atom_expr=atom_expr, bond_expr=bond_expr, proposal_metadata=proposal_metadata, storage=storage, always_change=always_change)
        self._fake_states = ["propane-A", "propane-B"]
        self.smiles = 'CCC'

    def _make_skewed_atom_map(self, topology):
        """
        Custom definition for the atom map between propane and propane

        If a regular atom map was constructed (via oechem.OEMCSSearch), all
        atoms would be matched, and the geometry engine would have nothing
        to add.  This method manually finds CH3-CH2 and matches each atom to
        itself.  The other carbon and three hydrogens will have to be
        repositioned by the geometry engine.

        The two carbons are chosen by finding the first carbon-carbon bond in
        the topology. To minimize the atoms being rebuilt by geometry, all
        hydrogens from each of the selected carbons are also found and
        mapped to themselves.

        Arguments:
        ----------
        topology : app.Topology object
            topology of propane
            Only one topology is needed, because current and proposed are
            identical
        Returns:
        --------
        atom_map : dict
            maps the atom indices of CH3-CH2 to themselves
        """
        atom_map = dict()
        for bond in topology.bonds():
            if all([atom.element == app.element.carbon for atom in bond]):
                ccbond = bond
                break
        for bond in topology.bonds():
            if any([carbon in bond for carbon in ccbond]) and app.element.hydrogen in [atom.element for atom in bond]:
                atom_map[bond[0].index] = bond[0].index
                atom_map[bond[1].index] = bond[1].index
        return atom_map
Source code for perses.rjmc.topology_proposal

perses

Navigation

Related Topics