"""
Utilities for sculpt complex.
Copyright Schrodinger, LLC. All rights reserved.
"""
import argparse
import itertools
import random
import re
from collections import namedtuple
import numpy
from schrodinger import structure
from schrodinger.application.matsci import buildcomplex
from schrodinger.application.matsci import etatoggle as tesu
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci.nano import constants
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import rmsd
from schrodinger.structutils import transform
SITE_PROP = 'i_matsci_coordination_site'
ANCHOR_PROP = 'i_matsci_anchor_index'
HYDROGEN = 'H'
FRAG_DICT = constants.Constants().TERMDICT.copy()
FRAG_DICT['hydrogen'] = [HYDROGEN, HYDROGEN]
FRAG_INFO = ', '.join(['"%s"' % x for x in FRAG_DICT.keys()])
SiteAtom = namedtuple('SiteAtom',
['entry_id', 'element', 'entry_index', 'ws_index'])
[docs]class SculptingError(Exception):
pass
[docs]class CoordinationInfo(object):
"""
Holds and yields information about atoms and groups that coordinate at
specific sites
"""
[docs] def __init__(self):
"""
Create a CoordinationInfo object
"""
self.sites = []
[docs] def addSite(self, site):
"""
Add information about the next site. Information should be added in the
order of the sites i.e. add information about site 1 first, then site
2...
:type site: `SiteAtom` or str
:param site: For sites occupied by ligand atoms, pass in the SiteAtom
for the ligand atom. For sites occupied by capping groups, pass in
the str name of the capping group.
"""
self.sites.append(site)
[docs] def ligandSites(self):
"""
A generator over all the sites attached to the ligand
:rtype: (int, `SiteAtom`)
:return: Iterates over pairs of index number (zero-based) and the
SiteAtom object associated with that site
"""
for index, site in enumerate(self.sites):
if isinstance(site, SiteAtom):
yield index, site
[docs] def cappedSites(self):
"""
A generator over all the sites capped by capping groups
:rtype: (int, str)
:return: Iterates over pairs of index number (zero-based) and the
name of the capping group. The name will be a key to the FRAG_DICT
dictionary
"""
for index, site in enumerate(self.sites):
if isinstance(site, str):
yield index, site
[docs] def allGroupsAreCapping(self):
"""
Check if all sites are capped (i.e. there are no ligand attachments)
:rtype: bool
:return: True if all sites are capped
"""
return not bool(list(self.ligandSites()))
[docs]def validate_coord_flag(coordinators):
"""
Validate that the -coord flag argument is correct and parse its information
:type coordinators: str
:param coordinators: The command line argument for the -coord flag
:rtype: (`CoordinationInfo`, list[str])
:return: The CoordinationInfo object with the command line information and
strings of corresponding VSEPR geometries. The strings will be
from the buildcomplex geometry constants (OCTAHEDRAL, etc.)
:raise `argparse.ArgumentTypeError`: If something is wrong with the input
"""
min_slots = min(buildcomplex.SLOTS_TO_GEOMS.keys())
max_slots = max(buildcomplex.SLOTS_TO_GEOMS.keys())
tokens = coordinators.split(',')
numtokens = len(tokens)
if numtokens < min_slots or numtokens > max_slots:
raise argparse.ArgumentTypeError(
'The number of -coord sites must correspond to a supported '
'VSEPR geometry - at least %d and no more than %d. Got %d instead.'
% (min_slots, max_slots, numtokens))
geometries = buildcomplex.SLOTS_TO_GEOMS[numtokens]
coordination_info = CoordinationInfo()
for token in tokens:
try:
index = int(token)
except ValueError:
if token in FRAG_DICT:
value = token
else:
raise argparse.ArgumentTypeError(
'Each WHAT value for the -coord flag must be either an '
'integer or one of the following values: %s. Got %s instead'
'.' % (FRAG_INFO, token))
else:
if index < 1:
raise argparse.ArgumentTypeError(
'Integer WHAT values for the -coord flag must be greater '
'than 0. Got %d instead.' % index)
value = SiteAtom(entry_id=None,
element=None,
ws_index=None,
entry_index=index)
coordination_info.addSite(value)
return coordination_info, geometries
[docs]class Sculptor(object):
"""
Does the work of sculpting a ligand around a metal atom and capping open
valences with small capping groups.
"""
[docs] def __init__(self, element, coordination_info, geometry, del_h, title,
enumerating):
"""
:type element: str
:param element: The atomic symbol of the central atom
:type coordination_info: `CoordinationInfo`
:param coordination_info: The CoordinationInfo that gives the
information about each coordination site
:type geometry: str
:param geometry: One of the VSEPR string constants from
schrodinger.application.matsci.buildcomplex (OCTAHEDRAL, etc.)
:type del_h: bool
:param del_h: Whether to delete H atoms at the ligand coordination sites
:type title: str
:param title: The base title for structures created by this Sculptor.
:type enumerating: bool
:param enumerating: True if enumerating multiple ligand structures
"""
self.element = element
self.coordination_info = coordination_info
self.geometry = geometry
self.del_h = del_h
self.title = title
self.enumerating = enumerating
self.locations = self.fixLocations()
[docs] def sculpt(self, ligstruct, optimize=True):
"""
Create a sculpted complex from the given ligand structure
:type ligstruct: `schrodinger.structure.Structure`
:param ligstruct: The structure of the ligand
:type optimize: bool
:param optimize: whether to perform a standard geometry optimization
on the final sculpted complex
:raise SculptingError: Too many metal-to-ligand bonds
:rtype: `schrodinger.structure.Structure`
:return: The structure of the complex
"""
self.struct = None
# Gather data necessary for building the complex
if self.coordination_info.allGroupsAreCapping():
self.ligands = []
else:
self.ligands = self.getLigandStructure(ligstruct)
self.alignLigands()
# Create the unminimized ligand structure
self.struct = self.createBasicStructure()
self.addCappingLigands()
self.bondLigandsToMetal()
self.addIdealAnchors()
self._addCoordinationNoise()
# for complexes with eta bound ligands if the central
# atom is not a metal, like for example is a carbon, then
# the sculpting minimizer below will raise a force field
# error so during that step temporarily use a metal
has_eta = any([
atom.atomic_number == -2
for atom, site in self.coordinatingAtoms(self.struct)
])
metal_idxs = analyze.evaluate_asl(self.struct, 'metals')
use_temp_metal = has_eta and 1 not in metal_idxs
if use_temp_metal:
self.struct.atom[1].element = 'Ir'
# Use the minimizer to force ligands to ideal binding positions
try:
self.minimizeIdealComplex(ligstruct.title)
except ValueError:
msg = (f'Error for ligand {ligstruct.title}:\nUnable to determine '
'Lewis structure so unable to create complex')
raise SculptingError(msg)
except RuntimeError as err:
self._handleMinimizationRuntimeError(err)
self.deleteAnchors()
# for complexes with eta bound ligands toggle from the input
# dummy atom representation prior to minimization
if has_eta:
try:
tesu.toggle_structure(self.struct)
except ValueError as err:
raise SculptingError(str(err))
if use_temp_metal:
self.struct.atom[1].element = self.element
# Fully optimize the clean (no dummy atoms, no restraints) complex,
# the exception can happen for a central "metal" carbon eta bound to
# ethene which results in carbon having a valence of 3 rather than 4
if optimize:
try:
buildcomplex.minimize_complex(self.struct,
forcefield=minimize.OPLS3)
except ValueError:
pass
# Set the title
title = self.title
if self.enumerating:
title += ' ' + ligstruct.title
self.struct.title = title
self._cleanAtomProperties(self.struct)
return self.struct
def _handleMinimizationRuntimeError(self, err):
"""
Raise a better error when minimization runtime errors occur
:param RuntimeError err: The error that was raised
:raise SculptingError: An error with an explanatory message
"""
error = str(err)
result = re.search(r'overlapping atoms (\d+) (\d+)', error)
if result:
atom1 = result.group(1)
atom2 = result.group(2)
msg = ('Unable to minimize complex geometry due to overlapping '
f'atoms {atom1} and {atom2}')
raise SculptingError(msg)
else:
# This is not the recognized overlapping atoms error
raise SculptingError(error)
def _cleanAtomProperties(self, struct):
"""
Remove atom properties used in sculpting from this structure to avoid
stale properties
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to remove atoms properties from
"""
# See MATSCI-5406
for prop in (SITE_PROP, ANCHOR_PROP):
msutils.remove_atom_property(struct, prop)
def _addCoordinationNoise(self):
# Add a tiny bit of noise to avoid any complete overlap between site
# atoms and their target dummy atoms because atoms at the exact same
# location cause a minimizer error, even if one is a dummy
for atom in self.struct.atom[1].bonded_atoms:
atom.xyz = [a + random.uniform(0.001, 0.003) for a in atom.xyz]
[docs] def getLigandStructure(self, struct):
"""
Fix up struct to prepare it for binding in the complex
:type struct: `schrodinger.structure.Structure`
:param struct: The raw structure of the ligand
:rtype: list
:return: The fixed up struct. A list is returned to maintain API
compatibitily with code that was implemented for
self.getLigandsFromMultiStructures. Left this way in case we
implement multiple ligand entries in the future.
"""
struct = struct.copy()
# MATSCI-5406
self._cleanAtomProperties(struct)
# Mark the coordinating atoms
for sitenum, info in self.coordination_info.ligandSites():
struct.atom[info.entry_index].property[SITE_PROP] = sitenum
# Remove hydrogens if requested
if self.del_h:
delatoms = []
for atom, site in self.coordinatingAtoms(struct):
for neighbor in atom.bonded_atoms:
if neighbor.element == 'H':
delatoms.append(neighbor.index)
struct.deleteAtoms(delatoms)
# Split the ligand structures up if the coordinating sites are on
# different molecules - must do this after deleting hydrogens just in
# case that creates new molecules (MATSCI-9739)
ligands = [struct]
mols = {x.molecule_number for x, y in self.coordinatingAtoms(struct)}
if len(mols) > 1:
delatoms = []
mols = sorted(list(mols))
# We extract a new ligand structure for each molecule bound to the
# metal after the first. This leaves the first bound molecule and
# any spectator molecules (those not bound to the metal) in the
# first structure.
for molnum in mols[1:]:
molecule = struct.molecule[molnum]
delatoms.extend(molecule.getAtomIndices())
ligands.append(molecule.extractStructure())
struct.deleteAtoms(delatoms)
return ligands
[docs] def coordinatingAtoms(self, struct):
"""
A generator over all the atoms in the structure that bind or will bind
to the metal
:type struct: `schrodinger.structure.Structure`
:param struct: The structure containing the atoms. Can be either a
ligand or the full complex.
:rtype: `schrodinger.structure._StructureAtom`, int
:return: An atom that binds/will bind to the metal and the coordination
site it occupies/will occupy
"""
for atom in struct.atom:
site = atom.property.get(SITE_PROP)
if site is not None:
yield atom, site
[docs] def fixLocations(self):
"""
Reorder the ideal locations list to correspond to the reordered
coordination sites in the siteselectors.
:rtype: list
:return: The list of ideal coordination sitess reordered the to same
order as shown in the GUI site selection diagrams. Each item of the
list is an XYZ tuple giving the coordination site if the metal is at
(0, 0, 0)
"""
raw_locations = buildcomplex.IDEAL_SLOTS[self.geometry]
if self.geometry == buildcomplex.OCTAHEDRAL:
order = [0, 1, 4, 3, 2, 5]
elif self.geometry == buildcomplex.SQUARE_PLANAR:
order = [0, 1, 3, 2]
else:
order = range(len(raw_locations))
return [raw_locations[x] for x in order]
[docs] def alignLigands(self):
"""
Align the ligands to good binding location, ensuring that the
coordinating atoms are near (as possible) to the coordination sites and
that the ligand is flipped to a reasonable orientation
"""
framework = structure.create_new_structure()
for loc in self.locations + [(0.0, 0.0, 0.0)]:
framework.addAtom('Du', *loc)
for lig in self.ligands:
siteatoms = [x[0] for x in self.coordinatingAtoms(lig)]
if len(siteatoms) == 1:
self._alignMonodentate(lig, siteatoms, framework)
elif len(siteatoms) == 2:
self._alignBidentate(lig, siteatoms, framework)
else:
self._alignManydentate(lig, siteatoms, framework)
def _alignMonodentate(self, ligand, siteatoms, framework):
"""
Align monodentate ligands to good binding location, ensuring that the
anticipated bond from ligand to metal overlaps the ideal bond direction.
Note that the rotation of the ligand about this bond is not optimized.
:type ligands: list
:param ligands: List of ligand structures
:type siteatoms: list
:param siteatoms: Each item is the _StructureAtom object for a
coordinating atom
:type framework: `schrodinger.structure.Structure`
:param framework: A mock structure containing a dummy at each ideal
coordination location (in the same order as the coordination sites)
and a dummy at the metal atom position as the final atom.
"""
siteatom = siteatoms[0]
# Add a dummy atom to the ligand structure at optimal mythical metal
# position. Then we can superimpose that dummy onto the metal.
site = (siteatom.index, 0)
ligst = buildcomplex.Ligand(ligand, sites=[site], slots=[0])
dative_site = ligst.struct.atom[ligst.struct.atom_total].xyz
dummy = ligand.addAtom('Du', *dative_site)
# Now superimpose the atom-dummy_metal bond onto the framework
pair1 = (siteatom.property[SITE_PROP] + 1, framework.atom_total)
pair2 = (siteatom.index, dummy.index)
rmsd.superimpose_bond(framework, pair1, ligand, pair2)
ligand.deleteAtoms([dummy.index])
def _alignBidentate(self, ligand, siteatoms, framework):
"""
Align bidentate ligands to good binding location, ensuring that the
coordinating atoms are near the coordination sites and that the ligand
is flipped so that the bulk of the ligand points away from the metal.
:type ligands: list
:param ligands: List of ligand structures
:type siteatoms: list
:param siteatoms: Each item is the _StructureAtom object for a
coordinating atom
:type framework: `schrodinger.structure.Structure`
:param framework: A mock structure containing a dummy at each ideal
coordination location (in the same order as the coordination sites)
and a dummy at the metal atom position as the final atom.
"""
indexes = [x.index for x in siteatoms]
# Find the centroid of the atoms that connect the two sites. We want
# this spot to be exactly opposite of the metal position.
path = analyze.find_shortest_bond_path(ligand, *indexes)
pcentroid = transform.get_centroid(ligand, atom_list=path)[:3]
# Find the location that is the opposite of this centroid. That's where
# we want the metal atom to be. Doing this ensures that the bulk of the
# ligand points away from the metal rather than overlapping it.
coords1 = numpy.array(siteatoms[0].xyz)
coords2 = numpy.array(siteatoms[1].xyz)
coordscent = numpy.array(pcentroid)
vec1 = coords1 - coordscent
vec2 = coords2 - coordscent
vecsum = vec1 + vec2
opposite = coordscent + vecsum
# Superimpose the two binding sites plus metal site
dummy = ligand.addAtom('Du', *opposite)
frame_trio = (framework.atom_total,
siteatoms[0].property[SITE_PROP] + 1,
siteatoms[1].property[SITE_PROP] + 1)
lig_trio = (dummy.index, indexes[0], indexes[1])
rmsd.superimpose(framework,
frame_trio,
ligand,
lig_trio,
move_which=rmsd.CT)
ligand.deleteAtoms([dummy.index])
def _alignManydentate(self, ligand, siteatoms, framework):
"""
Align tri or tetradentate ligands to good binding location, ensuring
that the coordinating atoms are near the coordination sites. We do this
by simply superimposing the atoms on their binding sites.
:type ligands: list
:param ligands: List of ligand structures
:type siteatoms: list
:param siteatoms: Each item is the _StructureAtom object for a
coordinating atom
:type framework: `schrodinger.structure.Structure`
:param framework: A mock structure containing a dummy at each ideal
coordination location (in the same order as the coordination sites)
and a dummy at the metal atom position as the final atom.
"""
sitedexes = [x.index for x in siteatoms]
framedexes = [x.property[SITE_PROP] + 1 for x in siteatoms]
rmsd.superimpose(framework,
framedexes,
ligand,
sitedexes,
move_which=rmsd.CT)
[docs] def createBasicStructure(self):
"""
Create the basic structure of the complex - metal atom plus ligands
without the ligands actually bound to the metal and without optimizing
their positions
:rtype: `schrodinger.structure.Structure`
:return: The basic complex structure
"""
struct = structure.create_new_structure()
metal_atom = struct.addAtom('C', 0.0, 0.0, 0.0)
buildcomplex.transmute_atom(metal_atom, self.element)
for ligand in self.ligands:
struct.extend(ligand)
return struct
[docs] def addCappingLigands(self):
"""
Add capping groups to the requested positions
"""
for index, info in self.coordination_info.cappedSites():
group, capper = FRAG_DICT[info]
if not capper:
# This slot is not occupied
continue
# Add an H atom at the cap position - this will be replaced by
# the capper fragment
to_atom = self.struct.addAtom('H', *self.locations[index])
self.struct.addBond(1, to_atom.index, 1)
if capper == HYDROGEN:
continue
build.attach_fragment(self.struct, 1, to_atom.index, group, capper)
[docs] def addIdealAnchors(self):
"""
Add dummy atoms at each of the ideal coordination locations
"""
for atom, site in self.coordinatingAtoms(self.struct):
location = self.locations[site]
anchor = self.struct.addAtom('Du', *location)
atom.property[ANCHOR_PROP] = anchor.index
[docs] def minimizeIdealComplex(self, ligname):
"""
Minimize the complex using the passed in minimizer
:type ligname: str
:param ligname: The title of the ligand structure
:rtype: `schrodinger.structutils.minimize.Minimzer`
:return: The minimizer with structure and restraints set
"""
with minimize.minimizer_context(struct=self.struct,
no_zob_restrain=True) as mizer:
# Make sure the metal atom doesn't move
mizer.addPosRestraint(1, 20000.)
for atom, site in self.coordinatingAtoms(self.struct):
anchor = self.struct.atom[atom.property[ANCHOR_PROP]]
# Make sure dummies marking the ideal locations don't move
mizer.addPosRestraint(anchor.index, 20000.)
# Add the restraints that pull the actual atoms towards the ideal
# locations, while implementing MATSCI-6955 it was found that
# for eta bound ligands, represented with a central dummy atom
# ZOB-ed to the haptic ring atoms, if the real-anchor dummy
# atom pair is allowed arbitrary close then it triggers
# a force field overlapping atoms error, so pass a small number
# rather than zero
mizer.addDistanceRestraint(atom.index, anchor.index, 200., 1e-3)
# for eta bound ligands, represented with a central dummy atom
# ZOB-ed to the haptic ring atoms, restrain the haptic system,
# the 1.4 Ang. distance is a reference centroid to carbon distance
# for benzene
if atom.atomic_number == -2:
neighbors = [
bond.atom2
for bond in atom.bond
if bond.type == structure.BondType.Zero
]
for neighbor in neighbors:
mizer.addDistanceRestraint(atom.index, neighbor.index,
200., 1.4)
mizer.addAngleRestraint(1, atom.index, neighbor.index,
20000., 90.)
for neighbor_i, neighbor_j in itertools.permutations(
neighbors, 2):
angle = self.struct.measure(neighbor_i, atom,
neighbor_j)
mizer.addAngleRestraint(neighbor_i.index, atom.index,
neighbor_j.index, 20000., angle)
# It may not be necessary to run all the minimizations, but they are
# fast and ensure the complex achieves ideal geometry. Definitely once
# is not enough for good complex geometries from poor starting points.
mizer.minimize()
mizer.minimize()
mizer.minimize()
[docs] def deleteAnchors(self):
"""
Delete the dummy atoms that mark the ideal coordination sites
"""
dummies = [
x.property[ANCHOR_PROP]
for x, y in self.coordinatingAtoms(self.struct)
]
self.struct.deleteAtoms(dummies)