"""
Shared functionality between PrepWizard GUI and command-line PrepWizard.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Note: this file must not import any gui-dependent modules (QtGui, QtWidgets,
# maestro_ui) so the tasks can run without display dependency
import base64
import enum
import itertools
import json
import os
import sys
import zlib
import schrodinger
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.models import parameters
from schrodinger.protein import biounit
from schrodinger.protein import findhets
from schrodinger.protein import getpdb
from schrodinger.protein import annotation
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils.interactions import hbond
from schrodinger.ui.qt.appframework2 import application
from schrodinger.utils import mmutil
from . import tasks as pwtasks
maestro = schrodinger.get_maestro()
[docs]class HetType(enum.Enum):
METAL_OR_ION = enum.auto()
DETECTED_LIGAND = enum.auto()
NON_WATER_SOLVENT = enum.auto()
OTHER = enum.auto()
# List of PDB residue names to NOT run Epik on:
EPIK_EXCLUSION_LIST = ['HEM', 'HEC']
STATE_PANELTY_PROP = 'r_epik_State_Penalty'
METAL_PENALTY_PROP = 'r_epik_Metal_State_Penalty'
ORIG_ANUM_PROP = 'i_ppw_anum'
HET_NUM_PROP = 'i_ppw_het'
HET_STATES_PROP = 's_ppw_het_states'
# Default weight of each hydrogen bond in the state score:
HBOND_ENERGY = -1.0 # kcal/mol/h-bond
# Penalize het states where neutral amide is interacting with a metal. This
# number must be large enough to overpower any favorable differences in Epik
# state penalty that the neutral (or positive) charged state may have.
METAL_AMIDE_CLASH_PENALTY = 20.0
# Consider NH...O as twice stronger than other H-bonds:
# https://doi.org/10.1021/jp103470e p.9534:
# "... Nevertheless, the hydrogen bond fit energies of all fit
# functions showed the general expected order4 of: ENH ··· O ≈
# -6 kJ/mol > ENH ··· N ≈ ECH ··· O ≈ -3 kJ/mol. ..."
HBOND_ENERGY_NHO = -2.0 # kcal/mol/h-bond
RANK_NH_O_HIGHER = mmutil.feature_flag_is_enabled(
mmutil.RANK_NH_O_HIGHER_IN_PPW)
PDBNAME_ELEMENT_DICT = {}
for pdbname in [' OD1', ' OE1', ' OD2', ' OE2']:
PDBNAME_ELEMENT_DICT[pdbname] = "O"
for pdbname in [' ND1', ' ND2', ' NE2']:
PDBNAME_ELEMENT_DICT[pdbname] = "N"
for pdbname in [' CG ', ' CD ', ' CD2', ' CE1']:
PDBNAME_ELEMENT_DICT[pdbname] = "C"
[docs]def retrieve_and_read_pdb(pdbid):
"""
Retrieve the given PDB and return the Structure object.
On error, raises RuntimeError.
"""
try:
# First attempt to retrieve from third-party database, then
# attempt download:
pdb_file = getpdb.get_pdb(pdbid)
if not os.path.isfile(pdb_file):
raise Exception("getpdb module did not write out the file %s" %
pdb_file)
except Exception as err:
msg = "Failed to retrieve or download PDB file for ID: %s" % pdbid
raise RuntimeError(msg)
# NOTE: For PDBReader, all_occ is already True by default. The
# InfraReader that is used to read CIF files does not have this option.
try:
st = next(structure.StructureReader(pdb_file))
except Exception as err:
raise RuntimeError(str(err))
return st
# Dictionary that defines possible ionization states for metals:
# Key is the atomic number.
METAL_AND_ION_CHARGES = {
11: [1], # NA Sodium +1
12: [2], # MG Magnesium +2
19: [1], # K Potassium +1
20: [2], # CA Calcium +2
25: [2, 3, 4, 5, 6, 7], # MN Manganese +2, +3, +4, +5, +6, +7
26: [2, 3], # FE Iron +2 +3
27: [2, 3], # CO Cobalt +2 +3
28: [2, 3], # NI Nickel +2 +3
29: [1, 2, 3], # CU Copper +1 +2 +3
30: [2], # ZN Zinc +2
# 48:[2], # CD Cadmium +2 No MacroModel parameters
# 80:[1, 2] HG Mercury +1 +2 No MacroModel parameters
}
epik_exe = os.path.join(os.environ['SCHRODINGER'], 'epik')
impref_exe = os.path.join(os.environ['SCHRODINGER'], 'utilities', 'impref')
protassign_exe = os.path.join(os.environ['SCHRODINGER'], 'utilities',
'protassign')
if sys.platform == 'win32':
epik_exe += ".exe"
impref_exe += ".exe"
protassign_exe += ".exe"
def _count_bonds(a):
"""
Count number of bonds to the given StructureAtom (double bonds = 2 bonds),
zero-order bonds are not counted.
"""
total_orders = 0
for bond in a.bond:
order = bond.order
total_orders += order
return total_orders
[docs]def serialize_het_states(states_by_het, complex_st):
"""
Set JSON string for all given states as a CT-level property.
"""
# Keys are het nums (as str, as required by JSON module), values are
# lists of hets. Het number is included so that restoring is possible
# even if some hets were later removed by the user (i_ppw_het
# property would remain unchanged).
states_by_het = {
str(hetnum): [s.serialize() for s in states
] for hetnum, states in states_by_het.items()
}
json_str = json.dumps(states_by_het)
compressed_str = zlib.compress(bytes(json_str, 'utf-8'))
encoded_str = base64.b64encode(compressed_str).decode('utf-8')
complex_st.property[HET_STATES_PROP] = encoded_str
[docs]def deserialize_het_states(complex_st):
"""
Attempt to read the het states from the CT using the s_ppw_het_states
property, if present.
"""
encoded_str = complex_st.property.get(HET_STATES_PROP)
if not encoded_str:
# No states to load
return []
compressed_str = base64.b64decode(encoded_str)
json_str = zlib.decompress(compressed_str)
states_dict = json.loads(json_str)
# In JSON, all keys are str, to cast to int:
all_hets_and_states = {int(hetnum): v for hetnum, v in states_dict.items()}
return all_hets_and_states
[docs]class HetState:
"""
Class representing an ionization/tautomeric state of a het (e.g. ligand).
"""
[docs] def __init__(self, hetnum):
self.hetnum = hetnum # Het number (value of i_ppw_het properties)
self.charges = None # items=(original anum, formal charge)
self.orders = None # items=(orig anum1, orig anum2, order)
self.penalty = None # Epik state penalty; 0.0 for metals
self.metal_penalties = None # keys=orig anum, values=metal penalty
self.score = None # Score of the het state, in context of protein
self.info_str = None # Information string explaining state details,
# including num hbonds to receptor.
[docs] @classmethod
def initFromEpikOutput(cls, hetnum, state_st):
"""
Return a HetState instance for the given Epik output structure.
"""
self = cls(hetnum)
self.charges, self.orders = self.extractStateChargesAndOrders(state_st)
state_penalty, metal_penalties = get_state_penalties(state_st)
self.penalty = state_penalty # Will be 0.0 for metals
self.metal_penalties = metal_penalties
# NOTE: score and info_str will be calculated when state is applied
# to the complex CT
return self
[docs] @classmethod
def initFromSerializedState(cls, hetnum, serialized_state):
"""
Return a HetState instance for the given serialized state.
"""
self = cls(hetnum)
(self.charges, self.orders, self.penalty, self.metal_penalties,
self.score, self.info_str) = serialized_state
# Convert atom number keys back to ints. This is needed because all
# keys are loaded as strings when reading JSON:
self.metal_penalties = {
int(anum): penalty
for anum, penalty in self.metal_penalties.items()
}
return self
[docs] def serialize(self):
# JSON requires keys to be strings:
return (self.charges, self.orders, self.penalty, self.metal_penalties,
self.score, self.info_str)
[docs] def extractStateChargesAndOrders(self, state_st):
hetnum = self.hetnum
# Dict of charge information for each atom in the output state.
# Keys are original/input atom indices:
charge_data = []
for atom in state_st.atom:
# Iterate over all atoms in this Epik output CT:
anum = atom.property.get(ORIG_ANUM_PROP)
# Skip atoms that were not in the original het. This includes the
# protein atoms covalently bound to the het, and hydrogens that
# were added by Epik.
if atom.property.get(HET_NUM_PROP) != hetnum:
continue
# If we got here then we are processing a heavy atom that is part
# of the het - so it must have i_ppw_anum set, as Epik doesn't
# touch heavy atoms.
assert anum, "ERROR: No i_ppw_anum property for atom"
charge_data.append((anum, atom.formal_charge))
order_data = []
for bond in state_st.bond:
atom1 = bond.atom1
atom2 = bond.atom2
if atom1.property.get(HET_NUM_PROP) != hetnum or atom2.property.get(
HET_NUM_PROP) != hetnum:
# Hydrogen or covalently bound protein atom:
continue
anum1 = atom1.property.get(ORIG_ANUM_PROP)
anum2 = atom2.property.get(ORIG_ANUM_PROP)
assert anum1 is not None and anum2 is not None
order_data.append((anum1, anum2, bond.order))
return charge_data, order_data
[docs] def applyState(self, st):
"""
Apply this het state to the specified complex structure: modify
the bond orders and formal charges to apply the currently selected
het state.
Also calculates the score and info_str properties
Raises RuntimeError if any of the het atoms are no longer in the
structure, or are not numbered correctly.
"""
# Delete hydrogen neighbors before adding them according to next state:
het_atom_indices = self.findHetAtoms(st)
hydrogens = hydrogen_neighbors(st, het_atom_indices)
st.deleteAtoms(hydrogens)
# Need to find het atoms again, as it's possible that removing
# hydrogens has altered atom ordering:
het_atom_indices = self.findHetAtoms(st)
# Keys are original atom indices (prior to pre-processing), values are
# atom objects, for hets that are still present in the structure.
atoms_by_anum = {}
for anum in het_atom_indices:
atom = st.atom[anum]
# Iterate over het heavy atoms in the Workspace CT:
# This atom's atom number in input (unprepared) complex CT:
anum = atom.property.get(ORIG_ANUM_PROP)
assert anum is not None
atoms_by_anum[anum] = atom
expecting_anums = {anum for (anum, _) in self.charges}
for anum1, anum2, _ in self.orders:
expecting_anums.add(anum1)
expecting_anums.add(anum2)
missing_atoms = expecting_anums.difference(atoms_by_anum.keys())
if missing_atoms:
raise RuntimeError(
'Could not apply het state (%i atoms missing or renumbered)' %
len(missing_atoms))
# Change the bond orders and charges as necessary:
for anum, charge in self.charges:
atom = atoms_by_anum[anum]
atom.formal_charge = charge
if charge > 0:
charge_str = '+%i' % charge
elif charge < 0:
charge_str = '%i' % charge
else:
charge_str = ''
atom.label_user_text = str(charge_str)
atom.label_format = "%UT"
atom.label_color = 13
for anum1, anum2, order in self.orders:
atom1 = atoms_by_anum[anum1]
atom2 = atoms_by_anum[anum2]
bond = st.getBond(atom1, atom2)
bond.order = order
num_atoms_pre_htreat = st.atom_total
# Add hydrogens according to the new bond orders and formal charges:
st.retype()
build.add_hydrogens(st, atom_list=het_atom_indices)
# Hide the new hydrogens (so that they are not flickered when selecting
# state):
for anum in range(num_atoms_pre_htreat + 1, st.atom_total):
st.atom[anum].visible = False
# Calculate the total score for this state. The score may have changed
# since the states were generated by Epik due to user's changes - for
# example removing an interacting metal or water molecule.
self.score, self.info_str = calc_state_score(st, het_atom_indices,
self.penalty,
self.metal_penalties)
[docs] def findHetAtoms(self, complex_st):
"""
Return a list of het heavy atoms in the given complex structure.
"""
asl = "(atom.%s %i)" % (HET_NUM_PROP, self.hetnum)
return analyze.evaluate_asl(complex_st, asl)
class _CommonProblemsFixer:
"""
Class for fixing common structure problems.
"""
def _hasZobs(self, atom):
"""
Return True if any of the bonds of this atom are zero-order.
"""
for bond in atom.bond:
if bond.order == 0:
return True
return False
def _setFormalCharge(self, atom, new_charge):
"""
Set the formal charge for the given atom. Logs the change in the
self._corrections_list.
"""
prev_formal_charge = atom.formal_charge
if prev_formal_charge == new_charge:
return
atom.formal_charge = new_charge
msg = "Changed atom %i (%s) charge from %i to %i" % (
atom.index, atom.element, prev_formal_charge, new_charge)
self._corrections_list.append(msg)
def _deprotCarbAcid(self, carbon):
"""
De-protonate the specified carboxylic acid.
:type carbon: `structure._StructureAtom` object
:param carbon: The carbon of the carboxylic acid.
"""
# NOTE: This is no longer needed due to CONV-855/CONV-860 fix
oxygen_double = None
oxygen_single = None
for bond in carbon.bond:
atom2 = bond.atom2
if atom2.element == "O":
if bond.order == 2:
oxygen_double = atom2
else:
oxygen_single = atom2
if not oxygen_double or not oxygen_single:
# If this ever happens, then this is not a carboxylic acid, or the
# residue has missing atoms. Skip this group.
return
self._setFormalCharge(oxygen_single, -1)
# Delete the hydrogen bound to the oxygen (if any):
for other_atom in oxygen_single.bonded_atoms:
if other_atom.element == "H":
self._atoms_to_delete.append(other_atom.index)
correction = (
"Deleted hydrogen %i" % other_atom.index +
" because it was interfering with a zero-order bond")
self._corrections_list.append(correction)
def _fixElement(self, atom):
"""
If an atom's element (Whether it's an oxygen, nitrogen, or carbon) does
not correspond to its PDB name, it gets reset. (Ev:69207)
"""
expected_element = PDBNAME_ELEMENT_DICT.get(atom.pdbname)
if expected_element:
prev_element = atom.element
if prev_element != expected_element:
correction = "Changed atom %i (%s) element from %s to %s" % (
atom.index, atom.pdbname, prev_element, expected_element)
atom.element = expected_element
self._corrections_list.append(correction)
def _processSulfur(self, atom):
"""
Ev:105338 Fix formal charges on sulfurs
"""
all_neighbors_are_carbons = True
total_orders = 0
for bond in atom.bond:
order = bond.order
total_orders += order
if bond.atom2.element != "C":
all_neighbors_are_carbons = False
if total_orders == 3 and all_neighbors_are_carbons:
prev_formal_charge = atom.formal_charge
if prev_formal_charge != 1:
self._setFormalCharge(atom, +1)
def _processBackboneOxygen(self, atom):
"""
PPREP-875 Remove extra bonds when residue oxygens are bound to covalent
ligands
"""
# R-O=R will become R-O-R
if _count_bonds(atom) >= 3 and atom.formal_charge <= 0:
# This oxygen is making one too many bonds. Remove one.
# We ignore R-O(+)=R, which is valid.
if len(atom.bond) != 2:
return
# For now we handle only the case where one bond is single
# and one bond is double.
# Find the double bond and make it single:
for bond in atom.bond:
if bond.order >= 2:
bond.order = 1
res = str(bond.atom1.getResidue())
correction = "Modified bond %i-%i (%s): Set order to 1" % \
(bond.atom1, bond.atom2, res)
self._corrections_list.append(correction)
break
def run(self, st):
"""
Fix common structure problems in the given Structure.
:rtype: list of strings
:return: A list of texts describing what changes were made.
"""
self._atoms_to_delete = []
self._corrections_list = []
for atom in st.atom:
self._fixElement(atom)
if atom.element == "S":
self._processSulfur(atom)
# Ev:108985
elif atom.element == "B":
if _count_bonds(atom) == 4 and atom.formal_charge == 0:
self._setFormalCharge(atom, -1)
# Ev:120900 Delete hydrogens with more than one non-ZOB bond (PDB error)
elif atom.element == "H":
num_bonds = 0
for bond in atom.bond:
if bond.order > 0:
num_bonds += 1
if num_bonds > 1:
self._atoms_to_delete.append(atom.index)
correction = "Deleted atom %i: This hydrogen had invalid number of bonds (%i)" % (
atom.index, num_bonds)
self._corrections_list.append(correction)
elif atom.pdbname == " O ":
self._processBackboneOxygen(atom)
elif atom.pdbname in (" OD1", " OD2"):
# PPREP-1092 Find carboxylic acids with ZOBs
# Use pdbnames instead of elements as an optimization (since
# fewer atoms will have these names, while there are many Os).
# NOTE: This is no longer needed due to CONV-855/CONV-860 fix
if self._hasZobs(atom):
for bond in atom.bond:
if bond.order != 0 and bond.atom2.element == "C":
self._deprotCarbAcid(bond.atom2)
# PPREP-943 Fix formal charges on some irons:
# (FIXME perhaps make this correction more broad?)
elif atom.element == "Fe":
if atom.pdbres == "SRM " and atom.pdbname == "FE ":
self._setFormalCharge(atom, +2)
if self._atoms_to_delete:
st.deleteAtoms(self._atoms_to_delete)
st.retype()
return self._corrections_list
[docs]def fix_common_structure_mistakes(st):
"""
Fixes common problems in structures and returns a list of correction strings
that are to be reported to the user.
"""
return _CommonProblemsFixer().run(st)
[docs]def atomsasl(atoms):
""" Generates an ASL expression for the specified atoms. """
if len(atoms) == 0:
raise Exception("ERROR: empty atom list passed to atomsasl()!")
asl = "(atom.num %s)" % ', '.join(map(str, atoms))
return asl
def _create_bonds_by_smarts(st,
bond_type,
smarts1,
smarts2,
dist,
verbose=False):
"""
General function to create bonds between the first atom in two
different smarts patterns if they are closer then a distance. The smarts
matching is performed on a per residue basis.
:param st: Structure to modify.
:type st: Schrodinger.structure
:param bond_type: Bond type to be used for reporting
:type bond_type: String
:param smarts1: The first smarts pattern to consider, by convention
this should be the protein if that applies
:type bond_type: String
:param smarts2: The second smarts pattern to consider, by convention
this should be the non-protein part if that applies
:type bond_type: String
:param dist: Atoms must be at least this close to consider for
for glycosilation
:type dist: float
:param verbose: Whether to print formed bonds to stdout
:type verbose: boolean
:rparam: Pairs of atom ( in the output structure ) where
bonds were added
:rtype: list of schrodinger atom objects
"""
output = []
attach_list = [[], []]
for res in st.residue:
res_anums = res.getAtomIndices()
res_ct = res.extractStructure()
for smarts, attach in zip([smarts1, smarts2], attach_list):
match = analyze.evaluate_smarts_canvas(res_ct, smarts)
# Map residue atom ids back to main structure
for a_match in match:
anum_by_st = res_anums[a_match[0] - 1]
attach.append(anum_by_st)
smarts1_attach, smarts2_attach = attach_list
if not smarts1_attach or not smarts2_attach:
return output
atoms_to_delete = []
atoms_to_add_hyd = []
used_atoms = set()
atoms = smarts1_attach + smarts2_attach
for iatom1, iatom2 in measure.get_close_atoms(st, dist, atoms=atoms):
atom1 = st.atom[iatom1]
atom2 = st.atom[iatom2]
# Don't reuse atoms
if atom1.index in used_atoms or atom2.index in used_atoms:
continue
# Identify close atoms that are not bound
if st.areBound(atom1, atom2):
continue
# Check both ways
for s1_atom, s2_atom in itertools.permutations([atom1, atom2]):
if s1_atom.index not in smarts1_attach:
continue
if s2_atom.index not in smarts2_attach:
continue
st.addBond(s1_atom, s2_atom, 1)
output.append((s1_atom, s2_atom))
used_atoms.add(s1_atom.index)
used_atoms.add(s2_atom.index)
# Keep track of the hydrogens to delete on each side
# and which atoms to add hydrogens to. This will all be
# done at the end to avoid messing up indexes
for atom in s1_atom, s2_atom:
for ba in atom.bonded_atoms:
if ba.element == "H":
atoms_to_delete.append(ba.index)
atoms_to_add_hyd.append(atom.index)
if verbose:
print(' Created a %s bond between %s and %s' %
(bond_type, "%s:%d%s(%s):%s" %
(s1_atom.chain, s1_atom.resnum, s1_atom.inscode,
s1_atom.pdbres, s1_atom.pdbname), "%s:%d%s(%s):%s" %
(s2_atom.chain, s2_atom.resnum, s2_atom.inscode,
s2_atom.pdbres, s2_atom.pdbname)))
# Note that since we are using structure Atom objects for output,
# the indexes autoamtically update as the underlying CT is changed
if len(atoms_to_delete) > 0:
rmap = st.deleteAtoms(atoms_to_delete, renumber_map=True)
atoms_to_add_hyd = [rmap[i] for i in atoms_to_add_hyd]
if len(atoms_to_add_hyd) > 0:
build.add_hydrogens(st, atom_list=atoms_to_add_hyd)
return output
[docs]def create_glycosylation_bonds(st, dist=1.8, verbose=True):
"""
Create glycosylation bonds for N-linked or O-linked glycosilation events
Identfies neutral O or N with implicit or explicit hydrogens and
forms bonds to sugars ( ring with 5 aliphatic carbons and one oxygen )
at locations adjacent to the oxygen.
:param st: Structure to modify.
:type st: Schrodinger.structure
:param dist: Atoms must be at least this close to consider for
for glycosilation
:type dist: float
:param verbose: Whether to print formed bonds to stdout
:type verbose: boolean
:rparam: Pairs of atom ( in the output structure ) where
bonds were added
:rtype: list of schrodinger atom objects
"""
# ( note that only NH1 is checked for for ARG as NH2 is positively
# charged.
return _create_bonds_by_smarts(
st,
"glycosylation",
"[N,O;+0;h,H1,H2]",
"[C;r6;H2,H1,h][O;r6][C;r6][C;r6][C;r6][C;r6]",
dist,
verbose=verbose)
[docs]def create_palmitoylation_bonds(st, dist=1.8, verbose=True):
"""
Create palmitoylation bonds.
Identfies neutral cysteine S with implicit or explicit hydrogens and
palmitoyl groups or palmitoyl groups with the OH of the acid replace by
a hydrogen
:param st: Structure to modify.
:type st: Schrodinger.structure
:param dist: Atoms must be at least this close to consider for
for pamitoylation
:type dist: float
:param verbose: Whether to print formed bonds to stdout
:type verbose: boolean
:rparam: Pairs of atom ( in the output structure ) where
bonds were added
:rtype: list of schrodinger atom objects
"""
return _create_bonds_by_smarts(st,
"palmitoylation",
"[S,O;+0;h,H1,H2]",
"[C;+0;h,H1](=[O;+0])",
dist,
verbose=verbose)
[docs]def create_disulfide_bonds(st, dist=3.2, verbose=False):
"""
Create bonds between proximal Sulfurs, deleting any hydrogens on them.
If verbose is True, prints log info to the termnal.
Returns a list of (atom1, atom2) for ever added bond.
"""
cys_sulfurs = analyze.evaluate_asl(
st,
'((res.ptype "CYS ") OR (res.ptype "CYX ")) AND ((atom.ptype " SG "))')
added_bonds = [] # Ev:92646
atoms_to_delete = []
for isulfur_index in cys_sulfurs:
for jsulfur_index in cys_sulfurs:
if jsulfur_index >= isulfur_index:
continue
isulfur = st.atom[isulfur_index]
jsulfur = st.atom[jsulfur_index]
# Ev:129699 Skip this pair if at least one is already bound to
# something (perhaps from a previous iteration of this loop)
pair_hydrogens = []
skip = False
for a in [isulfur, jsulfur]:
num_heavy = 0
for bond in a.bond:
if bond.order > 0: # Ev:131161 Ignore zero-order bonds
n = bond.atom2
if n.element == 'H':
pair_hydrogens.append(n.index)
else:
num_heavy += 1
if num_heavy > 1:
skip = True
if skip:
continue
if st.measure(isulfur, jsulfur) <= dist:
isulfur.addBond(jsulfur, 1)
added_bonds.append((isulfur.index, jsulfur.index))
isulfur.formal_charge = 0
jsulfur.formal_charge = 0
atoms_to_delete += pair_hydrogens
ires = isulfur.getResidue()
jres = jsulfur.getResidue()
# Ev:88748 Change the resname of the combined residue to CYX:
if ires.pdbres != "CYX " or jres.pdbres != "CYX ":
ires.pdbres = "CYX "
jres.pdbres = "CYX "
if verbose:
print(
' Created a bond between %s and %s; changed residue names to "CYX "'
% (ires, jres))
else:
if verbose:
print(' Created a bond between %s and %s' %
(ires, jres))
# Must delete atoms at the end, in order to preserve atom numbers:
st.deleteAtoms(atoms_to_delete)
st.retype()
return added_bonds
[docs]def convert_selenomethionines(st):
"""
Convert MSE residues to METs.
Returns a list of residue strings that were converted.
"""
converted_residues = []
for res in st.residue:
if res.pdbres == 'MSE ':
# Convert this Selenomethionine to a Methionine:
res_str = str(res)
converted_residues.append(res_str)
for atom in res.atom:
if atom.pdbname == 'SE ':
atom.element = 'S'
atom.pdbname = ' SD '
atom.color = 'yellow'
res.pdbres = 'MET'
st.retype()
return converted_residues
def _set_oxygen_orders(st, mainatom, double, single, charge):
"""
Sets the orders of phosphate-oxygen or sulfate-oxygen bonds.
mainatom - atom number of the phosphate or sulfur
double - list of atom numbers of the oxygens to double-bond to
single - list of atom numbers of oxygens to single-bond to, and set charge to <charge>.
"""
st = st.copy()
for anum in double:
bond = st.getBond(mainatom, anum)
bond.order = 2
st.atom[anum].formal_charge = 0
for anum in single:
bond = st.getBond(mainatom, anum)
bond.order = 1
st.atom[anum].formal_charge = charge
return st
def _generate_phosphate_states(st, atoms):
"""
Returns a list of structures (expanded states)
"""
# TODO move this code into the GenerateStatesTask class
phosphate = atoms[0]
double_oxygen = atoms[1]
# Make a list of negative hydrogens and hydrogens with hydrogens:
negative_oxygens = []
hydrogen_oxygens = []
# Iterate over all oxygens with single bond to Phosphate:
for anum in atoms[2:]:
if st.atom[anum].bond_total == 1:
if st.atom[anum].formal_charge == -1:
negative_oxygens.append(anum)
else:
hydrogen_oxygens.append(anum)
# FIXME For now, do not deal with this Phosphate if it has any hydrogen
# oxygens:
if hydrogen_oxygens:
return [st]
if not negative_oxygens:
# No states can be generated from this Phosphate
return [st]
# Alternate double bond between the oxygens:
if len(negative_oxygens) == 1:
st2 = _set_oxygen_orders(st, phosphate, [negative_oxygens[0]],
[double_oxygen], -1)
return [st, st2]
elif len(negative_oxygens) == 2:
st2 = _set_oxygen_orders(st, phosphate, [negative_oxygens[0]],
[double_oxygen, negative_oxygens[1]], -1)
st3 = _set_oxygen_orders(st, phosphate, [negative_oxygens[1]],
[double_oxygen, negative_oxygens[0]], -1)
return [st, st2, st3]
else:
print('ERROR: wrong number of negative oxygens:', len(negative_oxygens))
return [st]
def _generate_sulfate_states(st, atoms):
"""
Returns a list of structures (expanded states)
"""
# TODO move this code into the GenerateStatesTask class
sulfur = atoms[0]
double_oxygen1 = atoms[1]
double_oxygen2 = atoms[2]
# Make a list of negative hydrogens and hydrogens with hydrogens:
negative_oxygens = []
hydrogen_oxygens = []
# Iterate over all oxygens with single bond to Sulfur:
for anum in atoms[3:]:
if st.atom[anum].bond_total == 1:
if st.atom[anum].formal_charge == -1:
negative_oxygens.append(anum)
else:
hydrogen_oxygens.append(anum)
# Alternate double bond between the oxygens:
if len(negative_oxygens) == 1 and len(hydrogen_oxygens) == 0:
# st1 is equivalent of: [double_oxygen1, double_oxygen2], negative_oxygens[0]
st2 = _set_oxygen_orders(st, sulfur,
[double_oxygen1, negative_oxygens[0]],
[double_oxygen2], -1)
st3 = _set_oxygen_orders(st, sulfur,
[double_oxygen2, negative_oxygens[0]],
[double_oxygen1], -1)
return [st, st2, st3]
elif len(hydrogen_oxygens) == 1 and len(negative_oxygens) == 0:
# st1 is equivalent of: [double_oxygen1, double_oxygen2], hydrogen_oxygens[0]
st2 = _set_oxygen_orders(st, sulfur,
[double_oxygen1, hydrogen_oxygens[0]],
[double_oxygen2], 0)
st3 = _set_oxygen_orders(st, sulfur,
[double_oxygen2, hydrogen_oxygens[0]],
[double_oxygen1], 0)
return [st, st2, st3]
else:
# No states can be generated from this Sulfate
return [st]
[docs]def count_phosphates_and_sulfurs(st):
# Find any phosphates:
p_smarts = "P(=O)(O)(O)O"
s_smarts = "S(=O)(=O)(O)O"
p_matches = analyze.evaluate_smarts_canvas(st, p_smarts)
s_matches = analyze.evaluate_smarts_canvas(st, s_smarts)
main_atoms = set()
for atoms in p_matches + s_matches:
main_atoms.add(atoms[0])
return len(main_atoms)
[docs]def extend_phosphate_states(st):
"""
For specified structure, generates phosphate states, and returns list
of output structures. Ev:78688
NOTE: Output structure has no hydrogens.
"""
# TODO move this code into the GenerateStatesTask class
hydrogens = []
for a in st.atom:
if a.atomic_number == 1:
hydrogens.append(a)
st.deleteAtoms(hydrogens)
# Find any phosphates:
p_smarts = "P(=O)(O)(O)O"
matches = analyze.evaluate_smarts_canvas(st, p_smarts)
# Remove duplicates:
matches_no_dups = []
sorted_matches = []
for match in matches:
sorted_match = sorted(match)
if sorted_match not in sorted_matches:
sorted_matches.append(sorted_match)
matches_no_dups.append(match)
sts = [st]
for atoms in matches_no_dups:
all_extended_sts = []
for st in sts:
extended_sts = _generate_phosphate_states(st, atoms)
all_extended_sts.extend(extended_sts)
sts = all_extended_sts
return sts
[docs]def extend_sulfate_states(st):
"""
For specified structure, generates sulfate states, and returns list
of output structures. Ev:82634
"""
# TODO move this code into the GenerateStatesTask class
hydrogens = []
for a in st.atom:
if a.atomic_number == 1:
hydrogens.append(a)
st.deleteAtoms(hydrogens)
# Find any sulfates in the structure:
s_smarts = "S(=O)(=O)(O)O"
matches = analyze.evaluate_smarts_canvas(st, s_smarts)
# Remove duplicates:
matches_no_dups = []
sorted_matches = []
for match in matches:
sorted_match = sorted(match)
if sorted_match not in sorted_matches:
sorted_matches.append(sorted_match)
matches_no_dups.append(match)
sts = [st]
for i, atoms in enumerate(matches_no_dups):
# For this sulfate, extend the states:
all_extended_sts = []
for j, st in enumerate(sts):
extended_sts = _generate_sulfate_states(st, atoms)
all_extended_sts.extend(extended_sts)
sts = all_extended_sts
# Add hydrogens, because hydrogens were added to some atoms:
for st in sts:
build.add_hydrogens(st)
return sts
[docs]def get_chain_sequences(st, remove_tails=True):
"""
Will read the PDB sequences from the sequence block, and will return a
dictionary (keys: chain names; values: sequence strings).
If remove_tails is True, will chop off tails that are not existent in
the CT, but will leave in the missing loop sections.
Will raise RuntimeError on an error, or mmerror on mmct failure.
"""
st = st.copy()
# Make a list of residues from the chain sequence that are in the CT:
used_chain_indexes = {} # key: chain; value: list of ints
for chain in st.chain:
chain_used_indexes = []
for res in chain.residue:
index = res.atom[1].property.get('i_pdb_seqres_index')
if index is not None:
chain_used_indexes.append(index)
used_chain_indexes[chain.name] = chain_used_indexes
# Get the PDB sequence strings for each chain:
try:
data_handle = mm.mmct_ct_m2io_get_unrequested_handle(st)
except mm.MmException:
data_handle = mm.mmct_ct_get_or_open_additional_data(st, True)
chain_seq_dict = {}
# Ev:124587 Do not attempt to read the sequence if it's not present:
# will be 1 in sequence data exists:
num_seqres_blocks = mm.m2io_get_number_blocks(data_handle, "m_PDB_SEQRES")
if num_seqres_blocks == 0:
raise RuntimeError("Input structure had no sequence block data")
mm.m2io_goto_block(data_handle, "m_PDB_SEQRES", 1)
num_rows = mm.m2io_get_index_dimension(data_handle)
for i in range(1, num_rows + 1):
chain_strings = mm.m2io_get_string_indexed(data_handle, i,
["s_pdb_chain_id"])
seqres_strings = mm.m2io_get_string_indexed(data_handle, i,
["s_pdb_SEQRES"])
chain = chain_strings[0]
seqres = seqres_strings[0]
chain_seq_dict[chain] = seqres
chain_strings = list(chain_seq_dict)
used_chains = list(used_chain_indexes)
for chain in used_chains:
if chain not in chain_strings:
msg = 'WARNING: Chain "%s" did not have a sequence record' % chain
print(msg)
# Modify the sequence strings by chopping off tails:
output_sequences = {} # key: chain name; value: sequence string
mm.mmseq_initialize(mm.error_handler)
for chain, seqres in chain_seq_dict.items():
if chain not in used_chains:
# Ev:102107 Possibly the user has deleted this chain
continue
# Figure out what residues in this sequence are missing in the CT:
used_indexes = used_chain_indexes[chain]
# used_indexes is 1-indexed
# list of tuples: index=res index; value: (one letter code, whether it
# is used)
sequence_list = []
numres = len(seqres) // 4
for i in range(numres):
beg = i * 4
end = i * 4 + 4
resname = seqres[beg:end]
onelett = mm.mmseq_get_one_letter_res_name(resname)
# Ev:86135 i is zero-indexed; while the list is 1-indexed:
used = bool(i + 1 in used_indexes)
sequence_list.append((onelett, used))
in_sequence = ""
for onelett, used in sequence_list:
in_sequence += onelett
if remove_tails:
# Chop off the leading tail:
while sequence_list:
onelett, used = sequence_list[0]
if used:
break
else:
sequence_list.pop(0)
# Chop off the trailing tail:
while sequence_list:
onelett, used = sequence_list[-1]
if used:
break
else:
sequence_list.pop(-1)
if not sequence_list:
raise RuntimeError("All of sequence for chain %s was missing" %
chain)
# Convert to a sequence string (excluding tails):
out_sequence = ""
for onelett, used in sequence_list:
out_sequence += onelett
output_sequences[chain] = out_sequence
mm.mmseq_terminate()
return output_sequences
[docs]def write_sequences_to_fasta(pdbid, sequences_dict, fastafile):
pdbid = pdbid.upper()
fh = open(fastafile, 'w')
for chain, sequence in sequences_dict.items():
numres = len(sequence)
line1 = ">%s_%s, %i bases,\n" % (pdbid, chain, numres)
line2 = sequence + '\n'
fh.write(line1)
fh.write(line2)
fh.close()
[docs]def does_res_have_missing_side_chains(residue):
"""
Given a _Residue object, returns True if the residue is missing
side-chain atoms. If at least one backbone atom is (also) missing,
False is returned.
Basically only residues for which Prime missing-side-chains job can
be run will return True.
"""
# Ev:96128 Use a new mechanism to determine which residues have missing
# atoms. This method does NOT rely on the i_m_pdb_convert_problem
# property, which may be out-of-date:
# DPB names for the residue backbone atoms:
backbone_names = {" C ", " CA ", " N ", " O "}
# Do not show DNA/RNA residues, because we can't fill missing side-chains
# on them:
pdbres = residue.pdbres
if pdbres in [
' A ', ' C ', ' G ', ' U ', ' DA ', ' DC ', ' DG ', ' TD '
]:
return False
if not residue.hasMissingAtoms():
return False
heavy_elements = []
heavy_atom_names = set()
for atom in residue.atom:
if atom.atomic_number > 1:
heavy_elements.append(atom.element)
heavy_atom_names.add(atom.pdbname)
# Ev:116225 Ignore residues that have been modified to -NH2 caps:
if heavy_elements == ["N"]:
return False
# Ev:126307 Ignore residues that have been modified to -NH-CH3 caps:
if sorted(heavy_elements) == ["C", "N"]:
return False
# PPREP-794 Ignore residues that are missing any backbone atoms:
# (Note that this makes the two tests above unnecessary)
if not heavy_atom_names.issuperset(backbone_names):
return False
# If got here, the reisude has missing side-chain atoms:
return True
[docs]def do_any_residues_have_missing_side_chains(st):
"""
Returns True if at least one of the residue in the given structure
has missing side-chain atoms (backbone atoms are ignored).
"""
for res in st.residue:
if does_res_have_missing_side_chains(res):
return True
return False
[docs]def fix_sulfur_charges(st):
"""
Post process by fixing the charge on zero-order-bonded Sulfurs
Gives -1 or -2 charge to Sulfurs as appropriate.
Deletes a hydrogen from Sulfurs coordinating with metals
(Ev:61622)
"""
hydrogens_to_delete = []
for sulfur in st.atom:
if sulfur.element != 'S':
continue
bonded_to_iron = False
# Bonds for non-metal neighbors to this Sulfur (excluding hydrogens)
reg_bonds = 0
zob_bonds = 0 # How many bonds to metals
hydrogens = [] # Hydrogen atoms bonded to this Sulfur
for bond in sulfur.bond:
bo = bond.order
n = bond.atom2
if n.element == 'Fe':
# Set charge of Iron in FeS clusters to +2
n.formal_charge = +2
bonded_to_iron = True
if bo == 0:
zob_bonds += 1
elif n.element == 'H':
hydrogens.append(n)
else: # regular bond to non-hydrogen atom
reg_bonds += bo
# Number of hydrogens depends on number of regular bonds + number of zobs:
if reg_bonds + zob_bonds == 0:
max_hydrogens = 2
elif reg_bonds + zob_bonds == 1:
max_hydrogens = 1
elif reg_bonds + zob_bonds == 2:
max_hydrogens = 0
else:
# This is an unexpected hybridization state. Do not delete any
# hydrogens (PPREP-694). Unless this sulfur is bond to an iron,
# in which case we will still delete all hydrogens from it, and will
# adjust the formal charge (PPREP-944).
if bonded_to_iron:
max_hydrogens = 0
else:
# Unknown hybridization state and NOT bound to an iron; skip:
continue
# Delete all hydrogens that are above the number allowed:
while len(hydrogens) > max_hydrogens:
hydrogens_to_delete.append(hydrogens.pop(0))
kept_hydrogens = len(hydrogens)
# The charge depends on the number of regular bonds (zobs excluded):
if kept_hydrogens + reg_bonds == 0:
sulfur.formal_charge = -2
elif kept_hydrogens + reg_bonds == 1:
sulfur.formal_charge = -1
elif kept_hydrogens + reg_bonds == 2:
sulfur.formal_charge = 0
if hydrogens_to_delete:
st.deleteAtoms(hydrogens_to_delete)
st.retype() # Correct the MacroModel atom types.
[docs]def prepare_for_epik(complex_st, types_to_process):
"""
Extract hets from the given complex structure, (except het types that user
requested to skip), and separate retained hets into 2 lists: Those that
Epik should be run on, those that Epik doesn't produce any states for.
Atoms in the original structure will be marked with i_ppw_anum
property, for matching Epik output to atoms in the input structure.
:param complex_st: Receptor-ligand complex structure
:type complex_st: structure.Structure
:param types_to_process: List of het types should be processed
:type types_to_process: [HetType]
:return: List of structures to run Epik on, and list of structures that
should be processed, but without Epik.
:rtype: [structure.Structure], [structure.Structure]
"""
# NOTE: Whether states are skipped here or not had no affect on
# whether they are shown in the GUI's Substructures tab - it will
# always show all hets, even if they have no states generated.
# Set the hetero atom number property on the hetero atoms in the stucture,
# so Epik results can be matched to the atoms in the complex structure.
het_atoms_lists = findhets.find_hets(complex_st)
if not het_atoms_lists:
return [], []
print("Processing %i het groups..." % len(het_atoms_lists))
# Annotate the structure with properties, so that Epik results can
# be mapped back to the input structure:
for atom in complex_st.atom:
atom.property[ORIG_ANUM_PROP] = atom.index
for hetnum, het_atoms in enumerate(het_atoms_lists, start=1):
# This is required by prepare_for_epik() function now:
for atomnum in het_atoms:
complex_st.atom[atomnum].property[HET_NUM_PROP] = hetnum
sts_to_run_epik_on = []
sts_to_not_run_epik_on = []
for het_st, state_st, het_type in extract_hets_from_complex(
complex_st, het_atoms_lists):
# NOTE: For non-covalent ligands, het_st is same as state_st.
# Hets that don't match user's criteria are skipped completely
if not het_type in types_to_process:
continue
# Epik will be run on some groups, but is not run on groups that
# have valence violations, metals, ions, or excluded groups.
skip_epik = (het_type == HetType.METAL_OR_ION or any(
a.pdbres.strip() in EPIK_EXCLUSION_LIST for a in state_st.atom))
if skip_epik:
sts_to_not_run_epik_on.append(state_st)
else:
sts_to_run_epik_on.append(state_st)
return sts_to_run_epik_on, sts_to_not_run_epik_on
[docs]def filter_undesired_states(orig_st, state_sts):
"""
Returns a subset of state structures, which excludes metal-binding
states for hets that are not within 5A of a metal.
:param orig_st: Original complex structure (receptor, ligands, metals)
:type orig_st: `structure.Structure`
:param state_st: Epik output states to filter.
:type state_st: Iterable of `structure.Structure`
:return: List of filtered structures.
:rtype: List of `structure.Structure`
"""
filtered_states = []
# For each het, whether it's within 5A of a metal:
is_near_metal_dict = {}
for state_st in state_sts:
# Determine whether this is a metal-only state that should be skipped:
# NOTE: If this property is not present, then Epik failed to generate
# states for this structure - which means the output state is
# effectively NOT "Metal only", and should be kept.
if state_st.property.get('b_epik_Metal_Only'):
hetnum = state_st.property['i_ppw_hetnum']
if hetnum not in is_near_metal_dict:
asl = "(atom.%s %i) AND (within 5 metals)" % (HET_NUM_PROP,
hetnum)
matches = analyze.evaluate_asl(orig_st, asl)
is_near_metal_dict[hetnum] = bool(matches)
if not is_near_metal_dict[hetnum]:
continue
filtered_states.append(state_st)
return filtered_states
[docs]def find_ppw_atom(st, anum):
"""
Find the atom in the given structure whose i_ppw_anum property is set to
the given value. ValueError exception is raised if such atom is not found.
"""
matches = analyze.evaluate_asl(st, "(atom.%s %i)" % (ORIG_ANUM_PROP, anum))
if not matches:
raise ValueError("No atom with i_ppw_anum=%s was found" % anum)
return st.atom[matches[0]]
[docs]def apply_state(complex_st, state_st):
"""
Modify the het in complex_st complex (protein/ligand) structure such that
its ionization state matches the output that we got from Epik (state_st).
:param complex_st: Original complex structure (receptor + het)
:type complex_st: `structure.Structure`
:param state_st: Epik output for the het group. May include some atoms
from the receptor if the ligand is covalently-bound.
:type state_st: `structure.Structure`
:return: List of atom indices in complex_st that are part of the het.
:rtype: (dict, list)
"""
heavy_atoms = []
for state_atom in state_st.atom:
if state_atom.element == 'H':
continue # Skip hydrogens
state_anum = state_atom.property.get(ORIG_ANUM_PROP)
if state_anum is None:
# Skip protein atoms for covalently bound ligands
continue
complex_atom = find_ppw_atom(complex_st, state_anum)
ahet = state_atom.property.get(HET_NUM_PROP)
heavy_atoms.append(complex_atom.index)
# Iterate over all bonds that this atom makes, and adjust them to
# match Epik output:
for bond in state_atom.bond:
state_n_atom = bond.atom2
if state_n_atom.element == 'H':
continue # Skip hydrogens
neighbor_anum = state_n_atom.property.get(ORIG_ANUM_PROP)
if neighbor_anum is None:
# Skip protein atoms for covalently bound ligands
continue
nhet = state_n_atom.property.get(HET_NUM_PROP)
# Skip this bond if the neighbor is NOT from the same het (it's
# part of the protein to which the ligand is covalently bound - we
# include some of the protein atoms in the Epik input).
if nhet != ahet:
continue
# Modify the original full receptor structure:
# Ev:96171 Determine which 2 atoms in <complex_st> correspond to this bond:
neighbor_atom = find_ppw_atom(complex_st, neighbor_anum)
complex_st.getBond(complex_atom, neighbor_atom).order = bond.order
# Skip the formal charge if the atom is NOT part of the het (it's
# part of the receptor to which the ligand is covalently bound - we
# include some of the receptor atoms in the Epik input).
if ahet is not None:
complex_atom.formal_charge = state_atom.formal_charge
# Need to re-type after changing bond orders and formal charges:
complex_st.retype()
# Delete and re-add het hydrogens in the complex structure:
hydrogens_to_delete = hydrogen_neighbors(complex_st, heavy_atoms)
renum_map = complex_st.deleteAtoms(hydrogens_to_delete, renumber_map=True)
heavy_atoms = [renum_map[anum] for anum in heavy_atoms]
natoms = complex_st.atom_total
build.add_hydrogens(complex_st, atom_list=heavy_atoms)
het_atoms = heavy_atoms + list(range(natoms + 1, complex_st.atom_total + 1))
return het_atoms
[docs]def apply_state_and_calc_score(complex_st, state_st):
"""
Apply the state <state_st> to <complex_st>, and return the score for the
state in the context of the protein complex.
:param complex_st: Original complex structure (receptor + het)
:type complex_st: `structure.Structure`
:param state_st: Epik output for the het group. May include some atoms
from the receptor if the ligand is covalently-bound.
:type state_st: `structure.Structure`
:return: Tuple of state score, Epik penalty, and information string.
Epik penalty will be None for metal states.
:rtype: (float, float/None, str)
"""
# TODO Move this to be an instance of HetState in the future
het_atoms = apply_state(complex_st, state_st)
state_penalty, metal_penalties = get_state_penalties(state_st)
state_score, info_str = calc_state_score(complex_st, het_atoms,
state_penalty, metal_penalties)
return state_score, state_penalty, info_str
[docs]def get_state_penalties(state_st):
"""
Return the Epik state penalty for this Epik output structure, as well as
metal penalties for each atom that has the r_epik_Metal_State_Penalty
property set.
"""
# Metal states don't go through Epik, so don't have a penalty.
# Setting it to zero here will effectively prevent sorting of
# states for metal hets.
state_penalty = state_st.property.get(STATE_PANELTY_PROP, 0.0)
metal_penalties = {}
for state_atom in state_st.atom:
state_anum = state_atom.property.get(ORIG_ANUM_PROP)
if state_anum is None:
# This as an added hydrogen
continue
metal_penalty = state_atom.property.get(METAL_PENALTY_PROP)
if metal_penalty is not None:
metal_penalties[state_anum] = metal_penalty
return state_penalty, metal_penalties
[docs]def calc_hbond_energy(hydrogen, acceptor):
"""
Calculate H-bond energy. Overly simple, to address PLDB-3337 and similar.
"""
if RANK_NH_O_HIGHER and hydrogen.element == 'H':
donor = next(hydrogen.bonded_atoms)
if donor.element == 'N' and acceptor.element == 'O':
return HBOND_ENERGY_NHO
return HBOND_ENERGY
[docs]def calc_state_score(complex_st, het_atoms, state_penalty, metal_penalties):
"""
Calculate the score for the current het state (must already been applies
to the complex structure).
:het_atoms: List of atoms (complex indexed) that are part of the het.
"""
smallest_metal_penalty = get_smallest_metal_penalty(complex_st,
metal_penalties)
# Total score (higher is better):
score = 0.0
# Calculate the H-Bonds contribution:
hbonds = hbond.get_hydrogen_bonds(complex_st, het_atoms)
hbond_count = len(hbonds)
for bond in hbonds:
score -= calc_hbond_energy(*bond)
if state_penalty is not None:
info_str = "State penalty: %.2f kcal/mol" % state_penalty
score -= state_penalty
else:
# No state penalty; this is a metal state. Score will be 0.0.
info_str = "State penalty: NA"
if smallest_metal_penalty is not None:
# Reward all states with metal interactions by up to 10 points.
# Originally it was planned to replace the state penalty with the
# smallest metal state penalty, but that caused non-metal interacting
# states with better state penalties to be selected.
# Metal state penalties are small positive values - lower is better.
score += (10.0 - smallest_metal_penalty)
info_str = "Metal state penalty: %.2f kcal/mol" % smallest_metal_penalty
# Penalize amides that have an extra hydrogen that can clash
# with a metal. Currently only terminal amides are handled - in
# the future, consider making this algorithm more generic.
num_metal_clashes = get_num_clashes_with_metals(complex_st, het_atoms)
score -= num_metal_clashes * METAL_AMIDE_CLASH_PENALTY
info_str += "; H-Bond count: %i" % hbond_count
total_charge = 0
for anum in het_atoms:
total_charge += complex_st.atom[anum].formal_charge
if total_charge > 0:
charge_str = 'Q: +%i' % total_charge
else:
charge_str = 'Q: %i' % total_charge
# The info string shows the Epik state penalty and the number of
# hydrogen bonds between this state and the receptor.
info_str += "; %s" % charge_str
return score, info_str
def _generate_metal_and_ion_states(st):
"""
Ionize the metal or ion atom in the specified state structure (only one,
if exists) and return a list of ionized sts.
"""
out_states = []
# Ionize the metal:
for atom in st.atom:
charges = METAL_AND_ION_CHARGES.get(atom.atomic_number)
if charges:
# The original state should be the first state (Ev:65897):
out_states.append(st)
for ch in charges:
if ch != atom.formal_charge:
stc = st.copy()
stc.atom[int(atom)].formal_charge = ch
stc.retype()
out_states.append(stc)
if not out_states:
# No metal in <st>, return the original:
out_states.append(st)
return out_states
[docs]def generate_phosphate_and_sulfur_states(sts):
"""
For each ligand state, expand states for the phosphate and sulfate groups.
"""
expanded_sts = []
for st in sts:
# For each input state, generate metal and phosphate states
# Ev:78688
if count_phosphates_and_sulfurs(st) > 4:
expanded_sts.append(st)
continue
for st2 in extend_phosphate_states(st):
for st3 in extend_sulfate_states(st2):
expanded_sts.append(st3)
return expanded_sts
def _find_waters(st):
"""
Return a list of water atoms. Each item in the list is a tuple of 3 water
atom indices. Waters that don't have two hydrogens are skipped.
"""
water_asl = 'water and (atom.ele O)'
water_atoms = analyze.evaluate_asl(st, water_asl)
waters_list = []
for ai in water_atoms:
a = st.atom[ai]
# Make a list of the atoms of this water:
curr_water_atoms = [ai]
for h in a.bonded_atoms:
if h.atomic_number == 1:
curr_water_atoms.append(h.index)
if len(curr_water_atoms) != 3:
print(' WARNING: skipping water since it has %i hydrogens' %
(len(curr_water_atoms) - 1))
continue
curr_water_atoms = tuple(curr_water_atoms)
waters_list.append(curr_water_atoms)
return waters_list
[docs]def get_bridging_waters(st, min_hbonds=3):
"""
Return a list of all waters in the specified structure that make at least
<min_hbonds> number of H-bonds (H-bonds to other waters excluded).
The list contains both oxygen and hydrogen atoms (if present).
"""
waters_list = _find_waters(st)
all_water_atoms = set()
for water_mol_atoms in waters_list:
all_water_atoms.update(water_mol_atoms)
def _get_num_nonwater_hbonds(water_mol_atoms):
count = 0
for (atom1, atom2) in hbond.get_hydrogen_bonds(st, water_mol_atoms):
anum1 = int(atom1)
anum2 = int(atom2)
if anum1 in water_mol_atoms:
other_atom = anum2
else:
other_atom = anum1
if other_atom not in all_water_atoms:
count += 1
return count
bridging_water_atoms = []
for water_atoms in waters_list:
num_hbonds = _get_num_nonwater_hbonds(water_atoms)
if num_hbonds >= min_hbonds:
bridging_water_atoms += water_atoms
return bridging_water_atoms
[docs]def hydrogen_neighbors(st, atoms):
"""
Returns the list of neighbor (bonded) atoms that are hydrogens.
:param st: Structure where atoms are from.
:type st: `structure.Structure`
:param atoms: List of atom indices for the heavy atoms.
:type atoms: list of ints
:return: List of hydrogen atom indices
:rtype: list of ints.
"""
hydrogens = []
for heavy_atom in atoms:
for bonded_atom in st.atom[heavy_atom].bonded_atoms:
if bonded_atom.index not in hydrogens and bonded_atom.atomic_number == 1:
hydrogens.append(bonded_atom.index)
return hydrogens
[docs]def get_pdb_id(st):
"""
Returns the PDB ID of the given structure. If the property does not exist,
returns the string "unknown".
"""
pdbid = st.property.get("s_pdb_PDB_ID")
if not pdbid:
return "unknown"
return pdbid.lower()
[docs]def get_het_name(st, het_atoms, include_chain=False):
"""
Return the het "name" to display for the user for the given het. Assumes
all residues have the same chain name.
Example outputs:
"HEM 123a"
"HEM A:123a"
"GLY-VAL-PRO 110-111-112"
"GLY-VAL-PRO A:110-111-112"
:param st: Structure containing the het
:type st: `structure.Structure`
:param het_atoms: Atoms from this het.
:type het_atoms: list of ints
:param include_chain: Whether the chain name should be included.
:type include_chain: bool
"""
residues = []
for ai in het_atoms:
res = st.atom[ai].getResidue()
if not res in residues:
residues.append(res)
name_str = '-'.join([res.pdbres.strip() for res in residues]) + ' '
if include_chain:
name_str += st.atom[het_atoms[0]].chain + ':'
name_str += '-'.join(
['%i%s' % (res.resnum, res.inscode.strip()) for res in residues])
return name_str
[docs]def add_prepared_props(st):
"""
Adds "prepared" and "prepared with version" properties to the given
structure.
"""
st.property['b_ppw_prepared'] = True
st.property['s_ppw_prepared_with_version'] = schrodinger.get_release_name()
[docs]def generate_bio_units(st):
"""
If the given structure has Biounit data, generate conformations for
each biounit. If multiple bio units are defined, multiple structures
are returned - oner per bio unit.
If no biounit data is present, original structure is returned (in list).
"""
biounits = biounit.biounits_from_structure(st)
if not biounits:
return [st]
sts = [biounit.apply_biounit(st, bu) for bu in biounits]
return sts
[docs]def apply_het_states(orig_st, state_sts, logger):
"""
For each het state, generate a complex structure with that state applied,
and return a list of (state score, complex structure), one for each state.
:param orig_st: Input protein complex structure.
:type orig_st: structure.Structure
:param state_sts: List of ligand states to apply.
:type state_sts: List[structure.Structure]
:param logger: Logger to report output to.
:type logger: logging.Loggger
:return: List of state score and complex structure tuples
:rtype: List[Tuple[float, structure.Structure]]
"""
# For each state for this het:
hetname = get_het_name(state_sts[0],
state_sts[0].getAtomIndices(),
include_chain=True)
logger.debug("Processing het %s: " % hetname)
# TODO Convert this to a list of HetState objects in the future:
applied_states = []
# For each het/lig structure, modify the input complex structure so that
# its het has the same ionization state:
for state_st in state_sts:
complex_st = orig_st.copy()
# Apply the state and calculate the score:
state_score, epik_penalty, info_str = apply_state_and_calc_score(
complex_st, state_st)
applied_states.append((state_score, complex_st))
logger.debug(' number of states applied: %i' % len(applied_states))
return applied_states
[docs]def delete_far_waters(st, angstroms=5):
het_atom_numbers = get_het_atom_numbers(st)
if het_atom_numbers: # hets found
hets_asl = atomsasl(het_atom_numbers)
far_waters_asl = "water and not fillres within %s (%s)" % (angstroms,
hets_asl)
else:
far_waters_asl = "water"
far_water_atoms = analyze.evaluate_asl(st, far_waters_asl)
st.deleteAtoms(far_water_atoms)
[docs]def delete_non_bridging_waters(st, delwater_hbond_cutoff):
"""
Delete waters that do not make at least this number of hydrogen bonds
to non-waters.
"""
# Make a list of all water atoms:
water_atoms = analyze.evaluate_asl(st, "water")
# Make a list of bridging water atoms:
bridging_water_atoms = get_bridging_waters(st, delwater_hbond_cutoff)
# Make a list of water atom that need to be deleted:
del_atoms = [a for a in water_atoms if a not in bridging_water_atoms]
del_waters = [
str(st.atom[a].getResidue())
for a in del_atoms
if st.atom[a].element == "O"
]
if del_waters:
st.deleteAtoms(del_atoms)
return del_waters
[docs]def idealize_hydrogen_temp_factor(st):
"""
Set temperature factor for hydrogens without temperature factors to
that of the bonded heavy atom (if available). This is important
for X-ray refinement to prevent R-factor collapse.
:param st: Input Structure
:type st: Schrodinger.structure
:return: None, but alters hydrogen temperature factors in place.
"""
for atom in st.atom:
if atom.atomic_number != 1 or atom.temperature_factor:
continue
for heavy_atom in atom.bonded_atoms:
temperature_factor = heavy_atom.temperature_factor
if temperature_factor is not None:
atom.temperature_factor = temperature_factor
if atom.alt_xyz is None:
continue
if heavy_atom.alt_xyz is None:
temperature_factor = heavy_atom.temperature_factor
else:
temperature_factor = heavy_atom.property.get(
'r_m_alt_pdb_tfactor')
if temperature_factor is not None:
atom.property['r_m_alt_pdb_tfactor'] = temperature_factor
[docs]def get_het_atom_numbers(st):
"""
Get the list of het groups in a structure
:param st: Input Structure
:type st: schrodinger.structure.Structure
:return: atom numbers in hetero atom fields. Each list is the atoms within
a given hetero group
:rtype: list of list of integers
"""
het_atom_numbers = [] # List of atoms number for ALL hets
for het_atoms in findhets.find_hets(st):
for ai in het_atoms:
het_atom_numbers.append(ai)
return het_atom_numbers
[docs]class PrepWizardSettings(parameters.CompoundParam):
jobname: str = 'prepwizard'
force_field: str = '2005'
preprocess: bool = True
assign_bond_orders: bool = True
use_ccd: bool = True
add_hydrogens: bool = True
readd_hydrogens: bool = False
idealize_hydrogen_tf: bool = True
add_terminal_oxygens: bool = False
cap_termini: bool = False
preprocess_delete_far_waters: bool = False
preprocess_watdist: float = None
delete_far_waters: bool = True
watdist: float = 5.0
treat_metals: bool = True
treat_disulfides: bool = False
treat_glycosylation: bool = False
treat_palmitoylation: bool = False
selenomethionines: bool = False
antibody_cdr_scheme: annotation.AntibodyCDRScheme = annotation.DEFAULT_ANTIBODY_SCHEME
renumber_ab_residues: bool = False
fillsidechains: bool = False
fillloops: bool = False
custom_fasta_file: bool = None
run_epik: bool = True
use_epikx: bool = False
max_states: int = 1
run_protassign: bool = True
samplewater: bool = False
include_epik_states: bool = False
xtal: bool = False
pH: float = 7.0
epik_pH: float = 7.0
epik_pHt: float = 2.0
use_propka: bool = True
propka_pH: float = 7.0
label_pkas: bool = False
force_list: list = []
minimize_adj_h: bool = False
protassign_number_sequential_cycles: int = None
protassign_max_cluster_size: int = None
delwater_hbond_cutoff: int = 0
delete_nonbridging_waters: bool = False
run_impref: bool = True
rmsd: float = 0.3
fixheavy: bool = False
reference_struct: structure.Structure = None
use_PDB_pH: bool = False
preserve_st_titles: bool = False
[docs]def update_task_from_settings(task, settings):
"""
Update the given PPWorkflowTask with these given settings.
"""
input = task.input
input.do_preprocess = settings.preprocess
input.do_hbond = settings.run_protassign
input.do_cleanup = settings.run_impref or settings.delete_far_waters or settings.delete_nonbridging_waters
# PPWorkflowTask has a do_cleanup parameters, while PrepWizardSettings
# does not (the task combines # 3 operations into one).
pwtasks.update_params(input.preprocess, settings)
pwtasks.update_params(input.preprocess.generate_states_settings, settings)
pwtasks.update_params(input.hbond, settings)
pwtasks.update_params(input.cleanup, settings)
@application.require_application(create=True, use_qtcore_app=True)
def prepare_structure(st, options, logger=None):
"""
Prepare the given protein structure using PrepWizard.
:param st: Protein structure to prepare.
:type st: structure.Structure
:param options: PrepWizard options.
:type options: PrepWizardSettings
:param logger: Optional logger to use.
:type logger: log.logging.Logger
:return: Prepared structures. Depending on the number of hets in
the protein, up to options.max_states number of structures will be produced;
by default 1.
:rtype: List[structure.Structure]
"""
task = pwtasks.PPWorkflowTask()
update_task_from_settings(task, options)
task.specifyTaskDir(os.getcwd())
task.name = options.jobname
task.logger = logger
task.input.struct = st
# Run the backend method, without adding a job control layer. This also
# causes all output to go to stdout instead of log files.
task.backendMain()
expanded_sts = task.output.structs
if logger:
logger.info(' Number of states generated: %i' % len(expanded_sts))
return expanded_sts
[docs]def tag_st_het_num_prop(sts):
"""
Set the hetero atom number property to the sequence number in `sts` on the
structures
:param sts: the structures to tag
:type sts: List[structure.Structure]
"""
for idx, st in enumerate(sts, start=1):
st.property[HET_NUM_PROP] = idx