"""
Module for optimizing hydroxyl, thiol and water orientiations, Chi-flips of
asparagine, glutamine and histidine, and protonation states of aspartic acid,
glutamic acid, and histidine.
Usage: ProtAssign(st)
Copyright Schrodinger, LLC. All rights reserved.
"""
import copy
import itertools
import json
import math
import operator
import random
import re
import shutil
import sys
import tempfile
from collections import defaultdict
from dataclasses import dataclass
from dataclasses import field
from pathlib import Path
from typing import Dict
from typing import Iterator
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple
from typing import Type
from typing import Union
import numpy as np
from scipy import ndimage
from scipy.sparse import dok_matrix
from scipy.sparse.csgraph import connected_components
from schrodinger import structure
from schrodinger.structure import Structure
from schrodinger.application.bioluminate import protein
from schrodinger.application.prepwizard2 import prepare
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import transform
from schrodinger.structutils.interactions import find_pi_cation_interactions
from schrodinger.structutils.interactions import find_pi_pi_interactions
from schrodinger.structutils.measure import create_distance_cell
from schrodinger.utils import mmutil
from schrodinger.utils import subprocess
# Check if toulbar2 is available to solve network exact
TOULBAR2_AVAILABLE = shutil.which('toulbar2') is not None
# Flag to use in-house developed rules-based pKa predictor if PROPKA is not
# requested.
USE_PROTASSIGN_PKA_PREDICTOR = mmutil.feature_flag_is_enabled(
mmutil.PROTASSIGN_PKA_PREDICTOR)
LOG_NONE = 0
LOG_BASIC = 1
LOG_EXTRA = 2
LOG_DEBUG = 3
LOG_FULL_DEBUG = 4
LOG_SCORE_DEBUG = 5
DEFAULT_LOG_LEVEL = LOG_BASIC
log_level = DEFAULT_LOG_LEVEL
label_color = 13
pH_vlow = 'very_low'
pH_low = 'low'
pH_neutral = 'neutral'
pH_high = 'high'
# We are moving towards a more complex rules based internally developed pKa
# predictor that has numbers as input. During the transition phase, map the
# string based pH values to numbers if we are using the rules based predictor
# to be in line with the current GUI.
# Once the GUI interface for the simple predictor has been removed, we can get
# rid of this.
PH_STRING_TO_NUMBER = {
pH_vlow: 2.0,
pH_low: 4.0,
pH_neutral: 7.0,
pH_high: 11.0
}
pka_property = 'r_pa_PropKa_pKa'
# Atom property name containing name of residue state
STATE_PROPERTY = 's_pa_state'
ACCEPTOR_PROPERTY = 'b_pa_acceptor'
DONOR_PROPERTY = 'b_pa_donor'
CLASHER_PROPERTY = 'b_pa_clasher'
ION_PROPERTY = 'b_pa_ion'
# Flag whether a donor or acceptor is static
STATIC_PROPERTY = 'b_pa_static'
# Flag that indicates a Structure is annotated for faster self-scoring.
ANNOTATED_PROPERTY = 'b_pa_is_annotated'
# Max distance between changeables to be clustered together
CLUSTERING_DISTANCE = 4.0
# Distance between the heavies of changeables after which the interaction
# energy drops to 0
MAX_HEAVY_HEAVY_INTERACTION_DISTANCE = 4.5
# Maximum distance between certain changeables to orient its hydrogen for
# adding states.
MAX_ORIENT_HYDROGEN_DISTANCE = 5.0
# H-H clash cutoff involving two polar H. The cutoff is set such that the
# hydrogen bond network of 6GST His-14 is resolved correctly.
POLAR_CLASH_CUTOFF = 1.9
# H-H clash cutoff involving at least one non-polar H
# Unknown how this cutoff was determined originally.
NONPOLAR_CLASH_CUTOFF = 1.77
# Acceptor-acceptor clash cutoff
ACCEPTOR_ACCEPTOR_CLASH_CUTOFF = 2.80
BOND_MSG_TEMPLATE = ('Cannot determine flip state of {resstr} due to unexpected'
'covalent bond.')
FLIP = 'Flip'
NO_FLIP = 'No Flip'
RESNAME_FLIPSTATES_MAP = {'ASN': (NO_FLIP, FLIP), 'GLN': (NO_FLIP, FLIP)}
for resname in ('HID', 'HIE', 'HIP'):
RESNAME_FLIPSTATES_MAP[resname] = (resname, f'{FLIP} {resname}')
RESNAME_FLIPSTATES_MAP['HIS'] = RESNAME_FLIPSTATES_MAP['HID']
ND1 = 'ND1'
ND2 = 'ND2'
NE2 = 'NE2'
CD = 'CD'
CD2 = 'CD2'
CE1 = 'CE1'
CG = 'CG'
HIS_PDBNAMES = {CD2, CE1, CG, ND1, NE2}
AMD_PDBNAME_GROUPS = ({CG, 'OD1', ND2}, {CD, 'OE1', NE2})
HIS_NEIGHBOR_MAP = {
ND1: {CG, CE1},
NE2: {CE1, CD2},
CE1: {ND1, NE2},
CD2: {NE2, CG}
}
# Properties
PKA_PROPERTY = 'r_pka_prediction'
INTERACTION_PROPERTY_TEMPLATE = 'b_pka_{0}_interaction{1}'
BULK_SOLVENT_ACCESSIBLE_PROPERTY = 'b_pka_bulk_solvent_accessible'
# Strings
METAL = 'metal'
CARBOXYL = 'carboxyl'
PI_CATION = 'pi-cation'
ARGININE = 'arginine'
PI_PI = 'pi-pi'
ACCEPTOR = 'acceptor'
FORCED_ACCEPTOR = 'forced_acceptor'
STATIC_DONOR = 'static_donor'
# Histidine parameters
HISTIDINE_NAMES = {'HIS ', 'HID ', 'HIE ', 'HIP '}
AMIDE_PDBRES = {'ASN ', 'GLN '}
ASPARTIC_ACID_NAMES = {'ASP ', 'ASH '}
GLUTAMIC_ACID_NAMES = {'GLU ', 'GLH '}
# Residue atom name that carries all the annotation for the empircal pKa
# predictions
ANNOTATED_ATOM_PDBNAME = ' CA '
# Chemical entity related
# H C N O F S Se
ORGANIC_ATOMIC_NUMBERS = {1, 6, 7, 8, 9, 16, 34}
CARBOXYLIC_ACID_SMARTS = '[CX3](=O)[OX1H0-]'
# Some distances and angles shared across classes and functions for emperical
# pKa prediction
MAX_INTERACTION_DISTANCE = 5.0
MAX_HYDROGEN_CLASHING_DISTANCE = 1.8
MAX_HBOND_DISTANCE = 3.0
MIN_HBOND_ANGLE = 120
# Simply utility functions for empircal pKa predictions
[docs]def get_annotated_atom(residue):
"""Returns annotated atom of residue _StructureAtom or None"""
return residue.getAtomByPdbName(ANNOTATED_ATOM_PDBNAME)
[docs]def get_annotated_atom_property(residue):
"""Returns property dict of annotated atom or an empty dict if atom is not present"""
atom = get_annotated_atom(residue)
if atom is None:
return {}
return atom.property
[docs]def get_pka(residue):
"""Return predicted pKa of residue"""
return get_annotated_atom_property(residue).get(PKA_PROPERTY)
[docs]def set_pka(residue, pka):
"""Set predicted pKa of residue"""
p = get_annotated_atom_property(residue)
p[PKA_PROPERTY] = pka
[docs]def shift_pka(residue, amount):
"""Shift the predicted pKa of residue"""
p = get_annotated_atom_property(residue)
p[PKA_PROPERTY] += amount
[docs]def get_interaction_label(label, suffix):
"""Create property label for a given interaction."""
if suffix is None:
suffix = ''
else:
suffix = '_' + suffix
return INTERACTION_PROPERTY_TEMPLATE.format(label, suffix)
[docs]def set_interaction_label(residue, label, suffix=None):
"""Set a label of the annoted atom of residue to True"""
p = get_annotated_atom_property(residue)
key = get_interaction_label(label, suffix)
p[key] = True
[docs]def has_interaction_label(residue, label, suffix=None):
"""Check if the annotated atom of the residue has a label"""
key = get_interaction_label(label, suffix)
return bool(get_annotated_atom_property(residue).get(key))
[docs]def residue_to_label(residue_or_atom):
"""Create string from residue or atom."""
pdbname = ''
if hasattr(residue_or_atom, 'getResidue'):
pdbname = ' ' + residue_or_atom.pdbname.strip()
residue = residue_or_atom.getResidue()
else:
residue = residue_or_atom
return f'{residue.pdbres.strip()} {residue}{pdbname}'
[docs]def log_interaction(residue1, residue2, name, distance=None, angle=None):
"""Log the interaction between two residues and its distance and angle optionally"""
label1 = residue_to_label(residue1)
label2 = residue_to_label(residue2)
msg = [f'Found {name} interaction between {label1} and {label2}']
if distance is not None:
msg.append(f' Distance: {distance:.2f}')
if angle is not None:
msg.append(f' Angle: {angle:.2f}')
report(LOG_BASIC, '\n'.join(msg))
class _Grid:
"""A simple grid class to detect bulk solvent water"""
def __init__(self, array, spacing, origin):
"""
:param array: Array of grid
:type array: numpy.ndarray with ndim 3
:param spacing: Spacing between grid points in angstrom
:type spacing: float
:param origin: Origin of grid
:type origin: ndarray of length 3
"""
self.array = array
self.spacing = spacing
self.origin = origin
@classmethod
def from_structure(cls, st, spacing=0.75, padding=None):
"""
Create grid based on structure
:param spacing: Spacing between grid points in angstrom
:type spacing: float
:param padding: Added space to each dimension based on min and max of XYZ coordinates
:type padding: float
"""
xyz = st.getXYZ()
origin = xyz.min(axis=0) - padding
right_top = xyz.max(axis=0) + padding
shape = ((right_top - origin) / spacing + 0.5).astype(int)[::-1]
array = np.ones(shape, bool)
return cls(array, spacing, origin)
[docs]def get_bulk_solvent_accessible_atoms(st,
radius=2.5,
radius2=3.0,
spacing=0.50,
include_water=False):
"""
Return bulk solvent accessible atoms.
Bulk solvent accessible atoms are determined by creating a grid and setting
occupied voxels to 1 using a fixed radius for each atom and then checking
if any non-occupied voxels are present within a slightly larger radius.
:param st: Structure to determine bulk solvent accessible atoms. The atom property BULK_SOLVENT_ACCESSIBLE_PROPERTY will be set for each atom.
:type st: Structure
:param radius: Initial radius
:type radius: float
:param radius2:
:type radius2: float
:param spacing: Spacing of grid. Lower spacing makes the calculation more accurate but requires more compute time and memory.
:type spacing: float
:param include_water: Whether to keep waters or not when calculating bulk solvent accessible atoms.
:type include_water: bool
:return: List of atom indices that are bulk solvent accessible
:rtype: List[int]
"""
grid = _Grid.from_structure(st, spacing=spacing, padding=2 * radius)
array = grid.array
if include_water:
xyzs = st.getXYZ()
else:
xyzs = np.asarray([
a.xyz for a in st.atom if a.pdbres not in {'HOH ', 'DOD ', 'SPC '}
])
cutoff2 = (radius / grid.spacing)**2
for xyz in xyzs:
center = (xyz - grid.origin) / grid.spacing
start = (center - radius / grid.spacing + 0.5).astype(int)
end = (center + radius / grid.spacing).astype(int) + 1
dz2 = np.square(center[2] - np.arange(start[2], end[2])).reshape(
-1, 1, 1)
dy2 = np.square(center[1] - np.arange(start[1], end[1])).reshape(
1, -1, 1)
dx2 = np.square(center[0] - np.arange(start[0], end[0])).reshape(
1, 1, -1)
distances2 = dz2 + dy2 + dx2
occupied = np.invert(distances2 <= cutoff2)
array[start[2]:end[2], start[1]:end[1], start[0]:end[0]] &= occupied
bulk_solvent, num_features = ndimage.label(array)
value = bulk_solvent[0, 0, 0]
bulk_solvent[bulk_solvent != value] = 0
iatoms = []
cutoff2 = (radius2 / grid.spacing)**2
for atom in st.atom:
center = (atom.xyz - grid.origin) / grid.spacing
start = (center - radius2 / grid.spacing + 0.5).astype(int)
end = (center + radius2 / grid.spacing).astype(int) + 1
dz2 = np.square(center[2] - np.arange(start[2], end[2])).reshape(
-1, 1, 1)
dy2 = np.square(center[1] - np.arange(start[1], end[1])).reshape(
1, -1, 1)
dx2 = np.square(center[0] - np.arange(start[0], end[0])).reshape(
1, 1, -1)
distances2 = dz2 + dy2 + dx2
occupied = distances2 <= cutoff2
value = np.any(bulk_solvent[start[2]:end[2], start[1]:end[1],
start[0]:end[0]][occupied])
atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = False
if value == 0:
continue
iatoms.append(int(atom))
atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True
if atom.atomic_number == 1:
# Set neighboring heavy atom as bulk solvent accessible
for ba in atom.bonded_atoms:
ba.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True
iatoms.append(ba.index)
# if a heavy atom is bulk solvent accessible, then also its hydrogens are.
for iatom in iatoms:
atom = st.atom[iatom]
if not atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY]:
continue
if atom.atomic_number == 1:
continue
for ba in atom.bonded_atoms:
if ba.atomic_number == 1:
ba.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True
iatoms.append(ba.index)
return sorted(set(iatoms))
[docs]def get_unsatisfied_donors(st, include_bsa=True):
"""
Return unsatisfied buried donors
:param st: Structure to analyze
:type st: Structure
:param include_bsa: Include bulk solvent accessible atoms
:type include_bsa: bool
:return: List of unsatisfied donor atoms
:rtype: List[_StructureAtom]
"""
dcell = create_distance_cell(st, MAX_HBOND_DISTANCE, honor_pbc=False)
unsatisfied = set()
for atom1 in st.atom:
if atom1.atomic_number != 1:
continue
hydrogen = atom1
heavy = None
for ba in hydrogen.bonded_atoms:
heavy = ba
if heavy is None:
continue
if heavy.atomic_number in (1, 6):
continue
residue1 = hydrogen.getResidue()
is_hydrogen_bonded = False
is_clashing = False
for iatom2 in dcell.query(*hydrogen.xyz):
atom2 = st.atom[iatom2]
residue2 = atom2.getResidue()
if str(residue1) == str(residue2):
continue
distance = st.measure(hydrogen, atom2)
if atom2.atomic_number == 1 and distance < MAX_HYDROGEN_CLASHING_DISTANCE:
is_clashing = True
if atom2.atomic_number in (1, 6):
continue
if atom2.pdbname == ' N ':
continue
if distance > MAX_HBOND_DISTANCE:
continue
# If the hydrogen is clashing it is also unsatisfied
angle = st.measure(heavy, hydrogen, atom2)
if angle >= MIN_HBOND_ANGLE:
is_hydrogen_bonded = True
if not is_hydrogen_bonded or is_clashing:
unsatisfied.add(heavy.index)
if not include_bsa:
bsa = set(get_bulk_solvent_accessible_atoms(st))
unsatisfied = unsatisfied - bsa
return [st.atom[iatom] for iatom in sorted(unsatisfied)]
[docs]def get_carboxyl_atoms(residue):
"""
Get carboxyl atom groups from residue. Returns multiple groups if
multiple are present. The first atom will be the carbon. It
specifically adds Glu and Asp even if they are protonated and uses a
SMARTS pattern otherwise to extract the carboxyl group.
:param residue: Residue to check
:type residue: _Residue
:return: List of carboxyl atoms
:rtype: List[List[_StructureAtom]]
"""
carboxyls = []
# Add neutral carboxyls of glu and asp also to list so that a
# ProtAssign prepped structure also works, as ProtAssign will protonate
# all asps and glus while processing the structure in intermediate states.
is_asp = residue.pdbres in ASPARTIC_ACID_NAMES
is_glu = residue.pdbres in GLUTAMIC_ACID_NAMES
c = None
if is_asp:
c = residue.getAtomByPdbName(' CG ')
elif is_glu:
c = residue.getAtomByPdbName(' CD ')
if c is not None:
carboxyl = [c]
for neighbor in c.bonded_atoms:
if neighbor.atomic_number == 8:
carboxyl.append(neighbor)
if len(carboxyl) == 3:
# Check if the carboxyl is neutral. If so add it to the
# pool, otherwise it will be added in the next section
for oxygen in carboxyl[1:]:
if len(list(oxygen.bonded_atoms)) == 2:
carboxyls.append(carboxyl)
break
# Extract residue structure for fast SMARTS matching
st = residue.structure
atom_indices = residue.getAtomIndices()
residue_st = residue.extractStructure()
matches = analyze.evaluate_smarts_canvas(residue_st, CARBOXYLIC_ACID_SMARTS)
for match in matches:
assert len(match) == 3
carboxyl = []
for m in match:
atom = st.atom[atom_indices[m - 1]]
assert atom.atomic_number in (6, 8)
carboxyl.append(atom)
continue
carboxyl = sorted(carboxyl, key=lambda a: a.atomic_number)
carboxyls.append(carboxyl)
return carboxyls
[docs]def get_residue_neighborhoods(st, residues, distance_cell):
neighborhoods = defaultdict(list)
for residue in residues:
neighboring_residues = {}
for atom in residue.atom:
for iatom2 in distance_cell.query(*atom.xyz):
residue2 = st.atom[iatom2].getResidue()
if str(residue) == str(residue2):
continue
neighboring_residues[str(residue2)] = residue2
residues = list(neighboring_residues.values())
neighborhoods[str(residue)] = residues
return neighborhoods
[docs]def protonate_histidine(histidine):
nd1 = histidine.getAtomByPdbName(' ND1')
ne2 = histidine.getAtomByPdbName(' NE2')
ce1 = histidine.getAtomByPdbName(' CE1')
cd2 = histidine.getAtomByPdbName(' CD2')
cg = histidine.getAtomByPdbName(' CG ')
nd1.formal_charge = 1
ne2.formal_charge = 0
st = histidine.structure
st.getBond(nd1, ce1).type = structure.BondType.Double
st.getBond(cd2, ne2).type = structure.BondType.Single
st.getBond(ce1, ne2).type = structure.BondType.Single
st.getBond(cg, cd2).type = structure.BondType.Double
build.add_hydrogens(st)
[docs]def charge_arginine_sidechains(st):
"""
Make arginine sidechains charged. Assumes bond orders have been assigned correctly.
Looks for the nitrogen that has a double bond to the CZ atom, and changes
the formal charge to 1 and retypes atom.
"""
for residue in st.residue:
if residue.pdbres != 'ARG ':
continue
cz = residue.getAtomByPdbName(' CZ ')
if cz is None:
continue
# Get the nitrogen that has a double bond with its neighboring heavy
# atom as that is the one that should be charged.
for nh_pdbname in (' NH2', ' NH1'):
nh = residue.getAtomByPdbName(nh_pdbname)
# Nothing to do if atom doesnt exist or formal charge is already 1
if nh is None or nh.formal_charge == 1:
continue
if st.getBond(nh, cz).order == 2:
# Set formal charges and retype
nh.formal_charge = 1
nh.retype()
break
[docs]class HistidinepKaPredictor:
"""Empirical histidine pKa predictor"""
# pKa related numbers
INTERNAL_PKA = 6.5
# Negative shifts
METAL_PKA_SHIFT = -4.0
STATIC_DONOR_PKA_SHIFT = -3.0
FORCED_ACCEPTOR_PKA_SHIFT = -3.0
PI_CATION_PKA_SHIFT = -1.0
# Postive shifts
PI_PI_PKA_SHIFT = 1.0
CARBOXYLIC_ACID_PKA_SHIFT = 1.0
DOUBLE_SIDED_HBOND_PKA_SHIFT = 1.0
# Distance and angle cutoffs to determine interactions
MAX_STATIC_DONOR_INTERACTION_DISTANCE = 3.0
MIN_STATIC_DONOR_INTERACTION_ANGLE = 120
MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE = 2.5
MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE = 120
MAX_METAL_INTERACTION_DISTANCE = 3.5
MAX_PI_PI_INTERACTION_DISTANCE = 4.5
MAX_PI_PI_INTERACTION_ANGLE = 30
MAX_PI_CATION_INTERACTION_DISTANCE = 4.5
MAX_PI_CATION_INTERACTION_ANGLE = 30
MAX_PI_ARG_INTERACTION_DISTANCE = 5.5
MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE = 3.8
MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE = 140
NITROGEN_PDBNAMES = {' ND1', ' NE2'}
CARBON_PDBNAMES = {' CD2', ' CE1'}
SIDECHAINS_PDBNAMES = NITROGEN_PDBNAMES | CARBON_PDBNAMES | {' CG '}
[docs] def predict(self, st, protassign=None, flip=False):
"""
Predict histidine pKa with empirical rules.
Currently this requires the structure being annotated by ProtAssign.
:param st: ProtAssign annotated structure
:type st: Structure
:param protassign: ProtAssign instance to calculate forced acceptor interactions
:type protassign: schrodinger.protein.assignment.ProtAssign
:param flip: Calculate pKa of flipped version of histidne
:type flip: bool
Returns None but annotates ANNOTATED_ATOM of each histidine with
found interactions and predicted pKa
"""
self._st = st
# Gather all histidines that have their nitrogen and carbon atoms
self._histidines = []
for r in st.residue:
if r.pdbres not in HISTIDINE_NAMES:
continue
for pdbname in self.SIDECHAINS_PDBNAMES:
atom = r.getAtomByPdbName(pdbname)
if atom is None:
break
else:
self._histidines.append(r)
self._distance_cell = create_distance_cell(st,
MAX_INTERACTION_DISTANCE,
honor_pbc=False)
self._nitrogen_pdbnames = self.CARBON_PDBNAMES if flip else self.NITROGEN_PDBNAMES
self._neighborhoods = get_residue_neighborhoods(st, self._histidines,
self._distance_cell)
for histidine in self._histidines:
set_pka(histidine, self.INTERNAL_PKA)
# Lower pKa values
self.metal_interactions()
self.identify_forced_amide_states()
self.static_donor_interactions()
self.cation_pi_interactions()
self.arginine_interactions()
# Increase pKa values
self.pi_pi_interactions()
self.carboxyl_interactions()
self.static_acceptor_interactions()
# Forced acceptor pKa calculations are expensive and may not be
# required; it also requires a protassign instance to be provided.
if protassign is not None:
report(LOG_BASIC,
"Determining pKa penalties for forced acceptor histidines.")
self.forced_acceptor_interactions(st, protassign)
[docs] def cation_pi_interactions(self):
report(LOG_BASIC,
"Determining pKa penalties for pi-cation interactions")
st = self._st
cation_pi_interactions = find_pi_cation_interactions(st)
for cpi in cation_pi_interactions:
atom = st.atom[cpi.pi_centroid.atoms[0]]
if atom.pdbres not in HISTIDINE_NAMES:
continue
if cpi.distance > self.MAX_PI_CATION_INTERACTION_DISTANCE:
continue
min_angle = self.MAX_PI_CATION_INTERACTION_ANGLE
max_angle = 180 - min_angle
if min_angle < abs(cpi.angle) < max_angle:
continue
cation = st.atom[cpi.cation_centroid.atoms[0]].getResidue()
# ProtAssign charges ASN and GLN to do it's stuff...
if cation.pdbres in AMIDE_PDBRES:
continue
histidine = atom.getResidue()
set_interaction_label(histidine, PI_CATION)
shift_pka(histidine, self.PI_CATION_PKA_SHIFT)
log_interaction(histidine, cation, PI_CATION, cpi.distance,
cpi.angle)
[docs] def arginine_interactions(self):
report(LOG_BASIC, "Determining pKa penalties for arginine interactions")
for his in self._histidines:
args = [
r for r in self._neighborhoods[str(his)] if r.pdbres == 'ARG '
]
if not args:
continue
cg = his.getAtomByPdbName(' CG ')
nd1 = his.getAtomByPdbName(' ND1')
ne2 = his.getAtomByPdbName(' NE2')
nd1_xyz = np.asarray(nd1.xyz)
ne2_xyz = np.asarray(ne2.xyz)
centroid1 = (nd1_xyz + ne2_xyz) / 2.0
# Calculate the normal of the histidine plane
v1 = nd1_xyz - cg.xyz
v2 = ne2_xyz - cg.xyz
n1 = np.cross(v1, v2)
n1 /= np.linalg.norm(n1)
for arg in args:
nh2 = arg.getAtomByPdbName(' NH2')
nh1 = arg.getAtomByPdbName(' NH1')
ne = arg.getAtomByPdbName(' NE ')
cz = arg.getAtomByPdbName(' CZ ')
if None in (nh2, nh1, ne, cz):
continue
centroid2 = np.asarray(cz.xyz)
distance = np.linalg.norm(centroid1 - centroid2)
if distance > self.MAX_PI_ARG_INTERACTION_DISTANCE:
continue
v1 = nh2.xyz - centroid2
v2 = nh1.xyz - centroid2
n2 = np.cross(v1, v2)
n2 /= np.linalg.norm(n2)
theta = np.arccos(np.inner(n1, n2))
angle = abs(np.rad2deg(theta))
if self.MAX_PI_CATION_INTERACTION_ANGLE < angle < (
180 - self.MAX_PI_CATION_INTERACTION_ANGLE):
continue
if not (has_interaction_label(his, ARGININE) or
has_interaction_label(his, PI_CATION)):
shift_pka(his, self.PI_CATION_PKA_SHIFT)
set_interaction_label(his, ARGININE)
log_interaction(his, arg, ARGININE, distance, angle)
[docs] def pi_pi_interactions(self):
"""Check if histidine is involved in pi-pi interaction"""
report(LOG_BASIC, "Determining pKa penalties for pi-pi interactions")
st = self._st
pi_pi_interactions = find_pi_pi_interactions(st)
for ppi in pi_pi_interactions:
if ppi.distance > self.MAX_PI_PI_INTERACTION_DISTANCE:
continue
max_angle = self.MAX_PI_PI_INTERACTION_ANGLE
min_angle = 180 - max_angle
if max_angle < abs(ppi.angle) < min_angle:
continue
for ring1 in (ppi.ring1, ppi.ring2):
iatom1 = ring1.atoms[0]
atom1 = st.atom[iatom1]
if atom1.pdbres not in HISTIDINE_NAMES:
continue
histidine = atom1.getResidue()
if not has_interaction_label(histidine, PI_PI):
shift_pka(histidine, self.PI_PI_PKA_SHIFT)
set_interaction_label(histidine, PI_PI)
# Get other residue to report on
ring2 = ppi.ring1
if ring2.atoms[0] == iatom1:
ring2 = ppi.ring2
iatom2 = ring2.atoms[0]
residue = st.atom[iatom2].getResidue()
log_interaction(histidine, residue, PI_PI, ppi.distance,
ppi.angle)
[docs] def carboxyl_interactions(self):
"""Check if histidine is interacting with a carboxyl group."""
report(LOG_BASIC, "Determining pKa penalties for carboxyl interactions")
st = self._st
for his in self._histidines:
carboxyls = []
for residue in self._neighborhoods[str(his)]:
carboxyls += get_carboxyl_atoms(residue)
# Now check for interactions
for c, o1, o2 in carboxyls:
o_ave_xyz = (np.asarray(o1.xyz) + o2.xyz) / 2.0
# Set O1 to average O position to measure distance and angle
o1_xyz = o1.xyz
o1.xyz = o_ave_xyz
carboxyl_residue = c.getResidue()
# One carboxyl only interacts once with a histidine, but can
# act as an acceptor for multiple atoms in the histidine.
is_interacting = False
for pdbname in self._nitrogen_pdbnames:
n = his.getAtomByPdbName(pdbname)
distance = st.measure(o1, n)
if distance > self.MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE:
continue
angle = st.measure(c, o1, n)
if angle < self.MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE:
continue
# Do not count the same carboxyl more than once
if not is_interacting:
set_interaction_label(his, CARBOXYL)
shift_pka(his, self.CARBOXYLIC_ACID_PKA_SHIFT)
log_interaction(n, carboxyl_residue, CARBOXYL, distance,
angle)
set_interaction_label(his, ACCEPTOR, pdbname.strip())
is_interacting = True
o1.xyz = o1_xyz
[docs] def static_donor_interactions(self):
"""Check if histidine is interacting with a static donor"""
report(LOG_BASIC, "Determining pKa interactions for static donors")
st = self._st
for his in self._histidines:
static_donors = []
for residue in self._neighborhoods[str(his)]:
for atom in residue.atom:
if atom.atomic_number == 1:
continue
# We consider lysines static
p = atom.property
is_static_donor = bool(
p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY))
is_static_donor |= atom.pdbres in {
'LYS ', 'LYH '
} and atom.pdbname == ' NZ '
if not is_static_donor:
continue
for bonded_atom in atom.bonded_atoms:
if bonded_atom.atomic_number != 1:
continue
if not bonded_atom.property.get(DONOR_PROPERTY):
continue
static_donors.append((atom, bonded_atom))
iterator = itertools.product(self._nitrogen_pdbnames, static_donors)
for pdbname, (donor, hydrogen) in iterator:
nitrogen = his.getAtomByPdbName(pdbname)
distance = st.measure(hydrogen, nitrogen)
if distance > self.MAX_STATIC_DONOR_INTERACTION_DISTANCE:
continue
angle = st.measure(nitrogen, hydrogen, donor)
if angle < self.MIN_STATIC_DONOR_INTERACTION_ANGLE:
continue
if not has_interaction_label(his, STATIC_DONOR):
shift_pka(his, self.STATIC_DONOR_PKA_SHIFT)
set_interaction_label(his, STATIC_DONOR)
log_interaction(nitrogen, donor, STATIC_DONOR, distance, angle)
[docs] def identify_forced_amide_states(self):
"""
Identify amide residues that are forced in a state due to them
interacting with a static donor. This is done in a single pass and not
iteratively.
"""
st = self._st
for residue in st.residue:
# ProtAssign resets the flip of the amide so we need to check both
# the O and N atom to see if it is interacting with a static donor.
if residue.pdbres == 'ASN ':
o_pdbname = ' OD1'
n_pdbname = ' ND2'
elif residue.pdbres == 'GLN ':
o_pdbname = ' OE1'
n_pdbname = ' NE2'
else:
continue
o1 = residue.getAtomByPdbName(o_pdbname)
n1 = residue.getAtomByPdbName(n_pdbname)
if o1 is None or n1 is None:
continue
# Remove previous assigned property names of protassign to amides
for a in (o1, n1):
p = a.property
p.pop(DONOR_PROPERTY, None)
p.pop(ACCEPTOR_PROPERTY, None)
for o, n in [(o1, n1), (n1, o1)]:
for iatom in self._distance_cell.query(*o.xyz):
atom = st.atom[iatom]
if str(atom.getResidue()) == str(residue):
continue
if atom.atomic_number == 1:
continue
p = atom.property
if not (p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)):
continue
for hydrogen in atom.bonded_atoms:
if hydrogen.atomic_number != 1:
continue
distance = st.measure(o, hydrogen)
if distance > self.MAX_STATIC_DONOR_INTERACTION_DISTANCE:
continue
angle = st.measure(atom, hydrogen, o)
if angle < self.MIN_STATIC_DONOR_INTERACTION_ANGLE:
continue
o.property[ACCEPTOR_PROPERTY] = True
o.property[STATIC_PROPERTY] = True
n.property[DONOR_PROPERTY] = True
n.property[STATIC_PROPERTY] = True
report(
LOG_BASIC,
f"Forced acceptor/donor found for amide {residue.pdbres} {residue}"
)
if o.property.get(ACCEPTOR_PROPERTY) and n.property.get(
ACCEPTOR_PROPERTY):
report(
LOG_BASIC,
f"Amide {residue.pdbres} {residue} is interacting with static donor on both sides"
)
for atom in (o, n):
del atom.property[ACCEPTOR_PROPERTY]
del atom.property[DONOR_PROPERTY]
del atom.property[STATIC_PROPERTY]
[docs] def static_acceptor_interactions(self):
"""Check if histidine is interacting with a static acceptor"""
report(LOG_BASIC, "Determining pKa interactions for static acceptors")
st = self._st
for his in self._histidines:
# If histidine is already involved in a static donor interaction,
# don't bother.
if any(
has_interaction_label(his, label)
for label in (METAL, STATIC_DONOR)):
continue
# The histidine needs to be protonated in this function. Extract
# histidine and protonate
hip_st = his.extractStructure()
hip = next(iter(hip_st.residue))
protonate_histidine(hip)
acceptors = []
for residue in self._neighborhoods[str(his)]:
for atom in residue.atom:
p = atom.property
if p.get(ACCEPTOR_PROPERTY):
if str(atom.getResidue()) == str(his):
continue
acceptor = hip_st.addAtom(atom.element, *atom.xyz)
acceptor.pdbres = atom.pdbres
acceptor.resnum = atom.resnum
acceptor.chain = atom.chain
acceptors.append(acceptor)
iterator = itertools.product(self._nitrogen_pdbnames, acceptors)
for pdbname, acceptor in iterator:
nitrogen = hip.getAtomByPdbName(pdbname)
hydrogen = None
for ba in nitrogen.bonded_atoms:
if ba.atomic_number == 1:
hydrogen = ba
assert hydrogen is not None
distance = hip_st.measure(hydrogen, acceptor)
if distance > self.MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE:
continue
angle = hip_st.measure(nitrogen, hydrogen, acceptor)
if angle < self.MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE:
continue
set_interaction_label(his, ACCEPTOR, pdbname.strip())
log_interaction(nitrogen, acceptor, ACCEPTOR, distance, angle)
for nitrogens in self._nitrogen_pdbnames:
if all(
has_interaction_label(his, ACCEPTOR, n)
for n in nitrogens):
if not has_interaction_label(his, ACCEPTOR):
shift_pka(his, self.DOUBLE_SIDED_HBOND_PKA_SHIFT)
set_interaction_label(his, ACCEPTOR)
report(LOG_BASIC, f"Both nitrogens are donating of {his}")
class _CarboxylicAcidpKaPredictor:
# pKa related
INTERNAL_PKA = None
# Shifts
METAL_PKA_SHIFT = -3.0
STATIC_ACCEPTOR_PKA_SHIFT = 4.0
CARBOXYLIC_ACID_PKA_SHIFT = 4.0
# Interaction related distances and angles
MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE = 3.5
MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE = 150
MAX_METAL_INTERACTION_DISTANCE = 3.5
MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE = 3.8
MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE = 140
# Some names/strings
PDBRES_NAMES = None
CARBON_PDBNAME = None
OXYGEN_PDBNAMES = None
SIDECHAIN_PDBNAMES = None
def predict(self, st):
self._st = st
self._residues = []
for r in st.residue:
if r.pdbres not in self.PDBRES_NAMES:
continue
for pdbname in self.SIDECHAIN_PDBNAMES:
atom = r.getAtomByPdbName(pdbname)
if atom is None:
break
else:
self._residues.append(r)
for residue in self._residues:
set_pka(residue, self.INTERNAL_PKA)
self._distance_cell = create_distance_cell(st,
MAX_INTERACTION_DISTANCE,
honor_pbc=False)
self._neighborhoods = get_residue_neighborhoods(st, self._residues,
self._distance_cell)
# Shift pKa down. Favor charged state
self.metal_interactions()
# Shift pKa up. Favor neutral state
self.carboxyl_interactions()
self.static_acceptor_interactions()
def static_acceptor_interactions(self):
"""Check if histidine is interacting with a static acceptor"""
report(LOG_BASIC, "Determining pKa interactions for static acceptors")
st = self._st
for carboxylic_residue in self._residues:
if has_interaction_label(carboxylic_residue, METAL):
continue
if has_interaction_label(carboxylic_residue, CARBOXYL):
continue
acceptors = []
for residue in self._neighborhoods[str(carboxylic_residue)]:
for atom in residue.atom:
p = atom.property
if p.get(ACCEPTOR_PROPERTY) and p.get(STATIC_PROPERTY):
acceptors.append(atom)
carbon = carboxylic_residue.getAtomByPdbName(self.CARBON_PDBNAME)
oxygen1 = carboxylic_residue.getAtomByPdbName(
self.OXYGEN_PDBNAMES[0])
oxygen2 = carboxylic_residue.getAtomByPdbName(
self.OXYGEN_PDBNAMES[1])
oxygen1_xyz = np.asarray(oxygen1.xyz, float)
ave_oxygen_xyz = (oxygen1_xyz + oxygen2.xyz) / 2.0
oxygen1.xyz = ave_oxygen_xyz
for acceptor in acceptors:
if str(acceptor.getResidue()) == str(carboxylic_residue):
continue
distance = st.measure(oxygen1, acceptor)
if distance > self.MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE:
continue
angle = st.measure(carbon, oxygen1, acceptor)
if angle < self.MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE:
continue
if not has_interaction_label(carboxylic_residue, ACCEPTOR):
set_interaction_label(carboxylic_residue, ACCEPTOR)
shift_pka(carboxylic_residue,
self.STATIC_ACCEPTOR_PKA_SHIFT)
log_interaction(oxygen1, acceptor, ACCEPTOR, distance, angle)
# Reset oxygen position
oxygen1.xyz = oxygen1_xyz
def metal_interactions(self):
report(LOG_BASIC, "Determining pKa penalties for metal interactions")
st = self._st
for coo_residue in self._residues:
metals = []
residues = self._neighborhoods[str(coo_residue)]
for residue in residues:
for atom in residue.atom:
if atom.atomic_number not in ORGANIC_ATOMIC_NUMBERS:
metals.append(atom)
if not metals:
continue
for pdbname in self.OXYGEN_PDBNAMES:
oxygen = coo_residue.getAtomByPdbName(pdbname)
if oxygen is None:
continue
for metal in metals:
distance = st.measure(oxygen, metal)
if distance > self.MAX_METAL_INTERACTION_DISTANCE:
continue
set_interaction_label(coo_residue, METAL)
shift_pka(coo_residue, self.METAL_PKA_SHIFT)
break
def carboxyl_interactions(self):
"""Check if histidine is interacting with a carboxyl group."""
report(LOG_BASIC, "Determining pKa penalties for carboxyl interactions")
st = self._st
for coo_residue in self._residues:
carboxyls = []
for residue in self._neighborhoods[str(coo_residue)]:
carboxyls += get_carboxyl_atoms(residue)
# Now check for interactions
carbon = coo_residue.getAtomByPdbName(self.CARBON_PDBNAME)
oxygen1 = coo_residue.getAtomByPdbName(self.OXYGEN_PDBNAMES[0])
oxygen2 = coo_residue.getAtomByPdbName(self.OXYGEN_PDBNAMES[1])
oxygen1_xyz = np.asarray(oxygen1.xyz, float)
ave_oxygen_xyz = (oxygen1_xyz + oxygen2.xyz) / 2.0
oxygen1.xyz = ave_oxygen_xyz
for c, o1, o2 in carboxyls:
carboxyl_residue = c.getResidue()
for oxygen in (o1, o2):
distance = st.measure(oxygen1, oxygen)
if distance > self.MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE:
continue
angle = st.measure(carbon, oxygen1, oxygen)
if angle < self.MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE:
continue
# Do not count the same carboxyl more than once. Also do
# not protonate both carboxyls, although it is inclear
# whether they should both be protonated or just a single
# one.
if not has_interaction_label(
coo_residue,
CARBOXYL) and not has_interaction_label(
carboxyl_residue, CARBOXYL):
set_interaction_label(coo_residue, CARBOXYL)
shift_pka(coo_residue, self.CARBOXYLIC_ACID_PKA_SHIFT)
log_interaction(oxygen1, carboxyl_residue, CARBOXYL,
distance, angle)
oxygen1.xyz = oxygen1_xyz
[docs]class AsparticAcidpKaPredictor(_CarboxylicAcidpKaPredictor):
"""Aspartic acid emperical pKa predictor"""
# Taken from average of pKa values in cleaned up PKAD database
INTERNAL_PKA = 3.4
PDBRES_NAMES = ASPARTIC_ACID_NAMES
CARBON_PDBNAME = ' CG '
OXYGEN_PDBNAMES = [' OD1', ' OD2']
SIDECHAIN_PDBNAMES = OXYGEN_PDBNAMES + [CARBON_PDBNAME]
[docs]class GlutamicAcidpKaPredictor(_CarboxylicAcidpKaPredictor):
"""Glutamic acid emperical pKa predictor"""
# Taken from average of pKa values in cleaned up PKAD database
INTERNAL_PKA = 4.2
PDBRES_NAMES = GLUTAMIC_ACID_NAMES
CARBON_PDBNAME = ' CD '
OXYGEN_PDBNAMES = [' OE1', ' OE2']
SIDECHAIN_PDBNAMES = OXYGEN_PDBNAMES + [CARBON_PDBNAME]
[docs]class PropKaException(Exception):
[docs] def __init__(self, value):
self.parameter = value
[docs]def report(message_level=1, message=''):
if message_level <= log_level:
print(message)
sys.stdout.flush()
return
[docs]def measure(ct,
atom1=None,
atom2=None,
atom3=None,
atom4=None,
use_xtal=False,
max_dist=10.0):
# A replacement for ct.measure() which also supports xtal symmetry, and can return the coordinates of
# the nearest xtal copies of each atom when requested.
def find_nearest_atom_image(ct, atom1, atom2):
lowest_distance = None
best_match = None
atoms = analyze.evaluate_asl(ct, 'atom.i_pa_atomindex %d' % atom2)
for atom in atoms:
distance = ct.measure(atom1, atom)
if lowest_distance is None or distance < lowest_distance:
lowest_distance = distance
best_match = int(atom)
return best_match
def extract(ct, atoms):
new_ct = structure.create_new_structure(len(atoms))
for i, atom in enumerate(atoms):
new_atom = new_ct.atom[i + 1]
old_atom = ct.atom[atom]
new_atom.element = old_atom.element
new_atom.xyz = old_atom.xyz
new_atom.color = old_atom.color
new_atom.atom_type = old_atom.atom_type
new_atom.property['i_pa_atomindex'] = old_atom.property[
'i_pa_atomindex']
for property in [
's_pdb_PDB_CRYST1_Space_Group', 'r_pdb_PDB_CRYST1_a',
'r_pdb_PDB_CRYST1_b', 'r_pdb_PDB_CRYST1_c',
'r_pdb_PDB_CRYST1_alpha', 'r_pdb_PDB_CRYST1_beta',
'r_pdb_PDB_CRYST1_gamma'
]:
new_ct.property[property] = ct.property[property]
return new_ct
def merge_mates(atom_ct, atom_ct_mates):
atom_ct_mates.insert(0, atom_ct)
for i, mate in enumerate(atom_ct_mates):
for atom in mate.atom:
atom.property['i_pa_xtalatom'] = i
for i in range(1, len(atom_ct_mates)):
atom_ct_mates[0].extend(atom_ct_mates[i])
return atom_ct_mates
def find_atom1(atom_ct_mates, atom1):
for atom in atom_ct_mates[0].atom:
if atom.property['i_pa_xtalatom'] == 0 and atom.property[
'i_pa_atomindex'] == atom1:
xtal_atom1 = int(atom)
break
return xtal_atom1
def create_list(atom1, atom2, atom3, atom4):
if atom4 is not None:
atoms = [atom1, atom2, atom3, atom4]
elif atom3 is not None:
atoms = [atom1, atom2, atom3]
else:
atoms = [atom1, atom2]
return atoms
def find_other_atoms(atom_ct_mates, xtal_atom1, atom2, atom3, atom4):
xtal_atom2 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom1,
atom2)
if atom3 is not None:
xtal_atom3 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom2,
atom3)
else:
xtal_atom3 = None
if atom4 is not None:
xtal_atom4 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom3,
atom4)
else:
xtal_atom4 = None
return (xtal_atom2, xtal_atom3, xtal_atom4)
# Only do the expensive xtal version of measure if it's a surface atom (or if not sure if it's a surface atom).
if use_xtal and ('i_pa_surfaceatom' not in ct.atom[atom1].property or
ct.atom[atom1].property['i_pa_surfaceatom'] == 1):
atoms = create_list(atom1, atom2, atom3, atom4)
atom_ct = extract(ct, atoms)
atom_ct_mates = analyze.generate_crystal_mates(atom_ct, radius=max_dist)
atom_ct_mates = merge_mates(atom_ct, atom_ct_mates)
# Get atom1 (in the primary cell).
xtal_atom1 = find_atom1(atom_ct_mates, atom1)
# Get atom2 (the closest to atom1), and so on.
(xtal_atom2, xtal_atom3,
xtal_atom4) = find_other_atoms(atom_ct_mates, xtal_atom1, atom2, atom3,
atom4)
return atom_ct_mates[0].measure(xtal_atom1, xtal_atom2, xtal_atom3,
xtal_atom4)
else:
value = ct.measure(atom1, atom2, atom3, atom4)
return value
[docs]def calculate_interaction_matrix(ct: structure.Structure,
iatoms: List[int],
distance: float,
use_xtal: bool = False) -> Dict[int, set]:
"""
Create an interaction matrix based on the changeable_index atom property
:param ct: Structure with annotated atoms having set the
i_pa_changeable_index corresponding the the index of the changeable
:param iatoms: List of atom indices which take part in interaction
:param distance: Max distance between interacting atoms
:param use_xtal: Take into account crystal symmetry mates
:param use_xtal: bool
:return: interaction matrix allowing double indexing: interact[i, j]
"""
# Create a distance cell to query containing only the atoms of interest
interact = defaultdict(set)
if not iatoms:
return interact
ct_part = ct.extract(iatoms, copy_props=True)
if use_xtal:
mates = analyze.generate_crystal_mates(ct_part, distance)
for mate in mates[1:]:
ct_part.extend(mate)
dcell = create_distance_cell(ct_part, distance, honor_pbc=False)
for iatom1 in iatoms:
atom1 = ct.atom[iatom1]
index1 = atom1.property['i_pa_changeable_index']
for iatom2 in dcell.query(*atom1.xyz):
index2 = ct_part.atom[iatom2].property['i_pa_changeable_index']
# Changeables can currently only interact with other
# changeables. It is possible that a changeable has
# self-interaction with its crystal mate, but this is not
# implemented down-stream, so we ignore that for now.
if index1 == index2:
continue
interact[index1].add(index2)
interact[index2].add(index1)
return interact
[docs]def annotate_structure_interactors(ct, acceptors, donors, clashers):
"""
Set atom property for each interactor class
:param ct: Structure to annotate
:type ct: Structure
:param acceptors: List of acceptor atom indices
:type acceptors: List[int]
:param donors: List of donor pair atom indices
:type donors: List[tuple[int, int]]
:param clashers: List of clasher atom indices
:type clashers: List[int]
:return: None but sets atom properties
:rtype: NoneType
"""
for acceptor in acceptors:
ct.atom[acceptor].property[ACCEPTOR_PROPERTY] = 1
for heavy, hydrogen in donors:
if heavy == hydrogen:
ct.atom[heavy].property[ION_PROPERTY] = 1
else:
ct.atom[heavy].property[DONOR_PROPERTY] = 1
ct.atom[hydrogen].property[DONOR_PROPERTY] = 1
for clasher in clashers:
ct.atom[clasher].property[CLASHER_PROPERTY] = 1
[docs]def generate_annotated_ct(ct, donors, acceptors, clashers, use_xtal=False):
"""
Generate an annotated Structure that contains crystal mates.
The annotated heavily speeds up the self scoring step for large and xtal structures
:param ct: Structure to annotate
:type ct: Structure
:param donors: List of donor pairs
:type donors: List[Tuple[int, int]]
:param acceptors: List of acceptor atom indices
:type acceptors: List[int]
:param clashers: List of clasher atom indices
:type clashers: List[int]
:param use_xtal: Create crystal mates in annotated output structure
:type use_xtal: bool
:return: New annotated structure with property ANNOTATED_PROPERTY set to True
:rtype: Structure
"""
# Annotate structure so we can use an efficient distance cell
# to extract close contacts during self-scoring.
working_ct = ct.copy()
annotate_structure_interactors(working_ct, acceptors, donors, clashers)
# Setup a crystal mate ct if necessary.
if use_xtal:
mates = analyze.generate_crystal_mates(
working_ct, radius=MAX_HEAVY_HEAVY_INTERACTION_DISTANCE)
# The first one is just a copy of the original
for mate in mates[1:]:
working_ct.extend(mate)
working_ct.property[ANNOTATED_PROPERTY] = True
return working_ct
[docs]def check_residue_flip_state(res: structure._Residue) -> tuple:
"""
Determine whether a residue cannot be flipped, is, or is not flipped.
:param res: a protein residue
:return: a tuple of `(state, msg)`, where `state` describes whether the
residue is flipped (`True`), is not flipped (`False`), or cannot be
flipped (`None`); if `None`, `msg` will contain an explanation
:rtype: tuple[bool or NoneType, str]
"""
pdbname_atom_map = {
atom.pdbname.strip(): atom
for atom in res.atom
if atom.atomic_number != 1
}
is_amd = any(
all(n in pdbname_atom_map
for n in pdbnames)
for pdbnames in AMD_PDBNAME_GROUPS)
is_his = all(name in pdbname_atom_map for name in HIS_PDBNAMES)
is_flipped, msg = None, ''
if not is_amd and not is_his:
msg = (f'Cannot determine flip state of {get_residue_string(res)}: only'
f' {RESNAME_FLIPSTATES_MAP.keys()} can be flipped.')
elif is_amd:
# Residue is an amide; verify that there are no unexpected bonds
nitrogen = pdbname_atom_map.get(ND2) or pdbname_atom_map.get(NE2)
carbon = pdbname_atom_map.get(CD) or pdbname_atom_map.get(CG)
bound_atoms = [a for a in get_heavy_neighbors(nitrogen) if a != carbon]
if bound_atoms:
msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res))
else:
is_flipped = carbon.property.get(STATE_PROPERTY) == FLIP
else:
# Residue is a histidine; verify that there are no unexpected bonds
for pdbname, neighbor_pdbnames in HIS_NEIGHBOR_MAP.items():
atom = pdbname_atom_map[pdbname]
exp_neighbors = [
pdbname_atom_map[name] for name in neighbor_pdbnames
]
bound_atoms = [
a for a in get_heavy_neighbors(atom) if a not in exp_neighbors
]
if bound_atoms:
msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res))
break
else:
carbon = pdbname_atom_map.get(CG)
flip_tag = carbon.property.get(STATE_PROPERTY)
is_flipped = flip_tag is not None and flip_tag.startswith(FLIP)
return is_flipped, msg
[docs]def get_residue_flip_state(res: structure._Residue) -> Union[bool, type(None)]:
"""
Return the flip state of a protein residue.
A truncated version of `check_residue_flip_state()`.
:param res: a protein residue
:return: the flip state of a residue
"""
flip_state, _ = check_residue_flip_state(res)
return flip_state
[docs]def get_residue_string(residue_or_atom) -> str:
"""
Return a string describing a residue from a residue or atom.
The string will match the format
<chain>:<residue PDB code> <residue number>[<insertion code>]
:param residue_or_atom: a residue or atom
:type residue_or_atom: _Residue or _StructureAtom
:return: a string describing the residue
"""
chain = '_' if residue_or_atom.chain == ' ' else residue_or_atom.chain
pdbres = residue_or_atom.pdbres.strip()
inscode = residue_or_atom.inscode.strip()
return f'{chain}:{pdbres} {residue_or_atom.resnum}{inscode}'
[docs]def get_atom_string(atom):
"""Return a string describing atom"""
return get_residue_string(atom) + ':' + atom.pdbname
[docs]def get_heavy_neighbors(atom: structure._StructureAtom) -> list:
"""
:param atom: an atom
:return: a list of heavy (non-H) atoms covalently bound to `atom`
:rtype: list[structure._StructureAtom]
"""
return [
bond.atom2
for bond in atom.bond
if bond.atom2.atomic_number != 1 and bond.order != 0
]
[docs]def get_residue_from_changeable(ct, changeable):
heavies = changeable.get_heavies()
return ct.atom[heavies[0]].getResidue()
@dataclass
class _Interactors:
"""
Container for atom indices of hydrogen-bond acceptors, donors, and clashers
"""
acceptors: List[int] = field(default_factory=list)
donors: List[Tuple[int, int]] = field(default_factory=list)
clashers: List[int] = field(default_factory=list)
def remap(self, mapper: Dict[int, int]):
"""
Remap atom indices of interactors.
:param mapper: Dictionary mapping old atom indices to new indices
"""
self.acceptors = [mapper[a] for a in self.acceptors]
self.clashers = [mapper[a] for a in self.clashers]
self.donors = [(mapper[a1], mapper[a2]) for a1, a2 in self.donors]
@dataclass
class _WaterState:
"""
Container for water states
:param coordinates: Hydrogen coordinates of water state
:type coordinates: Tuple[List[float], List[float]]
:param donating_to: List of atom indices to which water is donating
:type donating_to: List[int]
:param accepting_from: List of atom indices from water is accepting a hydrogen
:type accepting_from: List[int]
"""
# hydrogen1 and hydrogen2 coordinates
coordinates: Tuple[List[float], List[float]]
donating_to: List[int] = field(default_factory=list)
accepting_from: List[int] = field(default_factory=list)
[docs]class WaterStateEnumerator:
"""
Enumerate discrete water states that are hydrogen bonding with nearby acceptors and donors
The goal is to sample likely states while also limiting the number of
states as solving the combinatorial problem gets harder with more and more
states.
"""
# OH bond lenght of water in angstrom
OH_LENGTH = 1.000
# HOH angle of water in degrees
HOH_ANGLE = 109.5
# Minimum required hydrogen to non-static heavy donor atom distance for it
# to be not considered clashing.
MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE = 2.5
[docs] def __init__(self, ct, oxygen, acceptors, donors):
"""
:param ct: Annotated structure with donor/acceptor and static flags.
:type ct: Structure
:param oxygen: Oxygen atom of water
:type oxygen: _StructureAtom
:param acceptors: List of acceptor atom indices
:type acceptors: List[int]
:param donors: List of donor atom indices
:type donors: List[Tuple[int, int]]
"""
self.ct = ct
self.oxygen = oxygen
self.hydrogen1, self.hydrogen2 = list(oxygen.bonded_atoms)
self.acceptors = acceptors
self.donors = donors
# These attributes will be filled by precalculate and will contain a
# mapping of atom index to distance or unit vector from oxygen to atom
self._vector_lookup = {}
self._distance_lookup = {}
self._precalculate()
self._filter_acceptor_donors()
self._separate_donors()
def _precalculate(self):
"""
Pre-calculate all unit vectors and distances from water oxygen to
acceptor and heavy donor atoms
"""
oxygen_xyz = np.asarray(self.oxygen.xyz, float)
for iheavy in itertools.chain(self.acceptors, *self.donors):
heavy = self.ct.atom[iheavy]
if heavy.atomic_number == 1:
continue
if iheavy == self.oxygen.index:
continue
vector = heavy.xyz - oxygen_xyz
distance = np.linalg.norm(vector)
if distance < 0.01:
report(
LOG_BASIC,
f"Water is severily clashing with {heavy.pdbres} {heavy.resnum}{heavy.inscode} {heavy.chain}"
)
continue
vector /= distance
self._vector_lookup[iheavy] = vector
self._distance_lookup[iheavy] = distance
def _filter_acceptor_donors(self):
"""
Filter acceptor and donor atoms up to a certain distance and remove
water from donor list. This is to limit the number of states that are
enumerated.
Resets the acceptors and donors attributes
"""
distance_lookup = self._distance_lookup
if distance_lookup:
# The typical max distance between heavy atoms is 4.0 of the
# acceptors and donors. This is probably too far and we can reduce
# the distance and filter the acceptors and donors for more
# efficient state generation.
# Look for interactors within a certain distance. To limit the number
# of states generated, stop after at least 2 interactors are found. We
# start at a distance of 3.5 which is pretty generous.
distances = np.asarray(list(distance_lookup.values()))
for max_distance in np.linspace(3.5, 4.0, num=11, endpoint=True):
ninteractors = (distances <= max_distance).sum()
if ninteractors > 1:
break
# Filter acceptors
filtered_acceptors = []
for iheavy in self.acceptors:
distance = distance_lookup.get(iheavy)
if distance is None:
continue
if distance <= max_distance:
filtered_acceptors.append(iheavy)
# Filter donors
filtered_donors = []
for iheavy, ihydrogen in self.donors:
distance = distance_lookup.get(iheavy)
if distance is None:
continue
if distance <= max_distance:
filtered_donors.append((iheavy, ihydrogen))
self.acceptors = filtered_acceptors
self.donors = filtered_donors
def _separate_donors(self):
"""Separate the donors into metals and a list of heavy atom"""
# The donor list also contains metals. Separate these out.
donor_heavies = []
for heavy, hydrogen in self.donors:
# Its a metal if heavy == hydrogen
if heavy != hydrogen:
donor_heavies.append(heavy)
# Some heavies can be in the list multiple times, e.g. other waters
# have two O-H groups where the O is the same atom.
self.donor_heavies = sorted(set(donor_heavies))
def _is_clashing_with_static_donor(self, donor_heavy):
"""
Check if any of the water hydogens is clashing with a static hydrogen donor
:param donor_heavy: Donor heavy atom
:type donor_heavy: _StructureAtom
:return: True if clashing, False if not
:rtype: bool
"""
p = donor_heavy.property
if not (p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)):
return False
for iatom in donor_heavy.bonded_atoms:
atom = self.ct.atom[iatom]
if atom.atomic_number != 1:
continue
for hydrogen in (self.hydrogen1, self.hydrogen2):
distance = self.ct.measure(iatom, hydrogen)
if distance < POLAR_CLASH_CUTOFF:
return True
return False
def _might_clash_with_nonstatic_donor(self, hydrogen, donor_heavy):
"""
Check if hydrogen might be clashing with hydrogen of a nonstatic donor
:param hydrogen: Water hydrogen to check
:type hydrogen: _StructureAtom
:param donor_heavy: Donor heavy atom
:type donor_heavy: _StructureAtom
:return: True if clashing, False if not
:rtype: bool
"""
p = donor_heavy.property
if not p.get(DONOR_PROPERTY) or p.get(STATIC_PROPERTY):
return False
distance = self.ct.measure(hydrogen, donor_heavy)
return distance < self.MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE
[docs] def enumerate_acceptor_acceptor_states(self):
"""
Enumerate states where water is donating to two acceptors at the same time
:return: List of water states
:rtype: List[_WaterState]
"""
# Align waters so it is hydrogen bonded with 2 acceptors.
# Rename so we don't have self everywhere
oxygen = self.oxygen
hydrogen1 = self.hydrogen1
hydrogen2 = self.hydrogen2
ct = self.ct
states = []
heavies = itertools.chain(self.acceptors, self.donor_heavies)
for iheavy1, iheavy2 in itertools.combinations(heavies, 2):
angle = ct.measure(iheavy1, oxygen, iheavy2)
is_static_donor = False
for iatom in (iheavy1, iheavy2):
p = ct.atom[iatom].property
if p.get(STATIC_PROPERTY) and p.get(DONOR_PROPERTY):
is_static_donor = True
if is_static_donor:
continue
# Check if angle falls within a reasonable interval so the
# water can actually hydrogen bond to both at the same time
if not (80 < angle <= 170):
continue
vector1 = self._vector_lookup[iheavy1]
vector2 = self._vector_lookup[iheavy2]
hydrogen1.xyz = oxygen.xyz + vector1 * self.OH_LENGTH
hydrogen2.xyz = oxygen.xyz + vector2 * self.OH_LENGTH
# Since the scoring function used during optimization is so
# dependent on distance, align perfectly with the closest
# acceptor
heavy1 = ct.atom[iheavy1]
heavy2 = ct.atom[iheavy2]
distance1 = self._distance_lookup[iheavy1]
distance2 = self._distance_lookup[iheavy2]
if distance1 <= distance2:
ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2)
else:
ct.adjust(self.HOH_ANGLE, hydrogen2, oxygen, hydrogen1)
# Add the state if the water is not clashing with a static donor
clashing = any(
self._is_clashing_with_static_donor(heavy)
for heavy in (heavy1, heavy2))
if clashing:
continue
state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
donating_to=[iheavy1, iheavy2])
states.append(state)
return states
[docs] def enumerate_donor_donor_states(self):
"""
Enumerate states where water is accepting from two donors at the same time
:return: List of water states
:rtype: List[_WaterState]
"""
# Generate states where water is accepting from 2 donors
oxygen = self.oxygen
hydrogen1 = self.hydrogen1
hydrogen2 = self.hydrogen2
states = []
angle = 180 - self.HOH_ANGLE / 2.0
for donor1, donor2 in itertools.combinations(self.donor_heavies, 2):
heavy1 = self.ct.atom[donor1]
heavy2 = self.ct.atom[donor2]
average_xyz = (np.asarray(heavy1.xyz) + heavy2.xyz) / 2.0
vector = -(average_xyz - oxygen.xyz)
distance = np.linalg.norm(vector)
if distance < 0.01:
continue
vector /= distance
hydrogen1.xyz = np.asarray(oxygen.xyz) + vector * self.OH_LENGTH
hydrogen2.xyz = hydrogen1.xyz
# Temporarily set the heavy coordinate to the average to place
# the hydrogens
heavy1_xyz = heavy1.xyz
heavy1.xyz = average_xyz
self.ct.adjust(angle, heavy1, oxygen, hydrogen1)
self.ct.adjust(-angle, heavy1, oxygen, hydrogen2)
heavy1.xyz = heavy1_xyz
state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
accepting_from=[int(heavy1),
int(heavy2)])
states.append(state)
return states
[docs] def enumerate_acceptor_states(self):
"""
Enumerate states where water is donating to a single acceptor
:return: List of water states
:rtype: List[_WaterState]
"""
# Generate states where water is donating to a single acceptor,
# while letting other hydrogen not point towards static donors.
oxygen = self.oxygen
hydrogen1 = self.hydrogen1
hydrogen2 = self.hydrogen2
states = []
for iheavy in sorted(
set(itertools.chain(self.acceptors, self.donor_heavies))):
vector = self._vector_lookup[iheavy]
heavy = self.ct.atom[iheavy]
if heavy.property.get(STATIC_PROPERTY) and heavy.property.get(
DONOR_PROPERTY):
continue
# Check if angle falls within a reasonable interval so the
hydrogen1.xyz = oxygen.xyz + vector * self.OH_LENGTH
# Move hydrogen2 so it makes the correct angle with hydrogen1
hydrogen2.xyz = hydrogen1.xyz
if self._is_clashing_with_static_donor(heavy):
continue
self.ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2)
# Keep a list of the other heavy donors to calculate clashes
other_donor_heavies = list(self.donor_heavies)
try:
other_donor_heavies.remove(iheavy)
except ValueError:
pass
# Check if hydrogen2 is interacting with a static donor. If
# so, generate a state where it is not clashing
axis = self._vector_lookup[iheavy]
for angle in [None, 120, 120]:
if angle is not None:
self.rotate_hydrogens_along_axis(axis, angle)
clashing = any(
self._is_clashing_with_static_donor(self.ct.atom[iheavy2])
for iheavy2 in other_donor_heavies)
if clashing:
continue
state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
donating_to=[iheavy])
states.append(state)
# Check if the current configuration is interacting with a
# non-static donor. If so, move on to the next dihedral angle iteration
# to add another state so the donor could donate to the
# water in some of its configurations.
# If not, hydrogen2 is on a stable non-clashing position
# and we are done.
clashing = any(
self._might_clash_with_nonstatic_donor(
hydrogen2, self.ct.atom[iheavy2])
for iheavy2 in other_donor_heavies)
if not clashing:
break
return states
[docs] def enumerate_donor_states(self):
"""
Enumerate states where water is accepting from a single donor
:return: List of water states
:rtype: List[_WaterState]
"""
oxygen = self.oxygen
hydrogen1 = self.hydrogen1
hydrogen2 = self.hydrogen2
ct = self.ct
states = []
hydrogen_angle = 180 - self.HOH_ANGLE / 2.0
oxygen_xyz = np.asarray(oxygen.xyz)
heavies_aligned_to = set()
for iheavy, ihydrogen in self.donors:
if iheavy in heavies_aligned_to:
continue
# Check if the donor is a static donor, since then we know that the
# hydrogen position is fixed. If it is not static then we need to
# assume that the hydrogen will be pointing straight towards the
# water oxygen and we can use the heavy atom to align the angle.
iatom_to_alignto = ihydrogen
if not ct.atom[iheavy].property.get(STATIC_PROPERTY):
iatom_to_alignto = iheavy
heavies_aligned_to.add(iheavy)
ct.adjust(hydrogen_angle, iatom_to_alignto, oxygen, hydrogen1)
hydrogen2.xyz = hydrogen1.xyz
ct.adjust(-hydrogen_angle, iatom_to_alignto, oxygen, hydrogen2)
axis = -self._vector_lookup[iheavy]
# Keep a list of the other heavy donors to calculate clashes
other_donor_heavies = list(self.donor_heavies)
try:
other_donor_heavies.remove(iheavy)
except ValueError:
pass
for angle in [None, 120, 120]:
if angle is not None:
self.rotate_hydrogens_along_axis(axis, angle)
# Check if hydrogen is clashing with static donor
clashing = any(
self._is_clashing_with_static_donor(ct.atom[iheavy2])
for iheavy2 in other_donor_heavies)
if clashing:
continue
# State is valid. Add it to the pool
state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
accepting_from=[iheavy])
states.append(state)
# Check if this state might be clashing with the hydrogen of a
# non-static donor. If so, generate another state.
clashing = any(
self._might_clash_with_nonstatic_donor(
hydrogen2, ct.atom[iheavy2])
for iheavy2 in other_donor_heavies)
if not clashing:
break
return states
[docs] def rotate_hydrogens_along_axis(self, axis, angle):
"""
Rotate the water hydrogens along an axis by an angle. Does not return
anything but moves hydrogens in place.
:param axis: Axis along to rotate to
:type axis: 3 floats
:param angle: Angle to rotate in degrees
:type angle: float
"""
# Rotate water along fixed oxygen - donor axis.
ct = self.ct
x, y, z = self.oxygen.xyz
hydrogens = [int(self.hydrogen1), int(self.hydrogen2)]
# Move to origin
transform.translate_structure(ct, -x, -y, -z, atom_index_list=hydrogens)
# Perform the rotation
rotation = transform.get_rotation_matrix(axis, angle)
transform.transform_structure(ct, rotation, atom_index_list=hydrogens)
# Move back to original position
transform.translate_structure(ct, x, y, z, atom_index_list=hydrogens)
[docs]class NetworkSolver:
"""Wrapper around toulbar2 that exactly solves the hydrogen bond network problem"""
# Toulbar2 requires a precision to work with as it is internally working
# with integers. Specify how many decimals are included.
PRECISION = 5
# Maximum time that can be spend on solving the network in seconds. Either
# toulbar2 finds the optimum quickly or it takes a long-long time.
MAX_TIME = 30
# Name of the executable that is called
TOULBAR2 = 'toulbar2'
[docs] def __init__(self, cluster, interact, upper_bound):
"""
:param cluster: Hydrogen bond network cluster that will be optimized
:type cluster: ProtAssign.hbond_cluster
:param interact: Changeable interaction lookup table.
:type interact: Dict[int, Dict[int, bool]]
:param upper_bound: Upper energy bound for the network energy. Required for toulbar2
:type upper_bound: float
"""
self.cluster = cluster
self.interact = interact
self.upper_bound = upper_bound
# Save intermediate files in current directory if we are debugging
self._dir = None if log_level < LOG_DEBUG else '.'
self.setup_toulbar2_inputs()
def __del__(self):
"""Clean up the json/cfn toulbar2 input file"""
if log_level < LOG_DEBUG:
Path(self.toulbar2_input).unlink(missing_ok=True)
def _run_toulbar2(self, options=None):
"""
Run toulbar2 silently
:param options: Additional options passed to toulbar2
:type options: Dict[str, str]
:return: Output file name of toulbar2
:rtype: str
"""
cmd = [self.TOULBAR2, self.toulbar2_input, f'-timer={self.MAX_TIME}']
if options:
for key, value in options.items():
cmd.append(f'-{key}={value}')
out = tempfile.NamedTemporaryFile(dir=self._dir).name
cmd.append(f'-w={out}')
if log_level < LOG_DEBUG:
stderr = stdout = subprocess.DEVNULL
else:
stdout = open("toulbar2.log", "a")
stderr = open("toulbar2.err", "a")
# Be very forgiving when toulbar2 fails, as ProtAssign is a central
# technology.
try:
subprocess.run(cmd, stdout=stdout, stderr=stderr)
except Exception:
report(LOG_BASIC, "Toulbar2 invocation failed")
return out
[docs] def parse_and_delete_output_file(self, out):
"""
Parse and then delete toulbar2 output file that contains the chosen
state for each changeable
:param out: File name of output file
:type out: str
:return: List of chosen state of each changeable
:rtype: List[List[int]]
"""
solutions = []
fpath = Path(out)
if not fpath.is_file():
return solutions
with fpath.open() as f:
for line in f:
try:
sol = [int(x) for x in line.strip().split()]
except ValueError:
report(LOG_BASIC,
"Could not interpret line in toulbar2 output file")
report(LOG_BASIC, f"Offending line: {line}")
continue
# Also check if lenght of solution is similar to number of
# changeables just in case...
if len(sol) != len(self.cluster.changeables):
report(
LOG_BASIC,
"Number of changeables does not match solution length from toulbar2"
)
continue
solutions.append(sol)
if log_level < LOG_DEBUG:
fpath.unlink(missing_ok=True)
return solutions
[docs] def optimal_solution(self):
"""
Run toulbar2 to get the optimal solution
:return: Optimal state combination. If toulbar2 fails returns None
:rtype: List[int] or None
"""
out = self._run_toulbar2()
solutions = self.parse_and_delete_output_file(out)
if solutions:
return solutions[0]
[docs] def explore_solutions(self, upper_bound=None, number=1000):
"""
Run toulbar2 to greedily obtain a number of solutions with an energy
below a certain upper bound. Note that there are no guarantees here
about diversity, though each solution will be unique.
:param upper_bound: upper bound energy. All solutions will have an energy lower than this
:type upper_bound: float
:param number: Max number of solutions to find
:type number: int
:return: List of state combinations
:rtype: List[List[int]]
"""
report(LOG_BASIC, "Solving network.")
options = {'a': f'{number}'}
if upper_bound is not None:
options['ub'] = format(upper_bound, f'.{self.PRECISION}f')
out = self._run_toulbar2(options=options)
solutions = self.parse_and_delete_output_file(out)
return solutions
[docs]class ProtAssign:
[docs] class changeable:
asl = 'none'
max_hbond_distance = 3.5
hbond_min_angle = 150.0
hbond_heavy_min_angle = 80.0
hbond_heavy_max_angle = 140.0
[docs] def __init__(self, ct, iatom):
self.pka = None
self.index = 0
self.current_state = 0 #determine
self.initial_state = 0 #determine
self.treated = False
self.locked = False
self.valid = True
self.name = ''
[docs] def pre_treat_1(self, ct):
return
[docs] def pre_treat_2(self, ct):
return
[docs] def pre_treat(self, ct):
self.pre_treat_1(ct)
build.add_hydrogens(ct, atom_list=self.get_heavies())
self.pre_treat_2(ct)
self.treated = True
return
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
include_initial=False):
return
[docs] def lock_protonation(self):
return
[docs] def add_current_to_states(self, ct):
return
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gap=None,
verbose=False):
if not self.treated:
self.pre_treat(ct)
self.treated = False
return []
[docs] def assign_state_gap(self, atom, state_gaps, report_gaps=True):
"""
Write the Gap in energy between the lowest energy state and the
state with different protonation states or heavy atom positions to
the output ct
:param atom: The atom that should have properties written to it
:type atom: structure.StructureAtom
:param state_gaps: The energy gaps between states for a given
changeable position.
:type state_gaps: A dictionary where the keys are strings (state
names) and the values are floats (energy in kcals of the lowest
energy combination that has that state)
:param report_gaps: Whether to report the gaps to the log file as
well
:type report_gaps: bool
"""
for p in "r_pa_state_gap", :
if p in atom.property:
del atom.property[p]
if state_gaps is None or len(state_gaps) <= 1:
return
min_gap = sorted(state_gaps[state] for state in state_gaps)[1]
atom.property["r_pa_state_gap"] = min_gap
if log_level >= LOG_EXTRA:
cutoff = 100
else:
cutoff = 2.5
if report_gaps and min_gap <= cutoff:
report(LOG_BASIC, (
" Alternative States for {0:10s} Gap {1:5.2f} kcals/mol"
.format(self.name, min_gap)))
for state in sorted(state_gaps, key=lambda k: state_gaps[k]):
if state_gaps[state] <= cutoff:
report(
LOG_BASIC,
" {0:8s} {1:5.2f} kcals/mol".format(
state, state_gaps[state]))
[docs] def update_atom_indices(self, ct: Structure, new_indices: Dict[int,
int]):
return
[docs] def get_new_index(self, ct: Structure, atom_index: int,
new_indices: Dict[int, int]):
return new_indices.get(atom_index)
[docs] def get_view_atoms(self):
return []
[docs] def swap_atoms(self, ct, atom1, atom2):
(temp_x, temp_y, temp_z) = ct.atom[atom1].xyz
ct.atom[atom1].xyz = ct.atom[atom2].xyz
ct.atom[atom2].xyz = (temp_x, temp_y, temp_z)
return
[docs] def get_penalty(self, istate):
return 0.0
[docs] def get_adjustable_atoms(self):
return []
[docs] def change_pka(self, pka, pH):
return False
[docs] def change_empirical_pka(self, pH):
pass
[docs] def get_close_interactors(self, ct, dcell):
"""
Return acceptors, donors and clashers that are close to this changeable
heavy atoms.
:param ct: Structure with annotated atoms signfying interaction class
:type ct: Structure
:param dcell: Distance cell to query for neighboring atoms
:type dcell: DistanceCell
:returns: List of acceptors, donor heavy-hydrogen pairs, and clashers atom indices
:rtype: tuple[list[int], list[tuple[int, int]], list[int]]
"""
acceptors = set()
donors = set()
clashers = set()
for iatom1 in self.get_heavies():
for iatom2 in dcell.query(*ct.atom[iatom1].xyz):
p = ct.atom[iatom2].property
if p.get(ACCEPTOR_PROPERTY):
acceptors.add(iatom2)
if p.get(DONOR_PROPERTY):
atom2 = ct.atom[iatom2]
if atom2.atomic_number != 1:
for ba in atom2.bonded_atoms:
if ba.atomic_number != 1:
continue
if not ba.property.get(DONOR_PROPERTY):
continue
donor = (iatom2, int(ba))
donors.add(donor)
if p.get(ION_PROPERTY):
donors.add((iatom2, iatom2))
if p.get(CLASHER_PROPERTY):
clashers.add(iatom2)
return sorted(acceptors), sorted(donors), sorted(clashers)
[docs] class ligand_changeable(changeable):
type = 'LIGAND'
[docs] def __init__(self, ct, iatom):
super().__init__(ct, iatom)
self.name = get_residue_string(ct.atom[iatom])
self.heavy_atom_indices = []
self.adjustable_atom_indices = []
self.iatoms_by_het = {}
residue = ct.atom[iatom].getResidue()
for atom in residue.atom:
atom.property['s_pa_ligand_id'] = self.name
atom.property['i_pa_ligand_het'] = 0
# Populate the protonation states with the current ligand state
self.protonation_states = [residue.extractStructure()]
self.rotatable_xyz_by_het = defaultdict(list)
# List of changeables that are common in all ligand states and that
# have their own changeable instance. This needs to be saved here,
# to update atom indices of the changeables when ligand states are
# assigned and atoms are deleted.
self.shared_changeables = []
self.nstates_per_het = defaultdict(int)
self.states_by_het = defaultdict(list)
self.interactors_by_het = defaultdict(_Interactors)
@property
def nstates(self):
return sum(self.nstates_per_het.values())
[docs] def pre_treat_1(self, ct):
"""
Add all the protonation states to the ct. After this method the ct
is a superposition of ligand states.
"""
# Add the ligand protonation states to the main structure.
annotate_ligand_atoms(self.protonation_states, self.name)
add_ligand_sts(ct, self.protonation_states)
iatoms = get_ligand_atom_indices(ct, self.name)
self.heavy_atom_indices = [
iatom for iatom in iatoms if ct.atom[iatom].atomic_number != 1
]
[docs] def pre_treat_2(self, ct):
"""
Further annotates structure and stores data for bookkeeping and
improved computational performance. Assumes pre_treat_1 has been
called before.
"""
ligand_iatoms = get_ligand_atom_indices(ct, self.name)
further_annotate_ligand_atoms(ct, ligand_iatoms)
ligand_iatoms = get_ligand_atom_indices(ct, self.name)
self.iatoms_by_het = group_ligand_het_states(ct, ligand_iatoms)
annotate_ligand_rotatable_hydrogens(ct, ligand_iatoms)
# Update annotation of rotatable hydrogens
for het, iatoms in self.iatoms_by_het.items():
update_ligand_rotatables_annotation(ct, iatoms)
self.interactors_by_het = defaultdict(_Interactors)
for het, iatoms in self.iatoms_by_het.items():
self.interactors_by_het[het] = get_ligand_interactors(
ct, iatoms)
[docs] def add_protonation_state(self, ligand_st):
self.protonation_states.append(ligand_st)
[docs] def get_heavies(self):
return self.heavy_atom_indices
[docs] def get_adjustable_atoms(self):
return list(
itertools.chain.from_iterable(self.iatoms_by_het.values()))
[docs] def get_view_atoms(self):
het = self.get_het_from_state(self.current_state)
return self.iatoms_by_het[het]
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
include_initial=False):
"""
Enumerate ligand states
The input structure is supposed to have its different protonation
states available as a superposition, i.e. all protonation states of
the ligand are fully integrated into the structure.
A ligand state is a protonation state of the ligand and a
combination of non-shared hydrogen coordinates. A non-shared
hydrogen is a rotatable hydrogen that is only present in a subset
of ligand protonation states.
"""
## The input structure will have a super position of all ligand
## protonation states, i.e. all states are already part of the structure
self.state_names = []
self.state_coordinates = defaultdict(list)
self.rotatable_xyz_by_het = defaultdict(list)
self.nstates_per_het = defaultdict(int)
nstates = 0
for het, iatoms in self.iatoms_by_het.items():
if not include_initial and het == 0:
continue
self.nstates_per_het[het] = 1
self.rotatable_xyz_by_het[het] = []
for iatom in iatoms:
hydrogen = ct.atom[iatom]
# This checks if the atom is a hydrogen and rotatable
rotatable_id = hydrogen.property.get(
'i_pa_ligand_rotatable')
if rotatable_id is None:
continue
# Get dihedral atoms to rotate around to generate states
dihedral_atoms = get_dihedral_atoms(ct, hydrogen)[::-1]
hydrogen_xyzs = []
start_xyz = hydrogen.xyz
for dihedral_angle in [0, 120, 240]:
ct.adjust(dihedral_angle, *dihedral_atoms)
hydrogen_xyzs.append(hydrogen.xyz)
hydrogen.xyz = start_xyz
self.rotatable_xyz_by_het[het].append(hydrogen_xyzs)
self.nstates_per_het[het] *= len(hydrogen_xyzs)
# Name the states
for n in range(self.nstates_per_het[het]):
self.state_names.append(f'State {het} | {n}')
# Generate rotatable hydrogen coordinates, which is the
# cartesian product of all possible hydrogen XYZs
for n, xyzs in enumerate(
itertools.product(*self.rotatable_xyz_by_het[het]),
start=nstates):
self.state_coordinates[n] = xyzs
nstates += self.nstates_per_het[het]
[docs] def get_het_from_state(self, istate: int) -> int:
"""Get the protonation state index from the state index."""
total_states = 0
for het, n in self.nstates_per_het.items():
total_states += n
if istate < total_states:
return het
raise RuntimeError("Could not determine hetid from state number.")
[docs] def get_rotatable_id_to_atom(self, ct, hetid: int) -> Dict[int, int]:
"""Return a dictionary that maps a rotatable index to its atom index"""
mapper = {}
for iatom in self.iatoms_by_het[hetid]:
atom = ct.atom[iatom]
rotatable_id = atom.property.get('i_pa_ligand_rotatable')
if rotatable_id is not None:
mapper[rotatable_id] = atom.index
return mapper
[docs] def update_hydrogen_xyz_from_state(self, ct, istate: int):
"""
Update the non-shared rotatable hydrogen coordinates for a specific
state.
Returns None but updates xyz coordinates of non-shared rotatable
hydrogens.
"""
rel_state = None
total_states = 0
for het, n in self.nstates_per_het.items():
total_states += n
if istate < total_states:
rel_state = istate - (total_states - n)
break
if rel_state is None:
raise RuntimeError(
"Relative state of tautomer could not be determined.")
mapper = self.get_rotatable_id_to_atom(ct, het)
for rotatable_id, xyz in enumerate(self.state_coordinates[istate]):
iatom = mapper[rotatable_id]
atom = ct.atom[iatom]
atom.xyz = xyz
[docs] def get_state_sites(
self, ct, istate: int
) -> Tuple[List[int], List[Tuple[int, int]], List[int], int]:
"""
Updates non-shared rotatable hydrogen positions and returns atom
indices for acceptors, donors, clashers and (unused) charge
"""
het = self.get_het_from_state(istate)
current_het = self.get_het_from_state(self.current_state)
if not self.treated and het != current_het:
self.pre_treat(ct)
self.update_hydrogen_xyz_from_state(ct, istate)
i = self.interactors_by_het[het]
return i.acceptors, i.donors, i.clashers, 0
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gaps=None,
verbose=False) -> List[int]:
"""
Sets the ligand to a certain state and internally updates atom indices.
Returns list of atom indices that need to be deleted to obtain the right state.
"""
het = self.get_het_from_state(istate)
current_het = self.get_het_from_state(self.current_state)
# Only pre-treat if the new protonation state is different than the
# current one, because we only need to change some hydrogen positions
# in that case. Since this function is called interactively in
# Maestro, some speedup is welcome.
if not self.treated and het != current_het:
self.pre_treat(ct)
update_rotatable_changeable_indices(ct, self.iatoms_by_het[het],
self.shared_changeables)
self.update_hydrogen_xyz_from_state(ct, istate)
self.treated = False
self.current_state = istate
atoms_to_delete = []
for het2, iatoms in self.iatoms_by_het.items():
if het != het2:
atoms_to_delete += iatoms
return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices: Dict[int, int]):
"""Update stored atom indices for bookkeeping."""
het = self.get_het_from_state(self.current_state)
iatoms = self.iatoms_by_het[het]
self.iatoms_by_het = {het: [new_indices[iatom] for iatom in iatoms]}
[docs] def get_penalty(self, istate: int) -> float:
"""
Get penalty for ligand state, which is currently fully defined by
the ligand protonation state. Return 0 if no penalty.
"""
het = self.get_het_from_state(istate)
return self.protonation_states[het].property.get(
'r_pa_ligand_penalty', 0)
[docs] class amide_changeable(changeable):
"""
This is the primary amide -NH2 group of ASN and GLN residues.
"""
asl = '((res.ptype \"ASN \" AND atom.ptype \" CG \") OR (res.ptype \"GLN \" AND atom.ptype \" CD \"))'
OXYGEN_PDBNAMES = [" OD1", " OE1"]
NITROGEN_PDBNAMES = [" ND2", " NE2"]
CARBON_PDBNAMES = [" CG ", " CD "]
[docs] def __init__(self, ct, iatom):
self.pka = None
self.index = 0
self.name = ''
self.nstates = 0
self.num_user_states = 0
self.state_names = []
self.carbon = None
self.oxygen = None
self.nitrogen = None
self.hydrogen1 = None
self.hydrogen2 = None
self.treated = False
self.locked = False
self.valid = True
self.name = get_residue_string(ct.atom[iatom])
# Will contain coordinates of oxygen, nitrogen and the two
# hydrogens for the two states
self.state_coordinates = []
self.type = 'AMIDE'
residue_atoms = ct.getResidueAtoms(iatom)
for atom in residue_atoms:
name = atom.pdbname
if name in self.OXYGEN_PDBNAMES:
self.oxygen = int(atom)
elif name in self.NITROGEN_PDBNAMES:
self.nitrogen = int(atom)
elif name in self.CARBON_PDBNAMES:
self.carbon = int(atom)
if self.carbon is None or self.oxygen is None or self.nitrogen is None:
report(LOG_EXTRA,
'Rejecting ' + self.name + ' due to missing atoms.')
self.valid = False
return
# Make sure the nitrogen is covalently bound ONLY to the carbon
# CG (for ASN) or CD (for GLN).
bound_atoms = []
for bond in ct.atom[self.nitrogen].bond:
if bond.atom2.atomic_number != 1 and bond.order != 0:
bound_atoms.append(bond.atom2.pdbname)
if len(bound_atoms
) > 1 or bound_atoms[0] not in self.CARBON_PDBNAMES:
report(LOG_EXTRA,
'Rejecting ' + self.name + ' due to covalent bond.')
self.valid = False
return
state_name = ct.atom[self.carbon].property.get(STATE_PROPERTY)
if state_name == 'Flip':
self.current_state = 1
self.initial_state = 1
else:
self.current_state = 0
self.initial_state = 0
[docs] def pre_treat_2(self, ct):
hydrogens = []
for ba in ct.atom[self.nitrogen].bonded_atoms:
if ba.atomic_number != 1:
continue
ba.property['i_pa_atomindex'] = ba.index
hydrogens.append(ba.index)
self.hydrogen1, self.hydrogen2 = hydrogens
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
include_initial=False):
"""
Generate states for amides
Max 2 states are generated by swapping the oxygen and nitrogen and
readjusting the hydrogens to the nitrogen.
:param ct: Structure to generate states for
:type ct: Structure
:param do_flips: Include the flipped state of the amide
:type do_flips: bool
Other parameters are not used
"""
n_xyz = ct.atom[self.nitrogen].xyz
o_xyz = ct.atom[self.oxygen].xyz
h1_xyz = ct.atom[self.hydrogen1].xyz
h2_xyz = ct.atom[self.hydrogen2].xyz
self.state_coordinates = [[o_xyz, n_xyz, h1_xyz, h2_xyz]]
self.state_names = ['No Flip']
if do_flips or self.initial_state == 1:
# Extract residue structure to determine hydrogen positions in
# flipped state
self.swap_atoms(ct, self.nitrogen, self.oxygen)
amide_st = ct.atom[
self.nitrogen].getResidue().extractStructure()
self.swap_atoms(ct, self.nitrogen, self.oxygen)
# Remove hydrogens and rebuild them at the nitrogen to obtain
# idealized coordinates
build.delete_hydrogens(amide_st)
amide = next(iter(amide_st.residue))
n1, n2 = self.NITROGEN_PDBNAMES
nitrogen = amide.getAtomByPdbName(n1)
if nitrogen is None:
nitrogen = amide.getAtomByPdbName(n2)
build.add_hydrogens(amide_st, atom_list=[nitrogen])
coordinates = [n_xyz, o_xyz]
for ba in nitrogen.bonded_atoms:
if ba.atomic_number == 1:
coordinates.append(ba.xyz)
self.state_coordinates.append(coordinates)
self.state_names.append('Flip')
# If the state of the amide is determined in a previous run, we
# want to retain the state name and its corresponding state
# coordinates. To accomplish this simply reverse the state
# coordinate list.
if self.initial_state == 1:
self.state_coordinates = self.state_coordinates[::-1]
# In case there are no flips included, reduce the states
if not do_flips:
self.state_coordinates = self.state_coordinates[:1]
self.nstates = len(self.state_coordinates)
[docs] def set_state_coordinates(self, ct, istate):
"""
Set coordinates of nitrogen, oxygen, and hydrogens to
pre-calculated state coordinates.
"""
o_xyz, n_xyz, h1_xyz, h2_xyz = self.state_coordinates[istate]
ct.atom[self.oxygen].xyz = o_xyz
ct.atom[self.nitrogen].xyz = n_xyz
ct.atom[self.hydrogen1].xyz = h1_xyz
ct.atom[self.hydrogen2].xyz = h2_xyz
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gaps=None,
verbose=False):
"""
Assign state to amide residue.
The state is changed by setting the coordinates of the nitrogen,
oxygen and 2 hydrogens to pre-calculated positions.
:param ct: Structure to change
:type ct: Structure
:param istate: State index to assign
:type istate: int
:param add_labels: Add labels to the carbon atom to indicate flip
:type add_labels: bool
:param state_gaps: List of state gaps
:type state_gaps: List[float]
:param verbose: Does nothing
:param verbose: bool
"""
if not self.treated:
self.pre_treat(ct)
self.set_state_coordinates(ct, istate)
# Annotate carbon atom
carbon = ct.atom[self.carbon]
carbon.label_format = "%UT"
carbon.label_color = label_color
state_name = self.state_names[istate]
carbon.property[STATE_PROPERTY] = state_name
if add_labels:
label = ''
if istate == 1:
label = state_name
carbon.label_user_text = label
self.assign_state_gap(carbon, state_gaps, report_gaps=verbose)
self.current_state = istate
return []
[docs] def update_atom_indices(self, ct, new_indices):
# Gotta be a better way...
self.carbon = self.get_new_index(ct, self.carbon, new_indices)
self.oxygen = self.get_new_index(ct, self.oxygen, new_indices)
self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices)
self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices)
self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices)
[docs] def get_heavies(self):
return [self.oxygen, self.nitrogen]
[docs] def get_state_sites(self, ct, istate):
"""
Return state sites consisting of acceptors, donors, clashers and
charge.
:return: List of acceptor, donor, clasher atom indices and charge
:rtype: Tuple[List[int], List[int, int], List[int], float]
"""
self.set_state_coordinates(ct, istate)
return ([self.oxygen], [(self.nitrogen, self.hydrogen1),
(self.nitrogen, self.hydrogen2)], [], 0.0)
[docs] def get_view_atoms(self):
return [self.carbon, self.oxygen, self.nitrogen]
[docs] def get_penalty(self, istate):
return 0.0
[docs] def get_adjustable_atoms(self):
return [self.oxygen, self.nitrogen, self.hydrogen1, self.hydrogen2]
[docs] class histidine_changeable(changeable):
"""
Imidazole group of Histidine residues.
"""
asl = '((res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \")) AND ((atom.ptype \" CG \"))'
[docs] def __init__(self, ct, iatom):
# Calculated pKa of histidine and when flipped
self.pka = None
self.pka_flip = None
# Flags to indicate whther this histidine is forced to be neutral
# or charged for pKa prediction
self.force_neutral = False
self.force_charged = False
self.index = 0
self.name = ''
self.nstates = 0
self.num_user_states = 0
self.state_names = []
self.penalties = []
self.CG = None
self.ND1 = None
self.HD1 = None
self.NE2 = None
self.HE2 = None
self.CD2 = None
self.HD2 = None
self.CE1 = None
self.HE1 = None
self.treated = False
self.locked = False
self.valid = True
self.name = get_residue_string(ct.atom[iatom])
self.type = 'HISTIDINE'
# Find hydrogen atoms:
residue_atoms = ct.getResidueAtoms(iatom)
for atom in residue_atoms:
name = atom.pdbname.strip()
if name in self.__dict__:
self.__dict__[name] = int(atom)
# Catch non-standard hydrogen names.
elif atom.atomic_number == 1:
for heavy in atom.bonded_atoms:
bond = ct.getBond(heavy, atom)
if bond.order == 0:
continue
if int(heavy) == self.ND1:
self.HD1 = int(atom)
elif int(heavy) == self.NE2:
self.HE2 = int(atom)
elif int(heavy) == self.CD2:
self.HD2 = int(atom)
elif int(heavy) == self.CE1:
self.HE1 = int(atom)
if self.CG is None or self.ND1 is None or self.NE2 is None or self.CD2 is None or self.CE1 is None:
report(LOG_EXTRA,
'Rejecting ' + self.name + ' due to missing atoms.')
self.valid = False
return
# Check for something covalently bound (outside of the ring):
allowed_bonds = {
self.ND1: {' CG ', ' CE1'},
self.NE2: {' CE1', ' CD2'},
self.CE1: {' ND1', ' NE2'},
self.CD2: {' NE2', ' CG '},
}
for heavy, expected_names in allowed_bonds.items():
bound_names = []
for bond in ct.atom[heavy].bond:
if bond.atom2.atomic_number != 1 and bond.order != 0:
bound_names.append(bond.atom2.pdbname)
if len(bound_names) > 2 or set(bound_names) != expected_names:
report(LOG_EXTRA,
'Rejecting ' + self.name + ' due to covalent bond.')
self.valid = False
return
flip = 0
protonate = 0
flip_tag = re.compile('Flip')
if STATE_PROPERTY in ct.atom[self.CG].property and \
flip_tag.match(ct.atom[self.CG].property[STATE_PROPERTY]):
flip = 1
else:
flip = 0
if self.HD1 is not None:
if self.HE2 is not None:
protonation = 2 # HIP
else:
protonation = 0 # HID
else:
protonation = 1 # HIE
self.current_state = protonation + flip * 3
self.initial_state = protonation + flip * 3
return
[docs] def pre_treat_1(self, ct):
ct.atom[self.ND1].formal_charge = 1
ct.atom[self.NE2].formal_charge = 0
ct.getBond(self.ND1, self.CE1).type = structure.BondType.Double
ct.getBond(self.CD2, self.NE2).type = structure.BondType.Single
ct.getBond(self.CE1, self.NE2).type = structure.BondType.Single
ct.getBond(self.CG, self.CD2).type = structure.BondType.Double
[docs] def pre_treat_2(self, ct):
residue_atoms = ct.getResidueAtoms(self.CG)
for hyd in residue_atoms:
if hyd.atomic_number != 1:
continue
for heavy in hyd.bonded_atoms:
bond = ct.getBond(heavy, hyd)
if bond.order == 0:
continue
if int(heavy) == self.ND1:
self.HD1 = int(hyd)
hyd.property['i_pa_atomindex'] = int(hyd)
elif int(heavy) == self.NE2:
self.HE2 = int(hyd)
hyd.property['i_pa_atomindex'] = int(hyd)
if self.current_state > 2 and not self.locked:
# Needs to be flipped back, for bookkeeping to be consistent.
atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2),
(self.NE2, self.CE1), (self.HE2, self.HE1)]
for atoms in atoms_to_swap:
self.swap_atoms(ct, atoms[0], atoms[1])
self.current_state -= 3
self.treated = True
return
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
include_initial=False):
"""
Enumerate histidine states and create penalties for certain states.
:param ct: Structure to generate states for
:type ct: Structure
:param acceptors: List of acceptor atom indices
:type acceptors: List[int]
:param donors: List of donor pair atom indices
:type donors: List[Tuple[int, int]]
:param pH: pH of system to determine pH-related penalties
:type pH: float
:param do_flips: Include flipped histidine states in pool
:type do_flips: bool
:param include_initial: Does nothing here
"""
if do_flips:
self.nstates = 6
self.state_names = [
'HID', 'HIE', 'HIP', 'Flip HID', 'Flip HIE', 'Flip HIP'
]
else:
self.nstates = 3
self.state_names = ['HID', 'HIE', 'HIP']
if pH is None:
self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
elif pH in [pH_vlow, pH_low]:
self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0]
else:
self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0]
self.contact_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
for iatom in [self.HD1, self.HD2, self.HE1, self.HE2]:
assert iatom is not None
for iacceptor in acceptors:
assert iacceptor is not None
if ct.measure(iatom, iacceptor) < 2.1:
formal_charge = 0.0
if ct.atom[iacceptor].formal_charge < 0.0:
formal_charge = ct.atom[iacceptor].formal_charge
# Check if it's a carboxylate.
elif ct.atom[iacceptor].atomic_number == 8:
connected_atoms = analyze.evaluate_asl(
ct,
'not (atom.num %d) and withinbonds 2 (atom.num %d)'
% (iacceptor, iacceptor))
for jatom in connected_atoms:
if ct.atom[jatom].atomic_number == 8 and ct.atom[
jatom].formal_charge < 0.0:
formal_charge = ct.atom[jatom].formal_charge
if iatom == self.HD1:
report(
LOG_DEBUG, self.name + ' : HD1 is near ' +
str(ct.atom[iacceptor].resnum) + ':' +
ct.atom[iacceptor].pdbname)
self.contact_penalties[1] += 10.0
self.contact_penalties[3] += 10.0
self.contact_penalties[4] += 10.0
self.contact_penalties[5] += 10.0
if iatom == self.HD2:
report(
LOG_DEBUG, self.name + ' : HD2 is near ' +
str(ct.atom[iacceptor].resnum) + ':' +
ct.atom[iacceptor].pdbname)
self.contact_penalties[0] += 10.0
self.contact_penalties[1] += 10.0
self.contact_penalties[2] += 10.0
self.contact_penalties[4] += 10.0
if iatom == self.HE1:
report(
LOG_DEBUG, self.name + ' : HE1 is near ' +
str(ct.atom[iacceptor].resnum) + ':' +
ct.atom[iacceptor].pdbname)
self.contact_penalties[0] += 10.0
self.contact_penalties[1] += 10.0
self.contact_penalties[2] += 10.0
self.contact_penalties[3] += 10.0
if iatom == self.HE2:
report(
LOG_DEBUG, self.name + ' : HE2 is near ' +
str(ct.atom[iacceptor].resnum) + ':' +
ct.atom[iacceptor].pdbname)
self.contact_penalties[0] += 10.0
self.contact_penalties[3] += 10.0
self.contact_penalties[4] += 10.0
self.contact_penalties[5] += 10.0
# Only want to enforce this rule if not using PropKa.
if (formal_charge < 0.0 or ct.atom[iacceptor].pdbname
in [' OD1', ' OD2', ' OE1', ' OE2'
]) and (pH is not None):
report(
LOG_DEBUG, self.name + ' salt-bridged to ' +
ct.atom[iacceptor].pdbres +
str(ct.atom[iacceptor].resnum))
self.contact_penalties[0] += 10.0
self.contact_penalties[1] += 10.0
self.contact_penalties[3] += 10.0
self.contact_penalties[4] += 10.0
break
for donor in donors:
# ASP and GLU can also be in donors, if they're being predicted.
if ct.atom[donor[0]].pdbres in [
"ASP ", "GLU "
] and ct.atom[donor[0]].pdbname in [
' OD1', ' OD2', ' OE1', ' OE2'
]:
# Must be in contact, and we aren't using PropKa.
if ct.measure(iatom, donor[0]) < 2.0 and pH is not None:
report(
LOG_DEBUG, self.name + ' salt-bridged to ' +
ct.atom[donor[0]].pdbres +
str(ct.atom[donor[0]].resnum))
self.contact_penalties[0] += 10.0
self.contact_penalties[1] += 10.0
self.contact_penalties[3] += 10.0
self.contact_penalties[4] += 10.0
# Metal will force a particular state, which overrides all else.
if donor[0] == donor[1]:
if (ct.atom[donor[0]].atomic_number not in [
7, 8
]) and (ct.measure(iatom, donor[0]) < 2.5):
if iatom == self.HD1:
self.contact_penalties = [
10.0, 0.0, 10.0, 10.0, 10.0, 10.0
]
elif iatom == self.HD2:
self.contact_penalties = [
10.0, 10.0, 10.0, 10.0, 0.0, 10.0
]
elif iatom == self.HE1:
self.contact_penalties = [
10.0, 10.0, 10.0, 0.0, 10.0, 10.0
]
elif iatom == self.HE2:
self.contact_penalties = [
0.0, 10.0, 10.0, 10.0, 10.0, 10.0
]
report(
LOG_DEBUG, self.name +
' Metal ion detected, restricting state.')
break
self.penalties = []
for i in range(6):
self.penalties.append(self.pH_penalties[i] +
self.contact_penalties[i])
report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties))
return
[docs] def lock_protonation(self):
if self.initial_state in [2, 5]:
self.contact_penalties = [
1000.0, 1000.0, 0.0, 1000.0, 1000.0, 0.0
]
else:
self.contact_penalties = [0.0, 0.0, 1000.0, 0.0, 0.0, 1000.0]
self.penalties = [
self.pH_penalties[i] + self.contact_penalties[i]
for i in range(6)
]
return
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gaps=None,
verbose=False):
if not self.treated:
self.pre_treat(ct)
if (istate > 2 and self.current_state <= 2) or \
(istate <= 2 and self.current_state > 2):
atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2),
(self.NE2, self.CE1), (self.HE2, self.HE1)]
for atoms in atoms_to_swap:
self.swap_atoms(ct, atoms[0], atoms[1])
protonation = istate - (istate // 3) * 3
atoms_to_delete = []
protonation_name = ''
if protonation == 0:
ct.atom[self.ND1].formal_charge = 0
ct.getBond(self.ND1, self.CE1).type = structure.BondType.Single
ct.getBond(self.NE2, self.CE1).type = structure.BondType.Double
protonation_name = 'HIS'
atoms_to_delete = [self.HE2]
elif protonation == 1:
ct.atom[self.ND1].formal_charge = 0
protonation_name = 'HIE'
atoms_to_delete = [self.HD1]
elif protonation == 2:
protonation_name = 'HIP'
ct.atom[self.CG].label_format = "%UT"
ct.atom[self.CG].label_color = label_color
if istate > 2:
prot_label = 'Flip ' + protonation_name
ct.atom[self.CG].property[
STATE_PROPERTY] = 'Flip ' + protonation_name
else:
prot_label = protonation_name
ct.atom[self.CG].property[STATE_PROPERTY] = protonation_name
self.assign_state_gap(ct.atom[self.CG],
state_gaps,
report_gaps=verbose)
if add_labels:
label = prot_label
else:
label = ''
if self.pka is not None and label_pkas:
pka = self.pka
if istate > 2 and self.pka_flip is not None:
pka = self.pka_flip
label += ' %.2f' % pka
ct.atom[self.CG].property[pka_property] = pka
ct.atom[self.CG].label_user_text = label
residue_atoms = ct.getResidueAtoms(self.CG)
for atom in residue_atoms:
atom.pdbres = protonation_name
self.current_state = istate
self.treated = False
return (atoms_to_delete)
[docs] def update_atom_indices(self, ct, new_indices):
# Gotta be a better way...
self.CG = self.get_new_index(ct, self.CG, new_indices)
self.ND1 = self.get_new_index(ct, self.ND1, new_indices)
self.HD1 = self.get_new_index(ct, self.HD1, new_indices)
self.NE2 = self.get_new_index(ct, self.NE2, new_indices)
self.HE2 = self.get_new_index(ct, self.HE2, new_indices)
self.CD2 = self.get_new_index(ct, self.CD2, new_indices)
self.HD2 = self.get_new_index(ct, self.HD2, new_indices)
self.CE1 = self.get_new_index(ct, self.CE1, new_indices)
self.HE1 = self.get_new_index(ct, self.HE1, new_indices)
return
[docs] def get_heavies(self):
return [self.ND1, self.NE2, self.CD2, self.CE1]
[docs] def get_state_sites(self, ct, istate):
if istate == 0:
return ([self.NE2], [(self.ND1, self.HD1)],
[self.HD2, self.HE1], 0.0)
elif istate == 1:
return ([self.ND1], [(self.NE2, self.HE2)],
[self.HD2, self.HE1], 0.0)
elif istate == 2:
return ([], [(self.ND1, self.HD1),
(self.NE2, self.HE2)], [self.HD2, self.HE1], 1)
elif istate == 3:
return ([self.CE1], [(self.CD2, self.HD2)],
[self.HD1, self.HE2], 0.0)
elif istate == 4:
return ([self.CD2], [(self.CE1, self.HE1)],
[self.HD1, self.HE2], 0.0)
elif istate == 5:
return ([], [(self.CD2, self.HD2),
(self.CE1, self.HE1)], [self.HD1, self.HE2], 1)
return
[docs] def get_view_atoms(self):
return [self.CG, self.ND1, self.NE2, self.CD2, self.CE1]
[docs] def get_penalty(self, istate):
return self.penalties[istate]
[docs] def get_adjustable_atoms(self):
return [
self.ND1, self.HD1, self.NE2, self.HE2, self.CD2, self.HD2,
self.CE1, self.HE1
]
[docs] def change_pka(self, pka, pH):
if pka is None:
reference_pka = 6.10
else:
reference_pka = pka
self.pka = pka
# If the pKa is within 0.5 of the pH, let the scoring function
# determine the protonation state
if math.fabs(pH - reference_pka) <= 0.5:
self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
elif pH < reference_pka:
self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0]
else:
self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0]
new_penalties = []
something_changed = False
for i in range(6):
new_penalties.append(self.pH_penalties[i] +
self.contact_penalties[i])
if self.penalties[i] != new_penalties[i]:
something_changed = True
self.penalties = new_penalties
return something_changed
[docs] def change_empirical_pka(self, pH):
if self.pka is None:
return
penalty = 4.0
states_to_penalize = []
delta = self.pka - pH
if delta > 0.3:
states_to_penalize += [0, 1]
elif delta < -0.3:
states_to_penalize.append(2)
if self.pka_flip is not None:
delta = self.pka_flip - pH
if delta > 0.3:
states_to_penalize += [3, 4]
elif delta < -0.3:
states_to_penalize.append(5)
pH_penalties = [0.0] * 6
for state in states_to_penalize:
pH_penalties[state] += penalty
if self.force_neutral:
pH_penalties[2] += 100 * penalty
pH_penalties[5] += 100 * penalty
if self.force_charged:
for i in (0, 1, 3, 4):
pH_penalties[i] += 100 * penalty
self.pH_penalties = pH_penalties
self.penalties = [
p1 + p2
for p1, p2 in zip(self.contact_penalties, self.pH_penalties)
]
[docs] class carboxyl_changeable(changeable):
asl = '(res.ptype \"ASP \",\"ASH \" AND atom.ptype \" CG \") OR (res.ptype \"GLU \",\"GLH \" AND atom.ptype \" CD \")'
[docs] def __init__(self, ct, iatom):
self.pka = None
self.index = 0
self.name = ''
self.nstates = 0
self.num_user_states = 0
self.state_names = []
self.carbon = None
self.oxygen1 = None
self.oxygen2 = None
self.hydrogen1 = None
self.hydrogen2 = None
self.treated = False
self.locked = False
self.valid = True
self.name = get_residue_string(ct.atom[iatom])
self.type = 'CARBOXYL'
residue_atoms = ct.getResidueAtoms(iatom)
for atom in residue_atoms:
name = atom.pdbname
if (atom.pdbres in ["ASP ", "ASH "] and
name == " CG ") or (atom.pdbres in ["GLU ", "GLH "] and
name == " CD "):
self.carbon = int(atom)
elif name in [' OD1', ' OE1']:
self.oxygen1 = int(atom)
for bonded_atom in atom.bonded_atoms:
if bonded_atom.atomic_number == 1:
self.hydrogen1 = int(bonded_atom)
elif name in [' OD2', ' OE2']:
self.oxygen2 = int(atom)
for bonded_atom in atom.bonded_atoms:
if bonded_atom.atomic_number == 1:
self.hydrogen2 = int(bonded_atom)
if self.carbon is None or self.oxygen1 is None or self.oxygen2 is None:
report(LOG_EXTRA,
'Rejecting ' + self.name + ' due to missing atoms.')
self.valid = False
return
# Check for something covalently bound.
for oxygen in [self.oxygen1, self.oxygen2]:
for bond in ct.atom[oxygen].bond:
if bond.atom2.atomic_number != 1 and bond.atom2.pdbname not in [
' CG ', ' CD '
] and bond.order != 0:
report(
LOG_EXTRA,
'Rejecting ' + self.name + ' due to covalent bond.')
self.valid = False
return
if self.hydrogen1 is not None:
self.current_state = 1
self.initial_state = 1
elif self.hydrogen2 is not None:
self.current_state = 2
self.initial_state = 2
else:
self.current_state = 0
self.initial_state = 0
return
[docs] def pre_treat_1(self, ct):
ct.atom[self.oxygen1].formal_charge = 1
ct.atom[self.oxygen2].formal_charge = 0
ct.getBond(self.carbon,
self.oxygen1).type = structure.BondType.Double
ct.getBond(self.carbon,
self.oxygen2).type = structure.BondType.Single
[docs] def pre_treat_2(self, ct):
residue_atoms = ct.getResidueAtoms(self.carbon)
for hyd in residue_atoms:
if hyd.atomic_number != 1:
continue
for heavy in hyd.bonded_atoms:
bond = ct.getBond(heavy, hyd)
if bond.order == 0:
continue
if int(heavy) == self.oxygen1:
self.hydrogen1 = int(hyd)
hyd.property['i_pa_atomindex'] = int(hyd)
elif int(heavy) == self.oxygen2:
self.hydrogen2 = int(hyd)
hyd.property['i_pa_atomindex'] = int(hyd)
self.treated = True
return
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
include_initial=False):
self.contact_penalties = [0.0, 0.0, 0.0]
self.nstates = 3
self.state_names = ['Charged', 'Neutral 1', 'Neutral 2']
if pH is None:
self.pH_penalties = [0.0, 0.0, 0.0]
elif pH in [pH_vlow]:
self.pH_penalties = [5.0, 0.0, 0.0]
else:
self.pH_penalties = [0.0, 5.0, 5.0]
for iatom in [self.hydrogen1, self.hydrogen2]:
# If paired with another ASP/GLU, remove protonation penalties (but don't force protonation)
for donor in donors:
if ct.atom[donor[0]].pdbname in [
' OD1', ' OD2', ' OE1', ' OE2'
] and ct.atom[donor[0]].resnum != ct.atom[iatom].resnum:
if ct.measure(iatom, donor[0]) < 2.0:
if iatom == self.hydrogen1:
report(
LOG_DEBUG,
self.name + ' : hydrogen1 is near ' +
str(ct.atom[donor[0]].resnum) + ':' +
ct.atom[donor[0]].pdbname)
self.contact_penalties = [0.0, 0.0, 0.0]
elif iatom == self.hydrogen2:
report(
LOG_DEBUG,
self.name + ' : hydrogen2 is near ' +
str(ct.atom[donor[0]].resnum) + ':' +
ct.atom[donor[0]].pdbname)
self.contact_penalties = [0.0, 0.0, 0.0]
# Check for a metal
elif donor[0] == donor[1]:
if (ct.atom[donor[0]].atomic_number not in [7, 8]) and (
(ct.measure(self.oxygen1, donor[0]) < 3.0) or
(ct.measure(self.oxygen2, donor[0]) < 3.0)):
self.contact_penalties = [0.0, 10.0, 10.0]
report(
LOG_DEBUG, self.name +
' Metal ion detected, restricting state.')
break
for iacceptor in acceptors:
if ct.measure(iatom, iacceptor) < 2.0:
if iatom == self.hydrogen1:
report(
LOG_DEBUG, self.name + ' : hydrogen1 is near ' +
str(ct.atom[iacceptor].resnum) + ':' +
ct.atom[iacceptor].pdbname)
self.contact_penalties[2] += 10.0
elif iatom == self.hydrogen2:
report(
LOG_DEBUG, self.name + ' : hydrogen2 is near ' +
str(ct.atom[iacceptor].resnum) + ':' +
ct.atom[iacceptor].pdbname)
self.contact_penalties[1] += 10.0
break
self.penalties = []
for i in range(3):
self.penalties.append(self.pH_penalties[i] +
self.contact_penalties[i])
report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties))
return
[docs] def lock_protonation(self):
if self.initial_state == 0:
self.contact_penalties = [0.0, 1000.0, 1000.0]
else:
self.contact_penalties = [1000.0, 0.0, 0.0]
self.penalties = [
self.pH_penalties[i] + self.contact_penalties[i]
for i in range(3)
]
return
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gaps=None,
verbose=False):
if not self.treated:
self.pre_treat(ct)
atoms_to_delete = []
ct.atom[self.carbon].label_format = "%UT"
ct.atom[self.carbon].label_color = label_color
if (istate == 1 and
self.current_state != 1) or (istate != 1 and
self.current_state == 1):
atoms_to_swap = [(self.oxygen1, self.oxygen2),
(self.hydrogen1, self.hydrogen2)]
for atoms in atoms_to_swap:
self.swap_atoms(ct, atoms[0], atoms[1])
# Unlike His/Asn/Gln, we aren't preserving the flip info, so update these.
self.oxygen1, self.oxygen2 = self.oxygen2, self.oxygen1
self.hydrogen1, self.hydrogen2 = self.hydrogen2, self.hydrogen1
if istate == 0:
protonated = False
ct.atom[self.oxygen1].formal_charge = 0
ct.atom[self.oxygen2].formal_charge = -1
ct.getBond(self.carbon,
self.oxygen1).type = structure.BondType.Double
ct.getBond(self.carbon,
self.oxygen2).type = structure.BondType.Single
atoms_to_delete = [self.hydrogen1, self.hydrogen2]
prot_label = ''
elif istate == 1:
protonated = True
ct.atom[self.oxygen1].formal_charge = 0
ct.atom[self.oxygen2].formal_charge = 0
ct.getBond(self.carbon,
self.oxygen1).type = structure.BondType.Single
ct.getBond(self.carbon,
self.oxygen2).type = structure.BondType.Double
atoms_to_delete = [self.hydrogen2]
prot_label = 'Neutral'
elif istate == 2:
protonated = True
ct.atom[self.oxygen1].formal_charge = 0
ct.atom[self.oxygen2].formal_charge = 0
ct.getBond(self.carbon,
self.oxygen1).type = structure.BondType.Double
ct.getBond(self.carbon,
self.oxygen2).type = structure.BondType.Single
atoms_to_delete = [self.hydrogen1]
prot_label = 'Neutral'
if add_labels:
label = prot_label
else:
label = ''
if self.pka is not None and label_pkas:
label += ' %.2f' % self.pka
ct.atom[self.carbon].property[pka_property] = self.pka
ct.atom[self.carbon].label_user_text = label
# Set the residue name.
if ct.atom[self.carbon].pdbres[0:2] == 'AS':
if protonated:
protonation_name = 'ASH '
else:
protonation_name = 'ASP '
if ct.atom[self.carbon].pdbres[0:2] == 'GL':
if protonated:
protonation_name = 'GLH '
else:
protonation_name = 'GLU '
self.assign_state_gap(ct.atom[self.carbon],
state_gaps,
report_gaps=verbose)
residue_atoms = ct.getResidueAtoms(self.carbon)
for atom in residue_atoms:
atom.pdbres = protonation_name
self.current_state = istate
self.treated = False
return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices):
# Gotta be a better way...
self.carbon = self.get_new_index(ct, self.carbon, new_indices)
self.oxygen1 = self.get_new_index(ct, self.oxygen1, new_indices)
self.oxygen2 = self.get_new_index(ct, self.oxygen2, new_indices)
self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices)
self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices)
return
[docs] def get_heavies(self):
return [self.oxygen1, self.oxygen2]
[docs] def get_state_sites(self, ct, istate):
if istate == 0:
return ([self.oxygen1, self.oxygen2], [], [], 0.0)
elif istate == 1:
return ([self.oxygen1, self.oxygen2],
[(self.oxygen1, self.hydrogen1)], [], 0.0)
elif istate == 2:
return ([self.oxygen1, self.oxygen2],
[(self.oxygen2, self.hydrogen2)], [], 0.0)
return
[docs] def get_view_atoms(self):
return [self.carbon, self.oxygen1, self.oxygen2]
[docs] def get_penalty(self, istate):
return self.penalties[istate]
[docs] def get_adjustable_atoms(self):
return [self.oxygen1, self.oxygen2]
[docs] def change_pka(self, pka, pH):
if pka is None:
reference_pka = 4.00
else:
reference_pka = pka
self.pka = pka
# propka sometimes has some very large pKas for ASP and GLU
# so we never want to force carboxlyate to be protonated. If the
# propKa is lower than the reference then we should (almost)
# always deprotonate. If the pKa is larger than the reference
# then we should allow the hbonding pattern to determine whether
# the system should be protonated. Unlike other residues, a
# very large pKa will never (almost) force a protonated state.
if (reference_pka >= pH):
self.pH_penalties = [0.0, 0.0, 0.0]
else:
self.pH_penalties = [0.0, 4.0, 4.0]
new_penalties = []
something_changed = False
for i in range(3):
new_penalties.append(self.pH_penalties[i] +
self.contact_penalties[i])
if self.penalties[i] != new_penalties[i]:
something_changed = True
self.penalties = new_penalties
return something_changed
[docs] def change_empirical_pka(self, pH):
if self.pka is None:
return
delta = self.pka - pH
if delta < 0.3:
# We really do not want to protonate the Glu/Asp unless the pKa
# is significanlty lower than the pH
pH_penalties = [0, 100, 100]
elif delta > 0.3:
# Only add a modest pH penalty to the charged state in case a
# clash occurs in the protonated state.
pH_penalties = [10, 0, 0]
self.pH_penalties = pH_penalties
self.penalties = [
p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties)
]
[docs] class rotatable_changeable(changeable):
asl = '((res.ptype \"CYS \",\"CYT \") AND (atom.ptype \" SG \") AND (atom.formal -1)) OR ((res.ptype \"TYR \") AND (atom.ptype \" OH \") AND (atom.formal -1)) OR (( atom.ele H AND not /C0-H0/ AND not /N0-H0/ ) AND NOT (res.ptype \"HOH\",\"DOD\",\"SPC\",\"ASH\",\"GLH\",\"ASP\",\"GLU\" ))'
type = 'ROTATABLE'
[docs] def __init__(self, ct, iatom):
self.pka = None
self.index = 0
self.nstates = 0
self.num_user_states = 0
self.state_names = []
self.dihedrals = []
self.valid = True
self.name = get_atom_string(ct.atom[iatom])
self.ionizable = False
self.current_state = 0
self.initial_state = 0
# If it's a deprotonated CYS/TYR
if ct.atom[iatom].pdbname == " SG " or (
ct.atom[iatom].pdbres == "TYR " and
ct.atom[iatom].pdbname == " OH "):
# Abort if it's coordinated to a metal
for heavy_bond in ct.atom[iatom].bond:
if heavy_bond.order == 0:
report(
LOG_EXTRA, 'Rejecting ' + self.name +
' - coordinated to a metal.')
self.valid = False
return
# Set state to -1 now, and map it to the last state later in enumerate_states.
self.current_state = -1
self.initial_state = -1
ct.atom[iatom].formal_charge = 0
build.add_hydrogens(ct, atom_list=[iatom])
for bonded_atom in ct.atom[iatom].bonded_atoms:
if bonded_atom.atomic_number == 1:
self.hydrogen = int(bonded_atom)
break
else:
self.hydrogen = iatom
try:
dihedral_atoms = get_dihedral_atoms(ct, self.hydrogen)
except:
self.valid = False
return
if len(dihedral_atoms) < 4:
report(
LOG_EXTRA,
'Rejecting ' + self.name + ' due to lack of rotatability.')
self.valid = False
return
if ct.atom[dihedral_atoms[1]].bond_total != 2:
report(
LOG_EXTRA,
'Rejecting ' + self.name + ' - parent has too many bonds.')
self.valid = False
return
self.parent = dihedral_atoms[1]
self.parentparent = dihedral_atoms[2]
self.parentparentparent = dihedral_atoms[3]
self.treated = True
self.locked = False
nbonds = ct.atom[self.parentparent].bond_total
if nbonds == 3:
self.periodicity = 2
elif nbonds == 4:
self.periodicity = 3
# Whether or not the atom at a given dihedral position is heavy.
self.eclipse_is_heavy = []
# Find out which are heavy/hydrogen.
for atom in ct.atom[self.parentparent].bonded_atoms:
if atom.atomic_number == 1:
heavy = False
else:
heavy = True
if int(atom) == self.parentparentparent:
# 0 position.
self.eclipse_is_heavy.append((0.0, heavy))
elif int(atom) != self.parent:
dihedral = ct.measure(self.parentparentparent,
self.parentparent, self.parent,
atom)
self.eclipse_is_heavy.append((dihedral, heavy))
else:
self.periodicity = 0
# Never ionize a rotatable as we don't have data a dataset to test
# our accuracy
#if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ', 'TYR ']:
# self.ionizable = True
[docs] def pre_treat_1(self, ct):
ct.atom[self.parent].formal_charge = 0
return
[docs] def pre_treat_2(self, ct):
residue_atoms = ct.getResidueAtoms(self.parent)
for hyd in residue_atoms:
if hyd.atomic_number != 1:
continue
for heavy in hyd.bonded_atoms:
bond = ct.getBond(heavy, hyd)
if bond.order == 0:
continue
if int(heavy) == self.parent:
self.hydrogen = int(hyd)
hyd.property['i_pa_atomindex'] = int(hyd)
self.treated = True
return
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
include_initial=False):
distance_check = self.max_hbond_distance
if include_initial:
# Include initial state.
dihedral = ct.measure(self.parentparentparent,
self.parentparent, self.parent,
self.hydrogen)
self.dihedrals.append(dihedral)
self.state_names.append('Initial')
self.nstates = 1
initial_nstates = self.nstates
# Keep trying until at least one state is found, increasing the distance each time.
while self.nstates == initial_nstates:
for (atom1, atom2) in [
(acceptor, None) for acceptor in acceptors
] + donors:
if atom1 == self.parent or atom1 == self.parentparent:
continue
# Trim any states pointing head-to-head with an N-H.
distance = ct.measure(self.parent, atom1)
if distance <= distance_check:
angle = ct.measure(self.parentparent, self.parent,
atom1)
if (angle <= self.hbond_heavy_max_angle) and (
angle >= self.hbond_heavy_min_angle):
# Measure what the angle to the atom is.
dihedral = ct.measure(self.parentparentparent,
self.parentparent,
self.parent, atom1)
self.nstates += 1
self.dihedrals.append(dihedral)
self.state_names.append("Orientation")
used_donor_heavies = set()
for pair in donors:
if pair[0] == self.parent or pair[0] == self.parentparent:
continue
if pair[0] in used_donor_heavies:
continue
used_donor_heavies.add(pair[0])
distance = ct.measure(self.parent, pair[0])
if distance <= distance_check:
# Measure what the angle to the atom is (should it be from the heavy or the H?).
dihedral = ct.measure(self.parentparentparent,
self.parentparent, self.parent,
pair[0])
# Add +/-120 to it.
dihedral += 120.0
if dihedral > 180.0:
dihedral -= 360.0
elif dihedral < -180.0:
dihedral += 360.0
self.nstates += 1
self.dihedrals.append(dihedral)
self.state_names.append("Orientation")
dihedral -= 240.0
if dihedral > 180.0:
dihedral -= 360.0
elif dihedral < -180.0:
dihedral += 360.0
self.nstates += 1
self.dihedrals.append(dihedral)
self.state_names.append("Orientation")
distance_check += 0.25
if distance_check >= 50.0:
self.nstates += 1
self.dihedrals.append(0.0)
self.state_names.append("Orientation")
break
# Add one state that doesn't touch anything, just in case.
if ct.atom[self.hydrogen].pdbres != "TYR ":
dihedral_save = ct.measure(self.parentparentparent,
self.parentparent, self.parent,
self.hydrogen)
for idihedral in range(36):
ct.adjust(
float(idihedral) * 10.0, self.parentparentparent,
self.parentparent, self.parent, self.hydrogen)
nclashes = analyze.evaluate_asl(
ct,
'(not (atom.num %d) and within 2 (atom.num %d)) and not (withinbonds 2 (atom.num %d))'
% (self.hydrogen, self.hydrogen, self.hydrogen))
if len(nclashes) == 0:
self.nstates += 1
self.dihedrals.append(float(idihedral) * 10.0)
self.state_names.append("Default")
break
ct.adjust(dihedral_save, self.parentparentparent,
self.parentparent, self.parent, self.hydrogen)
# Trim any disallowed states.
for istate in range(len(self.dihedrals) - 1, -1, -1):
idihedral = self.dihedrals[istate]
iname = self.state_names[istate]
# Don't trim original state
if (istate == 0) and include_initial:
continue
if ct.atom[self.hydrogen].pdbres == "TYR " and math.fabs(
idihedral) > 30.0 and math.fabs(idihedral) < 150.0:
if math.fabs(idihedral) < 40.0:
report(
LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' +
str(idihedral) + ') to be more planar.')
if idihedral < 0.0:
self.dihedrals[istate] = -30.0
report(LOG_FULL_DEBUG, ' Adjusted to -30.0')
else:
self.dihedrals[istate] = 30.0
report(LOG_FULL_DEBUG, ' Adjusted to 30.0')
elif math.fabs(idihedral) > 140.0:
report(
LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' +
str(idihedral) + ') to be more planar.')
if idihedral < 0.0:
self.dihedrals[istate] = -150.0
report(LOG_FULL_DEBUG, ' Adjusted to -150.0')
else:
self.dihedrals[istate] = 150.0
report(LOG_FULL_DEBUG, ' Adjusted to 150.0')
else:
report(
LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' +
str(idihedral) + ') - too far out of plane.')
self.dihedrals.remove(idihedral)
self.state_names.remove(iname)
self.nstates -= 1
elif self.periodicity == 3:
for (angle, heavy) in self.eclipse_is_heavy:
if heavy:
cutoff = 30.0
else:
cutoff = 10.0
if math.fabs(idihedral - angle) < cutoff:
report(
LOG_FULL_DEBUG,
'Removing state ' + iname + ' (' +
str(idihedral) + ') - too close to eclipsed.')
try:
self.dihedrals.remove(idihedral)
self.state_names.remove(iname)
self.nstates -= 1
except:
report(
LOG_NONE,
'ERROR: Failure removing state for ' +
self.name +
' - Please check this residue for structural inconsistencies.'
)
if ct.atom[self.hydrogen].pdbres == "TYR ":
self.nstates += 1
self.dihedrals.append(0.0)
self.state_names.append("Default 1")
self.nstates += 1
self.dihedrals.append(180.0)
self.state_names.append("Default 2")
# Add standard staggered states.
elif self.periodicity == 3:
self.nstates += 1
self.dihedrals.append(60.0)
self.state_names.append("Stagger 1")
self.nstates += 1
self.dihedrals.append(180.0)
self.state_names.append("Stagger 2")
self.nstates += 1
self.dihedrals.append(300.0)
self.state_names.append("Stagger 3")
# Trim any redundant states.
for istate in range(len(self.dihedrals) - 1, 0, -1):
idihedral = self.dihedrals[istate]
iname = self.state_names[istate]
for jstate in range(istate - 1, -1, -1):
jdihedral = self.dihedrals[jstate]
jname = self.state_names[jstate]
if abs(idihedral - jdihedral) < 10.0:
report(
LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' +
str(idihedral) + ') - too similar to ' + jname +
'(' + str(idihedral) + ')')
self.dihedrals.remove(idihedral)
self.state_names.remove(iname)
self.nstates -= 1
break
# Name the states
if include_initial:
for istate in range(1, len(self.state_names)):
self.state_names[istate] += ' %d/%d' % (istate,
self.nstates - 1)
else:
for istate in range(0, len(self.state_names)):
self.state_names[istate] += ' %d/%d' % (istate + 1,
self.nstates)
# TODO Do not create a neutral state for these, as
# there we have no insight into the accuracy of pKa prediction for
# these.
#if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']:
# self.state_names.append('CYT ')
# self.dihedrals.append(None)
# self.nstates += 1
#elif ct.atom[self.parent].pdbres in ['TYR ']:
# self.state_names.append('Charged')
# self.dihedrals.append(None)
# self.nstates += 1
self.pH_penalties = []
for istate in range(self.nstates):
if self.ionizable:
# Penalties will come later if using PropKa.
if pH is None:
self.pH_penalties.append(0.0)
continue
# Not using PropKa.
if self.dihedrals[istate] is None:
if pH is None:
self.pH_penalties.append(0.0)
elif pH == pH_high:
self.pH_penalties.append(0.0)
else:
# Really don't want to deprotonate at normal pH
self.pH_penalties.append(200.0)
else:
if pH is None:
self.pH_penalties.append(0.0)
elif pH == pH_high:
# Really don't want to deprotonate at normal pH
self.pH_penalties.append(200.0)
else:
self.pH_penalties.append(0.0)
else:
self.pH_penalties.append(0.0)
if self.current_state == -1:
self.current_state = self.nstates - 1
if self.initial_state == -1:
self.initial_state = self.nstates - 1
self.contact_penalties = [0.0 for i in range(self.nstates)]
self.penalties = [
self.pH_penalties[i] + self.contact_penalties[i]
for i in range(self.nstates)
]
return
[docs] def lock_protonation(self):
if not self.ionizable:
return
if self.initial_state == (self.nstates - 1):
self.contact_penalties = [1000.0 for i in range(self.nstates)]
self.contact_penalties[-1] = 0.0
else:
self.contact_penalties = [0.0 for i in range(self.nstates)]
self.contact_penalties[-1] = 1000.0
self.penalties = [
self.pH_penalties[i] + self.contact_penalties[i]
for i in range(self.nstates)
]
return
[docs] def add_current_to_states(self, ct):
dihedral = ct.measure(self.parentparentparent, self.parentparent,
self.parent, self.hydrogen)
self.dihedrals.append(dihedral)
self.nstates += 1
self.num_user_states += 1
self.state_names.append("User %d" % self.num_user_states)
self.penalties.append(0.0)
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gaps=None,
verbose=False):
if not self.treated:
self.pre_treat(ct)
atoms_to_delete = []
ct.atom[self.parent].label_format = "%UT"
ct.atom[self.parent].label_color = label_color
if self.dihedrals[istate] is None:
ct.atom[self.parent].formal_charge = -1
atoms_to_delete = [self.hydrogen]
if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']:
residue_name = 'CYT '
prot_label = 'CYT'
elif ct.atom[self.parent].pdbres == 'TYR ':
residue_name = 'TYR '
prot_label = 'Charged'
else:
ct.adjust(self.dihedrals[istate], self.parentparentparent,
self.parentparent, self.parent, self.hydrogen)
if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']:
residue_name = 'CYS '
elif ct.atom[self.parent].pdbres == 'TYR ':
residue_name = 'TYR '
prot_label = ''
if add_labels:
label = prot_label
else:
label = ''
if self.pka is not None and label_pkas:
label += ' %.2f' % self.pka
ct.atom[self.parent].property[pka_property] = self.pka
ct.atom[self.parent].label_user_text = label
self.assign_state_gap(ct.atom[self.parent],
state_gaps,
report_gaps=verbose)
if self.ionizable:
residue_atoms = ct.getResidueAtoms(self.parent)
for atom in residue_atoms:
atom.pdbres = residue_name
self.current_state = istate
self.treated = False
return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices):
# Gotta be a better way...
self.hydrogen = self.get_new_index(ct, self.hydrogen, new_indices)
self.parent = self.get_new_index(ct, self.parent, new_indices)
self.parentparent = self.get_new_index(ct, self.parentparent,
new_indices)
self.parentparentparent = self.get_new_index(
ct, self.parentparentparent, new_indices)
[docs] def get_heavies(self):
return [self.parent]
[docs] def get_state_sites(self, ct, istate):
if self.dihedrals[istate] is None:
return ([self.parent], [], [], 0.0)
else:
ct.adjust(self.dihedrals[istate], self.parentparentparent,
self.parentparent, self.parent, self.hydrogen)
return ([self.parent], [(self.parent, self.hydrogen)], [], 0.0)
[docs] def get_view_atoms(self):
return [self.parent]
[docs] def get_penalty(self, istate):
return self.penalties[istate]
[docs] def get_adjustable_atoms(self):
return [self.hydrogen, self.parent]
[docs] def change_pka(self, pka, pH):
if pka is None:
reference_pka = 15.00
else:
reference_pka = pka
self.pka = pka
if not self.ionizable:
return False
if reference_pka < pH:
ionized = True
else:
ionized = False
new_penalties = []
changed = False
for istate, dihedral in enumerate(self.dihedrals):
if dihedral is None:
if ionized:
new_penalties.append(0.0 +
self.contact_penalties[istate])
else:
# Really don't want to deprotonate at normal pH
new_penalties.append(200.0 +
self.contact_penalties[istate])
else:
if ionized:
new_penalties.append(200.0 +
self.contact_penalties[istate])
else:
new_penalties.append(0.0 +
self.contact_penalties[istate])
if self.penalties[istate] != new_penalties[istate]:
changed = True
self.penalties = new_penalties
return changed
[docs] def change_empirical_pka(self, pH):
# Never deprotonate
penalty = 200.0
pH_penalties = []
for dihedral in self.dihedrals:
# Deprotonated state
if dihedral is None:
pH_penalties.append(penalty)
else:
pH_penalties.append(0)
self.pH_penalties = pH_penalties
self.penalties = [
p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties)
]
[docs] class amine_changeable(changeable):
# Could be used for all primary amines, but for now let's stick to LYS sidechains.
asl = '((res.ptype \"LYS \",\"LYN \") AND (atom.ptype \" NZ \"))'
[docs] def __init__(self, ct, iatom):
self.pka = None
self.index = 0
self.nstates = 0
self.num_user_states = 0
self.state_names = []
self.nitrogen = None
self.carbon = None
self.hyds = []
self.hyd_coords = []
self.initial_coords = []
self.locked = False
self.valid = True
self.name = get_residue_string(ct.atom[iatom])
self.type = 'AMINE'
self.initial_coords = []
for atom in ct.atom[iatom].bonded_atoms:
if atom.atomic_number == 1:
self.hyds.append(int(atom))
elif self.carbon is None:
self.carbon = int(atom)
else:
report(LOG_EXTRA,
'Rejecting ' + self.name + ' due to covalent bond.')
self.valid = False
return
if len(self.hyds) not in [2, 3]:
report(
LOG_EXTRA,
'Rejecting ' + self.name + ' due to incorrect bound count.')
self.valid = False
return
self.nitrogen = iatom
for hyd in self.hyds:
self.initial_coords.append(ct.atom[hyd].xyz)
self.current_state = 0
self.initial_state = 0
return
[docs] def pre_treat_1(self, ct):
ct.atom[self.nitrogen].formal_charge = 1
return
[docs] def pre_treat_2(self, ct):
self.hyds = []
for hyd in ct.atom[self.nitrogen].bonded_atoms:
if hyd.atomic_number == 1:
self.hyds.append(int(hyd))
hyd.property['i_pa_atomindex'] = int(hyd)
self.treated = True
return
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
sample_neutral_states=False,
include_initial=False):
"""
Generate states for lysines.
States are generated by rotating hydrogens for acceptor/donor
interactions and by optionally including the neutral state.
:param ct: Structure to generate states for
:type ct: Structure
:param acceptors: List of acceptor atom indices
:type acceptors: List[int]
:param donors: List of donor atom indices
:type donors: List[(int, int)]
:param pH: pH of system
:type pH: float
:param do_flips: Does nothing
:type do_flips: bool
:param sample_neutral_states: Include neutral states. Since
PROPKA's pKa prediction is unreliable for Lys, currently we
have no method of confidently assess whether it is neutral. So
it's turned off by default.
:type sample_neutral_states: bool
:param include_initial: Include the initial state of the Lys
:type include_initial: bool
"""
dihedral_atoms = get_dihedral_atoms(ct, self.hyds[0])
distance_check = self.max_hbond_distance
if include_initial:
self.hyd_coords.append(self.initial_coords)
self.state_names.append('Initial')
self.nstates += 1
initial_nstates = self.nstates
while self.nstates == initial_nstates:
for (atom1, atom2) in [
(acceptor, None) for acceptor in acceptors
] + donors:
if atom1 == self.nitrogen:
continue
distance = ct.measure(self.nitrogen, atom1)
if distance <= distance_check:
angle = ct.measure(self.carbon, self.nitrogen, atom1)
if (angle <= self.hbond_heavy_max_angle) and (
angle >= self.hbond_heavy_min_angle):
dihedral = ct.measure(atom1, dihedral_atoms[1],
dihedral_atoms[2],
dihedral_atoms[3])
eclipsed = False
for eclipse_ang in [0.0, 120.0, 240.0]:
delta = math.fabs(
math.fabs(dihedral) - eclipse_ang)
if delta >= 360.0:
delta -= 360.0
if delta < 30.0:
eclipsed = True
break
if not eclipsed:
ct.adjust(dihedral, dihedral_atoms[3],
dihedral_atoms[2], dihedral_atoms[1],
dihedral_atoms[0])
coords = []
for hyd in self.hyds:
coords.append(ct.atom[hyd].xyz)
self.hyd_coords.append(coords)
self.state_names.append("Orientation")
self.nstates += 1
distance_check += 0.25
if distance_check > MAX_ORIENT_HYDROGEN_DISTANCE:
break
# Add indexes to the names
if include_initial:
for istate in range(1, len(self.state_names)):
self.state_names[istate] += ' %d/%d' % (istate,
self.nstates - 1)
else:
for istate in range(len(self.state_names)):
self.state_names[istate] += ' %d/%d' % (istate + 1,
self.nstates)
# Add a standard staggered state.
ct.adjust(60.0, dihedral_atoms[3], dihedral_atoms[2],
dihedral_atoms[1], dihedral_atoms[0])
coords = []
for hyd in self.hyds:
coords.append(ct.atom[hyd].xyz)
self.hyd_coords.append(coords)
self.state_names.append("Stagger")
self.nstates += 1
self.pH_penalties = []
# Protonation penalties for charged forms.
for istate in range(self.nstates):
self.pH_penalties.append(0.0)
# After generating the charged states, now generate 3 neutral
# states for each charged state.
if sample_neutral_states:
first_to_expand = 1 if include_initial else 0
for istate in range(first_to_expand, self.nstates):
coords = self.hyd_coords[istate]
self.hyd_coords.append([coords[0], coords[1]])
self.state_names.append(self.state_names[istate] + ' LYN1')
self.hyd_coords.append([coords[0], coords[2]])
self.state_names.append(self.state_names[istate] + ' LYN2')
self.hyd_coords.append([coords[1], coords[2]])
self.state_names.append(self.state_names[istate] + ' LYN3')
self.nstates += 3
if pH is None:
self.pH_penalties.extend([0.0, 0.0, 0.0])
else:
self.pH_penalties.extend([100.0, 100.0, 100.0])
self.contact_penalties = [0.0 for i in range(self.nstates)]
self.penalties = [
self.pH_penalties[i] + self.contact_penalties[i]
for i in range(self.nstates)
]
[docs] def lock_protonation(self):
nhyd_desired = len(self.initial_coords)
for istate in range(self.nstates):
if len(self.hyd_coords[istate]) == nhyd_desired:
self.contact_penalties[istate] = 0.0
else:
self.contact_penalties[istate] = 1000.0
self.penalties = [
self.pH_penalties[i] + self.contact_penalties[i]
for i in range(self.nstates)
]
return
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gaps=None,
verbose=False):
if not self.treated:
self.pre_treat(ct)
atoms_to_delete = []
ct.atom[self.nitrogen].label_format = "%UT"
ct.atom[self.nitrogen].label_color = label_color
residue_name = 'LYS '
prot_label = ''
if len(self.hyd_coords[istate]) == 2:
residue_name = 'LYN '
prot_label = 'Neutral'
atoms_to_delete.append(self.hyds[2])
self.hyds[2] = None
ct.atom[self.nitrogen].formal_charge = 0
else:
ct.atom[self.nitrogen].formal_charge = 1
for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]):
ct.atom[self.hyds[ihyd]].xyz = hyd_coords
if add_labels:
label = prot_label
else:
label = ''
if self.pka is not None and label_pkas:
label += ' %.2f' % self.pka
ct.atom[self.nitrogen].property[pka_property] = self.pka
ct.atom[self.nitrogen].label_user_text = label
self.assign_state_gap(ct.atom[self.nitrogen],
state_gaps,
report_gaps=verbose)
residue_atoms = ct.getResidueAtoms(self.nitrogen)
for atom in residue_atoms:
atom.pdbres = residue_name
self.current_state = istate
self.treated = False
return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices):
# Gotta be a better way...
self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices)
self.carbon = self.get_new_index(ct, self.carbon, new_indices)
for i in range(len(self.hyds)):
if self.hyds[i] is not None:
self.hyds[i] = self.get_new_index(ct, self.hyds[i],
new_indices)
return
[docs] def get_heavies(self):
return [self.nitrogen]
[docs] def get_state_sites(self, ct, istate):
acceptors = []
donors = []
for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]):
ct.atom[self.hyds[ihyd]].xyz = hyd_coords
donors.append((self.nitrogen, self.hyds[ihyd]))
if len(self.hyd_coords[istate]) == 2:
acceptors = [self.nitrogen]
return (acceptors, donors, [], 0.0)
[docs] def get_view_atoms(self):
return [self.nitrogen]
[docs] def get_penalty(self, istate):
return self.penalties[istate]
[docs] def change_pka(self, pka, pH):
if pka is None:
reference_pka = 15.00
else:
reference_pka = pka
self.pka = pka
neutral = reference_pka < pH
new_penalties = []
changed = False
for istate, hyd_coords in enumerate(self.hyd_coords):
nhyds = len(hyd_coords)
pH_penalty = 0
if (nhyds == 2 and not neutral) or (nhyds == 3 and neutral):
pH_penalty = 100
new_penalty = pH_penalty + self.contact_penalties[istate]
new_penalties.append(new_penalty)
if self.penalties[istate] != new_penalty:
changed = True
self.penalties = new_penalties
return changed
[docs] def change_empirical_pka(self, pH):
# Never deprotonate
pH_penalties = []
penalty = 100
for hyd_coords in self.hyd_coords:
nhyds = len(hyd_coords)
if nhyds == 2:
pH_penalties.append(penalty)
else:
pH_penalties.append(0)
self.pH_penalties = pH_penalties
self.penalties = [
p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties)
]
[docs] class water_changeable(changeable):
asl = "(water) AND (atom.ele O)"
redundancy_tolerance = 0.5
[docs] def __init__(self, ct, iatom):
self.pka = None
self.index = 0
self.name = ''
self.num_user_states = 0
self.state_names = []
self.state_coordinates = []
self.oxygen = iatom
self.hydrogen1 = None
self.hydrogen2 = None
self.valid = True
self.name = get_residue_string(ct.atom[iatom])
self.type = 'WATER'
self.interacts_with_nonwater = False
for atom in ct.atom[iatom].bonded_atoms:
if atom.atomic_number != 1:
continue
if self.hydrogen1 is None:
self.hydrogen1 = int(atom)
elif self.hydrogen2 is None:
self.hydrogen2 = int(atom)
else:
report(
LOG_EXTRA,
'Rejecting ' + self.name + ' due to excess hydrogens.')
self.valid = False
return
if self.hydrogen1 is None or self.hydrogen2 is None:
report(LOG_EXTRA,
'Rejecting ' + self.name + ' due to lack of hydrogens.')
self.valid = False
return
self.treated = True
self.locked = False
self.current_state = 0
self.initial_state = 0
@property
def nstates(self):
"""Return number of enumerated states"""
return len(self.state_coordinates)
[docs] def enumerate_states(self,
ct,
acceptors,
donors,
pH,
do_flips=True,
include_initial=False):
"""
Generate discrete states for water, where a state is defined by the
coordinates of its two hydrogens.
:param ct: Structure
:type ct: Structure
:param acceptors: Acceptor atom indices
:type acceptors: List[int]
:param donors: Donor heavy-hydrogen atom indices
:type donors: List[Tuple(int, int)]
:param pH: Does nothing here
:type pH: float
:param do_flips: Does nothing here
:type do_flips: bool
:param include_initial: Include the current water orientation in the state list
:type include_initial: bool
"""
# Get the atom objects for easy access to attributes
oxygen = ct.atom[self.oxygen]
hydrogen1 = ct.atom[self.hydrogen1]
hydrogen2 = ct.atom[self.hydrogen2]
# Include initial state.
if include_initial:
self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz))
self.state_names.append("Initial")
enumerator = WaterStateEnumerator(ct, ct.atom[self.oxygen],
acceptors, donors)
states = enumerator.enumerate_donor_donor_states()
states += enumerator.enumerate_acceptor_acceptor_states()
# Only enumerate additional less favorable states if the current
# pool is not too large.
if len(states) < 30:
states += enumerator.enumerate_donor_states()
states += enumerator.enumerate_acceptor_states()
for state in states:
self.state_coordinates.append(state.coordinates)
self.state_names.append("Orientation")
# In case no acceptors or donors are nearby and no states are
# generated, just include the initial state.
if self.nstates == 0:
self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz))
self.state_names.append("Initial")
# Name the states
if include_initial:
for istate in range(1, len(self.state_names)):
self.state_names[istate] += ' %d/%d' % (istate,
self.nstates - 1)
else:
for istate in range(0, len(self.state_names)):
self.state_names[istate] += ' %d/%d' % (istate + 1,
self.nstates)
[docs] def add_current_to_states(self, ct):
xyz1 = ct.atom[self.hydrogen1].xyz
xyz2 = ct.atom[self.hydrogen2].xyz
self.state_coordinates.append((xyz1, xyz2))
self.num_user_states += 1
self.state_names.append("User %d" % self.num_user_states)
[docs] def assign_state(self,
ct,
istate,
add_labels=True,
label_pkas=False,
state_gaps=None,
verbose=False):
xyz1, xyz2 = self.state_coordinates[istate]
ct.atom[self.hydrogen1].xyz = xyz1
ct.atom[self.hydrogen2].xyz = xyz2
self.current_state = istate
return []
[docs] def update_atom_indices(self, ct, new_indices):
# Gotta be a better way...
self.oxygen = self.get_new_index(ct, self.oxygen, new_indices)
self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices)
self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices)
return
[docs] def get_heavies(self):
return [self.oxygen]
[docs] def get_state_sites(self, ct, istate):
xyz1, xyz2 = self.state_coordinates[istate]
ct.atom[self.hydrogen1].xyz = xyz1
ct.atom[self.hydrogen2].xyz = xyz2
return ([self.oxygen], [(self.oxygen, self.hydrogen1),
(self.oxygen, self.hydrogen2)], [], 0.0)
[docs] def get_view_atoms(self):
return [self.oxygen, self.hydrogen1, self.hydrogen2]
[docs] def get_penalty(self, istate):
return 0.0
[docs] def get_adjustable_atoms(self):
return [self.oxygen, self.hydrogen1, self.hydrogen2]
[docs] class hbond_cluster:
[docs] def __init__(self):
self.changeables = []
self.self_scores = {}
self.pair_scores = {}
self.self_charges = {}
self.combinations = []
self.static_acceptors = []
self.static_donors = []
self.static_clashers = []
self.max_keep = 10
self.abort_sampling = False
self.num_scored = 0
self.max_score = 100000
self.use_xtal = False
self.trouble = False
self.failed = False
# Save the cutoff values as an attribute as these can be changed
# during optimization.
self.polar_clash_cutoff = POLAR_CLASH_CUTOFF
self.nonpolar_clash_cutoff = NONPOLAR_CLASH_CUTOFF
self.acceptor_acceptor_clash_cutoff = ACCEPTOR_ACCEPTOR_CLASH_CUTOFF
[docs] def setup_xtal(self, ct, interact, clustering_distance):
# Set up a lookup dictionary with changeable index as key as a set
# of changeable indices for which it requres an xtal mate.
self.need_xtal = defaultdict(set)
if not self.use_xtal:
return
global_to_local_index = {
c.index: n for n, c in enumerate(self.changeables)
}
for ichangeable in range(len(self.changeables)):
changeable1 = self.changeables[ichangeable]
heavies1 = changeable1.get_heavies()
for index2 in interact[changeable1.index]:
if index2 <= changeable1.index:
continue
jchangeable = global_to_local_index.get(index2)
if jchangeable is None:
continue
changeable2 = self.changeables[jchangeable]
heavies2 = changeable2.get_heavies()
# See if they interact without xtal
for iatom, jatom in itertools.product(heavies1, heavies2):
if ct.measure(iatom, jatom) <= clustering_distance:
# We don't need crystal mate for this changeable
break
else:
self.need_xtal[changeable1.index].add(changeable2.index)
self.need_xtal[changeable2.index].add(changeable1.index)
report(
LOG_EXTRA, self.changeables[ichangeable].name +
' and ' + self.changeables[jchangeable].name +
' interact via crystal symmetry only')
[docs] def optimize(self,
ct: structure.Structure,
interact: Dict[int, Set[int]],
static_donors: List[Tuple[int, int]],
static_acceptors: List[int],
static_clashers: List[int],
max_comb: int,
num_sequential_cycles: int,
use_propka: bool,
propka_pH: float = 7.0,
annotated_ct: Optional[structure.Structure] = None):
"""
Optimize hydrogen bond network and protonation states of
changeables in this cluster
:param ct: Structure containing changeables to optimize
:param interact: Interaction lookup dict for changeable-changeable interactions
:param static_donors: List of static donor atom indices
:param static_acceptors: List of static acceptor atom indices
:param static_clashers: List of static clasher atom indices
:param max_comb: Maximum number of combinations at which an exhaustive search is performed.
:param num_sequential_cycles: Number of optimization cycles when using heuristic search.
:param use_propka: Use PROPKA to determine pKa values of changeables
:param propka_pH: pH of system when using PROPKA for pKa determination
:param annotated_ct: Annotated structure that may contain xtal mates for faster optimization. Passing this, skips the creation of crystal mates and structure annotation during the self-scoring phase.
"""
for changeable in self.changeables:
if not changeable.treated:
changeable.pre_treat(ct)
self.static_acceptors = static_acceptors
self.static_donors = static_donors
self.static_clashers = static_clashers
potential_combinations = 1
for changeable in self.changeables:
potential_combinations *= changeable.nstates
if annotated_ct is None:
annotated_ct = generate_annotated_ct(ct, static_donors,
static_acceptors,
static_clashers,
self.use_xtal)
# Initialize and pre-calculate scores
self.combinations = []
report(LOG_DEBUG, " Prescoring self")
self.pre_score_self(annotated_ct)
self.pre_score_pairs(ct, interact)
report(LOG_BASIC)
if potential_combinations <= max_comb:
report(
LOG_BASIC,
' Scoring exhaustively (%d potential combinations)...' %
potential_combinations)
self.score_exhaustively(ct,
interact,
find_all_solutions=True,
tolerate_clashes=True)
else:
if TOULBAR2_AVAILABLE:
report(LOG_BASIC, " Trying exact network solver")
report(LOG_BASIC,
" Stage1: Solving network exact and diversify")
combinations = self.solve_network_exact_and_diversify(
ct, interact, ncombinations=1)
for combination in combinations:
energy = self.score_combination(ct, interact,
combination)
self.combinations.append((combination, 0.0, energy))
if not self.combinations:
if TOULBAR2_AVAILABLE:
report(
LOG_BASIC,
" Exact network solver failed. Falling back to stochastic optimization"
)
report(
LOG_BASIC,
' Scoring via sequential iteration and hybridization (%d potential combinations)...'
% potential_combinations)
report(LOG_BASIC, ' Beginning sequential iteration...')
self.score_sequentially(ct, interact, num_sequential_cycles)
self.expand_solutions(ct, interact)
self.recombine_solutions(ct, interact)
self.trim_redundant_combinations()
# Log some results
report(LOG_BASIC)
report(LOG_BASIC, ' Cluster results:')
for i, changeable in enumerate(self.changeables):
state_name = changeable.state_names[self.combinations[0][0][i]]
if changeable.pka is None:
msg = f' {changeable.name} : {state_name}'
else:
msg = f' {changeable.name} : {state_name} (pKa = {changeable.pka:.2f})'
report(LOG_BASIC, msg)
report(LOG_EXTRA, ' Top %d results:' % len(self.combinations))
for combination in self.combinations:
report(LOG_EXTRA, ' ' + str(combination))
report(LOG_BASIC)
[docs] def solve_network_exact_and_diversify(self,
ct,
interact,
ncombinations,
delta_energy=2.0):
"""
Solve the network exact
:param ct: Structure
:type ct: Structure
:param interact: Changeable interaction lookup table
:type interact: Dict[int, Set[int]]
:param ncombinations: Number of maximum combinations to return
:type ncombinations: int
:param delta_energy: Maximum energy difference of combinations generated compared to the global minimum
:type delta_energy: float
:return: List of combinations with first element being the global
optimum combination or empty list if no optimum found
:rtype: List[List[int]]
"""
# Get an upper bound energy by just calculating the ennergy of the
# current combination and add 1 to it.
current_combination = [c.current_state for c in self.changeables]
upper_bound = self.score_combination(ct, interact,
current_combination) + 1
solver = NetworkSolver(self, interact, upper_bound)
optimal_combination = solver.optimal_solution()
if optimal_combination is None:
return []
optimal_energy = self.score_combination(ct, interact,
optimal_combination)
report(
LOG_BASIC,
f" Optimal network combination found with energy: {optimal_energy:.5f}"
)
if ncombinations <= 1:
return [optimal_combination]
# Generate additional combinations for diversity To increase
# diversity, reduce the set down by picking some random
# combinations. We reduce here, because toulbar2 is greedy in its
# generation of combinations and subsequent combinations might be
# very similar to previous ones.
combinations = solver.explore_solutions(upper_bound=optimal_energy +
delta_energy,
number=1000)
# Make sure the global minimum is part of the list.
if combinations:
k = max(min(len(combinations), ncombinations - 1), 0)
combinations = random.sample(combinations, k=k)
return [optimal_combination] + combinations
[docs] def score_combination(self, ct, interact, states):
# This is for calculating the energy of a single set of states, and assumes prescoring of self terms.
energy = 0.0
global_to_local_index = {
c.index: n for n, c in enumerate(self.changeables)
}
for ichangeable, istate in enumerate(states):
changeable = self.changeables[ichangeable]
energy += self.self_scores[changeable.index][istate]
(iacceptors, idonors, iclashers,
icharge) = changeable.get_state_sites(ct, istate)
for index2 in interact[changeable.index]:
if index2 <= changeable.index:
continue
jchangeable = global_to_local_index.get(index2)
if jchangeable is None:
continue
jstate = states[jchangeable]
key = (changeable.index, index2)
pair_scores = self.pair_scores.get(key)
if pair_scores is not None:
energy += pair_scores[istate, jstate]
return energy
[docs] def single_point(self,
ct,
interact,
static_donors,
static_acceptors,
static_clashers,
xtal_ct=None):
# This is for calling from the GUI, and assumes nothing is precalculated.
combination = []
for changeable in self.changeables:
combination.append(changeable.current_state)
if not changeable.treated:
changeable.pre_treat(ct)
if xtal_ct is None:
self.setup_local_static(ct, static_acceptors, static_donors,
static_clashers)
else:
self.setup_local_static_alt(xtal_ct, static_acceptors,
static_donors, static_clashers)
global_to_local_index = {
c.index: n for n, c in enumerate(self.changeables)
}
self.single_point_score = 0.0
for ichangeable in range(len(self.changeables)):
changeable1 = self.changeables[ichangeable]
#istate = self.changeables[ichangeable].current_state
istate = combination[ichangeable]
(iacceptors, idonors, iclashers,
icharge) = changeable1.get_state_sites(ct, istate)
self.single_point_score += self.score_pair(
ct,
iacceptors,
idonors,
iclashers,
icharge,
self.static_acceptors,
self.static_donors,
self.static_clashers,
0.0,
use_xtal=self.use_xtal)
self.single_point_score += changeable1.get_penalty(istate)
for index2 in interact[changeable1.index]:
if index2 <= changeable1.index:
continue
jchangeable = global_to_local_index.get(index2)
if jchangeable is None:
continue
changeable2 = self.changeables[jchangeable]
jstate = combination[jchangeable]
(jacceptors, jdonors, jclashers,
jcharge) = changeable2.get_state_sites(ct, jstate)
self.single_point_score += self.score_pair(
ct,
iacceptors,
idonors,
iclashers,
icharge,
jacceptors,
jdonors,
jclashers,
jcharge,
use_xtal=index2 in self.need_xtal[changeable1.index])
# Restore to proper state
atoms_to_delete = []
for i, changeable in enumerate(self.changeables):
atoms = changeable.assign_state(ct, combination[i], False)
atoms_to_delete.extend(atoms)
return atoms_to_delete
[docs] def setup_local_static_alt(self, ct, static_acceptors, static_donors,
static_clashers):
self.static_acceptors = []
self.static_donors = []
self.static_clashers = []
base_distance = 4.0
heavies = []
for changeable in self.changeables:
heavies.extend(changeable.get_heavies())
max_interaction_distance = base_distance
while static_acceptors and static_donors:
atoms = analyze.evaluate_asl(ct, 'within %d atom.i_pa_atomindex %s' % \
(max_interaction_distance, ','.join(str(i) for i in heavies)))
for acceptor in static_acceptors:
if acceptor in atoms and acceptor not in self.static_acceptors:
self.static_acceptors.append(acceptor)
for donor in static_donors:
if donor[0] in atoms and donor not in self.static_donors:
self.static_donors.append(donor)
for clasher in static_clashers:
if clasher in atoms and clasher not in self.static_clashers:
self.static_clashers.append(clasher)
if len(self.static_acceptors) > 0 and len(
self.static_donors) > 0:
break
max_interaction_distance += 0.5
return
[docs] def setup_local_static(self, ct, static_acceptors, static_donors,
static_clashers):
self.static_acceptors = []
self.static_donors = []
self.static_clashers = []
base_distance = 4.0
heavies = []
for changeable in self.changeables:
for atom in changeable.get_heavies():
heavies.append(atom)
if len(static_acceptors) > 0:
max_interaction_distance = base_distance
while len(self.static_acceptors) == 0:
for acceptor in static_acceptors:
for atom in heavies:
if measure(ct,
atom1=atom,
atom2=acceptor,
use_xtal=self.use_xtal
) <= max_interaction_distance:
self.static_acceptors.append(acceptor)
break
max_interaction_distance += 0.5
if len(static_donors) > 0:
max_interaction_distance = base_distance
while len(self.static_donors) == 0:
for donor in static_donors:
for atom in heavies:
if measure(ct,
atom1=atom,
atom2=donor[0],
use_xtal=self.use_xtal
) <= max_interaction_distance:
self.static_donors.append(donor)
break
max_interaction_distance += 0.5
if len(static_clashers) > 0:
max_interaction_distance = base_distance
for clasher in static_clashers:
for atom in heavies:
if measure(ct,
atom1=atom,
atom2=clasher,
use_xtal=self.use_xtal
) <= max_interaction_distance:
self.static_clashers.append(clasher)
break
max_interaction_distance += 0.5
return
[docs] def pre_score_self(self, ct):
"""
Calculate the self score for each state. Interactions are
calculated between the changeable and its static environment.
"""
if ct.property.get(ANNOTATED_PROPERTY):
working_ct = ct
else:
working_ct = generate_annotated_ct(ct,
self.static_donors,
self.static_acceptors,
self.static_clashers,
use_xtal=self.use_xtal)
dcell = create_distance_cell(working_ct,
MAX_HEAVY_HEAVY_INTERACTION_DISTANCE,
honor_pbc=False)
for changeable in self.changeables:
self.self_scores[changeable.index] = np.zeros(
changeable.nstates, dtype=np.double)
self.self_charges[changeable.index] = np.zeros(
changeable.nstates, dtype=np.double)
if changeable.locked:
self.self_scores[changeable.index].fill(100)
self.self_scores[changeable.index][
changeable.current_state] = 0
continue
for istate in range(changeable.nstates):
# Should be okay to use working_ct here, even though
# get_state_sites alters coordinates for rotatables. The
# alteration only need to persist through score_pair().
(iacceptors, idonors, iclashers,
icharge) = changeable.get_state_sites(working_ct, istate)
static_acceptors, static_donors, static_clashers = changeable.get_close_interactors(
working_ct, dcell)
# Get the score (don't need use_xtal even if xtal is on,
# because we have explicit crystalmates.
score = self.score_pair(working_ct,
iacceptors,
idonors,
iclashers,
icharge,
static_acceptors,
static_donors,
static_clashers,
0.0,
use_xtal=False)
penalty = changeable.get_penalty(istate)
total_score = score + penalty
self.self_scores[changeable.index][istate] = total_score
self.self_charges[changeable.index][istate] = icharge
[docs] def pre_score_pairs(self, ct, interact: Dict[int, Set[int]]):
mapper = {
changeable.index: changeable for changeable in self.changeables
}
for changeable1 in self.changeables:
if changeable1.locked:
istates = [changeable1.current_state]
else:
istates = list(range(changeable1.nstates))
for index2 in interact[changeable1.index]:
if index2 <= changeable1.index:
continue
changeable2 = mapper.get(index2)
if changeable2 is None:
continue
if changeable2.locked:
jstates = [changeable2.current_state]
else:
jstates = list(range(changeable2.nstates))
key1 = (changeable1.index, changeable2.index)
key2 = (changeable2.index, changeable1.index)
need_xtal = changeable2.index in self.need_xtal[
changeable1.index]
pair_scores = np.zeros(
(changeable1.nstates, changeable2.nstates),
dtype=np.double)
for istate in istates:
(iacceptors, idonors, iclashers,
icharge) = changeable1.get_state_sites(ct, istate)
for jstate in jstates:
(jacceptors, jdonors, jclashers,
jcharge) = changeable2.get_state_sites(ct, jstate)
score = self.score_pair(ct,
iacceptors,
idonors,
iclashers,
icharge,
jacceptors,
jdonors,
jclashers,
jcharge,
use_xtal=need_xtal)
pair_scores[istate, jstate] = score
self.pair_scores[key1] = pair_scores
self.pair_scores[key2] = pair_scores.T
[docs] def score_pair(self,
ct,
iacceptors,
idonors,
iclashers,
icharge,
jacceptors,
jdonors,
jclashers,
jcharge,
use_xtal=False):
score = 0.0
# Clasher-clasher interaction
for clash1, clash2 in itertools.product(iclashers, jclashers):
distance = measure(ct,
atom1=clash1,
atom2=clash2,
use_xtal=use_xtal)
clashing_term = self.calculate_clash_term(
distance, self.nonpolar_clash_cutoff, base=30)
score += clashing_term
# Clasher-donor interaction
for clashers, donors in [(iclashers, jdonors),
(jclashers, idonors)]:
for donor, clasher in itertools.product(donors, clashers):
hydrogen = donor[1]
distance = measure(ct,
atom1=clasher,
atom2=hydrogen,
use_xtal=use_xtal)
clashing_term = self.calculate_clash_term(
distance, self.nonpolar_clash_cutoff, base=30)
score += clashing_term
# Clashing part of donor-donor interaction
for donor1, donor2 in itertools.product(idonors, jdonors):
h1 = donor1[1]
h2 = donor2[1]
distance = measure(ct, atom1=h1, atom2=h2, use_xtal=use_xtal)
clashing_term = self.calculate_clash_term(
distance, self.polar_clash_cutoff, base=50)
score += clashing_term
# Metals are part of the donor list by convention. Penalize
# interaction if a donor is pointing to a metal. We do not know
# upfront whether a donor is a metal, so we need to calculate
# both combinations.
metal_term = self.score_metal_donor(ct,
donor1,
donor2,
use_xtal=use_xtal)
metal_term += self.score_metal_donor(ct,
donor2,
donor1,
use_xtal=use_xtal)
score += metal_term
# Acceptor-acceptor repulsion. Only counts for static acceptors,
# i.e. acceptors that cannot donate.
donor_acceptors = {d[0] for d in itertools.chain(idonors, jdonors)}
for iacceptor, jacceptor in itertools.product(
iacceptors, jacceptors):
if iacceptor in donor_acceptors or jacceptor in donor_acceptors:
continue
score += self.score_acceptor_acceptor(ct,
iacceptor,
jacceptor,
use_xtal=use_xtal)
# Donor-acceptor interactions. Donors can accept from other donors
acceptor_donor_pairs = ((idonors, jacceptors), (jdonors,
iacceptors))
for donors, acceptors in acceptor_donor_pairs:
for (heavy, hydrogen), acceptor in itertools.product(
donors, acceptors):
# Do not score metal - acceptor interactions
if heavy == hydrogen:
continue
score += self.score_donor_acceptor(ct,
heavy,
hydrogen,
acceptor,
use_xtal=use_xtal)
return score
[docs] def score_acceptor_acceptor(self,
ct: Structure,
iacceptor: int,
jacceptor: int,
use_xtal=False) -> float:
"""
Scoring function for acceptor-acceptor interactions.
These interactions are considered worse than hydrogen clashes, and
its base penalty is thus higher. It is set to 70 as it makes the
scoring function more robust for the Asn and Gln state of 2X9E and
3UZA that interact with the ligand. No other optimization has been
performed.
:return: Score
"""
distance = measure(ct,
atom1=iacceptor,
atom2=jacceptor,
use_xtal=use_xtal)
score = self.calculate_clash_term(
distance, self.acceptor_acceptor_clash_cutoff, base=40)
return score
[docs] def score_donor_acceptor(self,
ct,
donor_heavy,
donor_hydrogen,
acceptor_heavy,
use_xtal=False):
heavy_heavy_distance = measure(ct,
atom1=donor_heavy,
atom2=acceptor_heavy,
use_xtal=use_xtal)
if heavy_heavy_distance > MAX_HEAVY_HEAVY_INTERACTION_DISTANCE:
return 0.0
if heavy_heavy_distance == 0.0:
name1 = get_atom_string(ct.atom[acceptor_heavy])
name2 = get_atom_string(ct.atom[donor_heavy])
report(LOG_BASIC,
f'WARNING: Overlapping atoms found: {name1} - {name2}')
return 0.0
# Acceptor-metal interaction score is only based on distance
if donor_heavy == donor_hydrogen:
return self.calculate_distance_term(heavy_heavy_distance)
angle = measure(ct,
atom1=donor_hydrogen,
atom2=donor_heavy,
atom3=acceptor_heavy,
use_xtal=use_xtal)
angle_term = self.calculate_angle_term(angle)
if angle_term == 0:
return 0.0
distance_term = self.calculate_distance_term(heavy_heavy_distance)
score = distance_term * angle_term
return score
[docs] @staticmethod
def calculate_distance_term(distance):
"""Return distance dependent part of the hydrogen-bond potential functions."""
return -(3.0 / distance)**3
[docs] @staticmethod
def calculate_angle_term(angle):
"""
Return angle dependent part of the hydrogen bond potential.
:param angle: Angle in degrees formed by H-D-A, with Hydrogen, Donor and Acceptor
:param angle: float
:return: Score
:rtype: float
"""
if angle >= 90.0:
return 0
return math.cos(math.radians(angle))**2
[docs] @staticmethod
def calculate_clash_term(distance, cutoff, base=50):
"""Return clash term"""
if distance < cutoff:
# Distinghuish between bad and very bad clashes
return base * (1 + (cutoff - distance))
return 0.0
[docs] def score_exhaustively(self,
ct,
interact,
find_all_solutions=True,
tolerate_clashes=False):
def recurse(changeables, combination, energy, charge, ct, interact,
find_all_solutions, tolerate_clashes):
if len(combination) > 0:
# Abort this branch if we've picked up any clashes with the static.
ichangeable = len(combination) - 1
istate = combination[-1]
changeable1 = self.changeables[ichangeable]
(iacceptors, idonors, iclashers,
icharge) = changeable1.get_state_sites(ct, istate)
self_energy = self.self_scores[changeable1.index][istate]
if self_energy > 10.0 and not tolerate_clashes:
report(LOG_DEBUG, ' Self clash.')
return -1
energy += self_energy
charge += self.self_charges[changeable1.index][istate]
# Abort this branch if we've picked up any clashes with an existing changeable.
for jchangeable in range(len(combination) - 2, -1, -1):
changeable2 = self.changeables[jchangeable]
if changeable2.index not in interact[changeable1.index]:
continue
jstate = combination[jchangeable]
key = (changeable1.index, changeable2.index)
pair_energy = self.pair_scores[key][istate, jstate]
if pair_energy > 10.0 and not tolerate_clashes:
report(
LOG_DEBUG,
' Clash with changeable %d - %s.' %
(jchangeable, changeable2.name))
return jchangeable
else:
energy += pair_energy
if not find_all_solutions:
report(LOG_DEBUG,
' Current combination: %s' % str(combination))
if changeables:
# Delve another level down.
clashes = []
if changeables[0].locked:
new_combination = combination + [
changeables[0].current_state
]
self.num_scored += 1
if not find_all_solutions and self.num_scored > self.max_score:
report(
LOG_BASIC,
' Failed to find valid initial combination in alotted time.'
)
self.abort_sampling = True
return
clash = recurse(changeables[1:], new_combination,
energy, charge, ct, interact,
find_all_solutions, tolerate_clashes)
clashes.append(clash)
if self.abort_sampling:
return None
else:
for state in range(changeables[0].nstates):
new_combination = combination + [state]
self.num_scored += 1
if not find_all_solutions and self.num_scored > self.max_score:
report(
LOG_DEBUG,
' Failed to find valid initial combination in alotted time.'
)
self.abort_sampling = True
return
report(
LOG_DEBUG,
' Trying state %d (%s) for changeable %d (%s).'
% (state, changeables[0].state_names[state],
len(combination), changeables[0].name))
clash = recurse(changeables[1:], new_combination,
energy, charge, ct, interact,
find_all_solutions,
tolerate_clashes)
clashes.append(clash)
if self.abort_sampling:
return None
if self.abort_to_level is not None:
if self.abort_to_level == len(combination):
report(LOG_DEBUG, ' Ceasing abort.')
self.abort_to_level = None
else:
return
if None not in clashes:
highest_clash = 0
for clash in clashes:
if clash > highest_clash:
highest_clash = clash
self.abort_to_level = highest_clash
report(
LOG_DEBUG,
' Aborting to level %d' % self.abort_to_level)
else:
if energy > 10.0 and not tolerate_clashes:
return
# Finished.
report(
LOG_DEBUG, ' Final score = ' + str(energy) +
' (' + str(combination) + ')')
self.combinations.append((combination, charge, energy))
if not find_all_solutions:
# Only wanted one.
self.abort_sampling = True
return None
if len(self.combinations) > self.max_keep:
self.combinations.sort(key=operator.itemgetter(-1))
self.combinations = self.combinations[0:self.max_keep]
self.abort_sampling = False
self.abort_to_level = None
self.num_scored = 0
energy = 0.0
charge = 0.0
recurse(self.changeables, [], energy, charge, ct, interact,
find_all_solutions, tolerate_clashes)
# Final sort.
self.combinations.sort(key=lambda item: item[-1])
[docs] def score_sequentially(self, ct: structure.Structure,
interact: Dict[int, Set[int]],
num_sequential_cycles: int):
"""
This routine uses an algorithm similar to Prime's iteration to
convergence. Starting from a random configuration, each species is
optimized in turn, keeping the others fixed in their current state.
This continues until the system reaches convergence (no more
changes in the most optimal state for all residues).
:param ct: input/output structure, will be modified
:param interact: Interaction lookup table
:param num_sequential_cycles: Number of cycles of randomization
and optimization to conduct
"""
report(LOG_BASIC,
' Stage 1: Sequential iteration with hybridization')
local_combinations = []
randomize = True
num_failed_hybrids = 0
# Passes with different random starting points
for iter in range(num_sequential_cycles):
# Whether or not to create a random starting point, or just work with the current one
if randomize:
comb = [
random.randint(0, changeable.nstates - 1)
for changeable in self.changeables
]
# Main iteration routine.
(total_energy, problem_children) = self.iterate_to_convergence(
ct, interact, comb)
# Append the converged solution
local_combinations.append(
((comb, 0.0, total_energy), problem_children))
report(
LOG_EXTRA,
' Energy for this pass: ' + str(total_energy) +
' (' + str(len(problem_children)) + ' problematic members)')
for problem in problem_children:
report(LOG_EXTRA,
' ' + self.changeables[problem].name)
#for combi in local_combinations:
# print str(combi)
# Successfully found a good solution. Append it to the list and start again.
if len(problem_children) == 0:
report(
LOG_EXTRA,
' No problematic regions for this structure, storing it and starting again.'
)
self.combinations.append((comb, 0.0, total_energy))
local_combinations = []
randomize = True
num_failed_hybrids = 0
continue
# Try to fix the best solution with pieces of the other solutions.
if (iter >= 3) and (iter % 2 == 1):
comb = self.create_hybrid(local_combinations, interact)
if comb is None:
num_failed_hybrids += 1
if num_failed_hybrids == 5:
report(
LOG_EXTRA,
' Too many failed attempts at hybrid. Storing this solution and starting again.'
)
local_combinations.sort(key=lambda x: x[0][-1])
self.combinations.append(local_combinations[0][0])
local_combinations = []
randomize = True
num_failed_hybrids = 0
report(LOG_EXTRA, ' Randomizing...')
randomize = True
else:
num_failed_hybrids = 0
randomize = False
else:
randomize = True
# If none were found, at least add the current ones.
if len(self.combinations) == 0:
for combination in local_combinations:
self.combinations.append(combination[0])
self.combinations.sort(key=operator.itemgetter(-1))
report(
LOG_BASIC, ' Best solution after Stage 1: ' +
str(self.combinations[0][2]))
report(LOG_EXTRA, ' ' + str(self.combinations[0][0]))
return
[docs] def expand_solutions(self, ct, interact):
"""
This takes an existing set of good solutions and generates more by
deconverging them and then iterating them back to convergence.
Generates at least 10 new solutions.
"""
report(LOG_BASIC,
' Stage 2: Diversification of existing solutions')
new_combinations = []
while len(new_combinations) < 10:
for comb in self.combinations:
(local_comb, charge, initial_energy) = copy.deepcopy(comb)
report(
LOG_EXTRA,
' Diverging solution with initial energy of ' +
str(initial_energy))
self.deconverge(ct, interact, local_comb)
report(LOG_EXTRA, ' Now reconverging...')
(final_energy,
problem_children) = self.iterate_to_convergence(
ct, interact, local_comb)
report(
LOG_EXTRA, ' Energy for reconverged solution: ' +
str(final_energy))
new_combinations.append((local_comb, 0.0, final_energy))
self.combinations.extend(new_combinations)
self.combinations.sort(key=operator.itemgetter(-1))
report(
LOG_BASIC, ' Best solution after Stage 2: ' +
str(self.combinations[0][2]))
report(LOG_EXTRA, ' ' + str(self.combinations[0][0]))
return
[docs] def recombine_solutions(self, ct, interact):
"""
This is similar to score_sequentially, but begins with some
pre-existing good solutions in self.combinations, and then creates
hybrids to try to improve on them.
"""
report(LOG_BASIC,
' Stage 3: Recombination of existing solutions')
# Set up the list of combinations, plus their problem children.
# Iterations should do nothing.
local_combinations = []
for (comb, charge, energy) in self.combinations:
(total_energy, problem_children) = self.iterate_to_convergence(
ct, interact, comb, problem_cutoff=0.0)
local_combinations.append(
((comb, 0.0, total_energy), problem_children))
for iter in range(30):
comb = self.create_hybrid(local_combinations,
interact,
random_scaffold=True)
if comb is None:
continue
(total_energy, problem_children) = self.iterate_to_convergence(
ct, interact, comb, problem_cutoff=-0.5)
report(
LOG_EXTRA,
' Energy for this pass: ' + str(total_energy) +
' (' + str(len(problem_children)) + ' problematic members)')
local_combinations.append(
((comb, 0.0, total_energy), problem_children))
for comb in local_combinations:
self.combinations.append(comb[0])
self.combinations.sort(key=operator.itemgetter(-1))
report(
LOG_BASIC, ' Best solution after Stage 3: ' +
str(self.combinations[0][2]))
report(LOG_EXTRA, ' ' + str(self.combinations[0][0]))
return
[docs] def deconverge(self, ct, interact, comb, problem_cutoff=50.0):
"""
This starts with what is assumed to be a good solution, and then
randomizes the states, but not to anything that produces a problem.
"""
global_to_local_index = {
c.index: n for n, c in enumerate(self.changeables)
}
for (ichangeable, changeable) in enumerate(self.changeables):
states = [i for i in range(changeable.nstates)]
random.shuffle(states)
for istate in states:
state_energy = self.self_scores[changeable.index][istate]
for changeable2_index in interact[changeable.index]:
jchangeable = global_to_local_index.get(
changeable2_index)
# local index might not be available if the cluster size has
# been capped in the ProtAssign workflow
if jchangeable is None:
continue
jstate = comb[jchangeable]
key = (changeable.index, changeable2_index)
pair_scores = self.pair_scores.get(key)
if pair_scores is not None:
state_energy += pair_scores[istate, jstate]
if state_energy < problem_cutoff:
comb[ichangeable] = istate
break
[docs] def iterate_to_convergence(self,
ct,
interact,
comb,
problem_cutoff=50.0):
"""
This iterates the combination 'comb' to convergence. Maximum of 10
cycles.
"""
# Create a list of indices for shuffling later. Using a different
# order each time may help avoid a few local minima.
global_to_local_index = {
c.index: n for n, c in enumerate(self.changeables)
}
changeable_list = list(range(len(self.changeables)))
for ipass in range(10):
random.shuffle(changeable_list)
problem_children = []
nchanges = 0
#for ichangeable,changeable in enumerate(self.changeables):
for ichangeable in changeable_list:
changeable = self.changeables[ichangeable]
if changeable.locked:
comb[ichangeable] = changeable.current_state
continue
best_state = -1
best_state_energy = 1000000
for istate in range(changeable.nstates):
state_energy = self.self_scores[
changeable.index][istate]
for changeable2_index in interact[changeable.index]:
jchangeable = global_to_local_index.get(
changeable2_index)
# local index might not be available if the cluster size has
# been capped in the ProtAssign workflow
if jchangeable is None:
continue
jstate = comb[jchangeable]
key = (changeable.index, changeable2_index)
pair_scores = self.pair_scores.get(key)
if pair_scores is None:
continue
state_energy += pair_scores[istate, jstate]
if state_energy < best_state_energy:
best_state = istate
best_state_energy = state_energy
if best_state != comb[ichangeable]:
nchanges += 1
comb[ichangeable] = best_state
report(
LOG_DEBUG, ' Best score for ' + changeable.name +
' is ' + str(best_state_energy) + ' for state ' +
changeable.state_names[best_state])
if best_state_energy > problem_cutoff:
problem_children.append(ichangeable)
if nchanges == 0:
break
total_energy = self.score_combination(ct, interact, comb)
return (total_energy, problem_children)
[docs] def create_hybrid(self,
local_combinations: List[Tuple[Tuple[List[int], float,
float],
List[int]]],
interact: Dict[int, Set[int]],
random_scaffold: bool = False) -> Optional[List[int]]:
"""
This takes the lowest energy solution, and for each problematic
region it searches other solutions (in random order) for any which
may have had better luck for just that part of the overall cluster.
It then splices those solutions into the lowest energy one. If
random_scaffold, then it selects a random solution as the basis in
stead of the lowest energy one.
:param local_combinations: List of combinations, where a
combination is specified as a tuple holding the combination, total
charge, and total energy, and another list of problem children.
:param interact: Interaction lookup table for changeables
:param random_scaffold: Choose a random starting combination,
otherwise start with the first combiation
"""
global_to_local_index = {
c.index: n for n, c in enumerate(self.changeables)
}
# Try to fix the best solution with pieces of the other solutions
report(LOG_EXTRA, ' Creating hybrid solution...')
# Sort based on energy
local_combinations.sort(key=lambda x: x[0][-1])
fixes = {}
neighbours = {}
if random_scaffold:
iscaffold = random.randint(0, len(local_combinations) - 1)
else:
iscaffold = 0
for problem_child in local_combinations[iscaffold][1]:
# Randomize the order in which solutions are searched
randomized_combinations = copy.deepcopy(local_combinations)
random.shuffle(randomized_combinations)
changeable1 = self.changeables[problem_child]
for solution in randomized_combinations:
if problem_child in solution[1]:
continue
combination = solution[0][0]
fixes[problem_child] = combination[problem_child]
for index2 in interact[changeable1.index]:
jchangeable = global_to_local_index.get(index2)
if jchangeable is None:
continue
neighbours[jchangeable] = combination[jchangeable]
break
fixes.update(neighbours)
if len(fixes) != 0:
comb = copy.deepcopy(local_combinations[0][0][0])
for (problem_child, istate) in fixes.items():
comb[problem_child] = istate
return comb
else:
report(LOG_EXTRA, ' No valid hybrid found.')
return None
[docs] def trim_redundant_combinations(self):
nonredund_combs = []
existing_scores = set()
for comb in self.combinations:
if comb[2] not in existing_scores:
nonredund_combs.append(comb)
existing_scores.add(comb[2])
self.combinations = nonredund_combs
return
[docs] def assign_combination(self,
ct,
icombination,
add_labels,
label_pkas,
verbose=False):
"""
Assign a given combination to this cluster
:param ct: The structure to operate on
:type ct: schrodinger.Structure
:param icombination: The index of the combination to assign or if
this number is larger then the stored combinations, just keep
the current state
:type icombinations: integer
:param add_labels: Whether to add labels to atoms to be seen in
maestro with the current protonation state
:type add_labels: bool
:param label_pka: Whether to add labels for the pKa of each residue
:type label_pka: bool
:param verbose: Whether to report additional information to the log
file about the combination chosen
:type verbose: bool
"""
atoms_to_delete = []
if icombination < len(self.combinations):
for ichangeable in range(len(self.changeables)):
state_gaps = self.determine_gap(icombination, ichangeable)
atoms = self.changeables[ichangeable].assign_state(
ct,
self.combinations[icombination][0][ichangeable],
add_labels,
label_pkas,
state_gaps=state_gaps,
verbose=verbose)
atoms_to_delete.extend(atoms)
else:
for ichangeable in range(len(self.changeables)):
changeable = self.changeables[ichangeable]
atoms = changeable.assign_state(ct,
changeable.current_state,
add_labels, label_pkas)
atoms_to_delete.extend(atoms)
return atoms_to_delete
[docs] def determine_gap(self, icombination, ichangeable):
"""
Create a dictionary with the energy gaps to each of the various states.
States that differ by only a hydrogen rotation are not considered
unique
:type icombination: integer
:param icombination: the combination to use as the zero
point. In most situations this will be the lowest energy
combination ( 0 when sorted)
:type ichangeable: integer
:param ichangeable: The residue number ( or position number)
within the cluster which will be analyzed
:rparam: dictionary where the key is the name of the state
or "Default" when the state is one of the staggers
:rtype: dictionary with a key of string and value of a float
"""
def dedup_state(state):
if (re.match(r"Stagger \d+ \d+/\d+", state) or
re.match("Stagger", state) or
re.match(r"Orientation \d+/\d+", state) or
re.match(r"Default \d+ \d+/\d+", state) or
re.match(r"Default \d+/\d+", state)):
return "Default"
return state
changeable = self.changeables[ichangeable]
state_gaps = {}
min_score = self.combinations[icombination][2]
for combination in self.combinations:
state_name = dedup_state(
changeable.state_names[combination[0][ichangeable]])
if state_name not in state_gaps:
state_gaps[state_name] = combination[2] - min_score
else:
state_gaps[state_name] = min(combination[2] - min_score,
state_gaps[state_name])
return state_gaps
# ProtAssign class start
[docs] def __init__(
self,
ct,
interactive=False,
do_flips=True,
asl='',
noprot_asl='',
atoms=[], # noqa: M511
use_xtal=False,
sample_waters=True,
sample_acids=True,
freeze_existing=False,
include_initial=False,
max_comb=10000,
num_sequential_cycles=30,
max_cluster_size=None, # The maximum number of residues
seed: Optional[int] = None,
# to include in each cluster or None
# if no such maximum should be used
logging_level=DEFAULT_LOG_LEVEL,
quiet_flag=False,
debug_flag=False,
add_labels=True,
label_pkas=False,
pH=pH_neutral,
use_propka=True,
propka_pH=7.0,
user_states=[], # noqa: M511
minimize=False,
ligand_sts=None,
include_epik_states=False,
):
"""
:param seed: Seed for random number generator
:param ligand_sts: Ligand states to consider during optimization.
:type ligand_sts: List[Structure]
"""
if seed is not None:
random.seed(seed)
report(LOG_DEBUG, f"Set random seed to {seed}")
global log_level
log_level = logging_level
self.include_epik_states = include_epik_states
# Alternate (backwards compatible) way of specifying output level.
if quiet_flag:
log_level = LOG_NONE
elif debug_flag:
log_level = LOG_DEBUG
self.species = [
self.amide_changeable, self.amine_changeable,
self.histidine_changeable, self.rotatable_changeable
]
self.interactive = interactive
self.do_flips = do_flips
self.noprot_asl = noprot_asl
self.use_xtal = use_xtal
self.freeze_existing = freeze_existing
self.include_initial = include_initial
self.max_comb = max_comb
self.num_sequential_cycles = num_sequential_cycles
self.max_cluster_size = max_cluster_size
self.add_labels = add_labels
self.label_pkas = label_pkas
self.changeables = []
self.clusters = []
self.interact = {}
self.donors = []
self.acceptors = []
self.clashers = []
self.xtal_ct = None
self.pH = pH
self.use_propka = use_propka
self.propka_pH = propka_pH
self.propka_failed = False
self.pka_predictors = {}
# use_propka can be changed to False if PROPKA fails, so always
# initialize the rule-based predictors.
if USE_PROTASSIGN_PKA_PREDICTOR:
# Holds the pH number for the internal pH predictor
# pH should be always defined, but otherwise resort to neutral pH
self.pH_predictor = PH_STRING_TO_NUMBER.get(self.pH, 7.0)
self.pka_predictors = {
'his': HistidinepKaPredictor(),
'asp': AsparticAcidpKaPredictor(),
'glu': GlutamicAcidpKaPredictor(),
}
self.unsatisfied_donors_by_cluster = defaultdict(list)
self.ligand_sts = ligand_sts or []
if self.include_epik_states:
self.ligand_sts += extract_epik_states(ct, self.include_initial)
self.user_states = user_states
self.minimize = minimize
if sample_acids:
self.species.append(self.carboxyl_changeable)
if sample_waters:
self.species.append(self.water_changeable)
# If using PropKa, don't want to use the coarse pH rules
if self.use_propka:
self.pH = None
# Record it though
self.pH_backup = pH
# Fix some element types, in case they're wrong.
self.fix_elements(ct)
# Set up the atoms to process.
asl_atoms = []
if asl == '' and len(atoms) == 0:
asl = 'all'
if asl:
asl_atoms = analyze.evaluate_asl(ct, asl)
self.target_atoms = set(atoms + asl_atoms)
H_atoms = analyze.evaluate_asl(ct, 'atom.ele H')
self.target_hydrogens = set(H_atoms).intersection(self.target_atoms)
if self.freeze_existing:
self.frozen_hydrogens = self.target_hydrogens.copy()
self.freeze_existing_hydrogens(ct)
else:
self.frozen_hydrogens = set()
self.setup(ct)
self.set_user_states(ct)
if not self.interactive:
self.optimize(ct)
self.cleanup(ct)
self.summarize_pkas()
[docs] def fix_elements(self, ct):
for atom in ct.atom:
if atom.pdbname in [' OD1', ' OE1', ' OD2', ' OE2']:
atom.atomic_number = 8
elif atom.pdbname in [' ND1', ' ND2', ' NE2']:
atom.atomic_number = 7
elif atom.pdbname in [' CG ', ' CD ', ' CD2', ' CE1']:
atom.atomic_number = 6
ct.retype()
return
[docs] def freeze_existing_hydrogens(self, ct):
atoms_to_remove = set()
labile_H_atoms = set(
analyze.evaluate_asl(
ct,
'atom.ptype \" HD1\" OR atom.ptype \" HE2\" OR atom.ptype \"1HD2\" OR atom.ptype \"2HD2\" OR atom.ptype \"1HE2\" OR atom.ptype \"2HE2\" OR atom.ptype \"HD21\" OR atom.ptype \"HD22\" OR atom.ptype \"HE21\" OR atom.ptype \"HE22\"'
))
for hydrogen in self.target_hydrogens.intersection(labile_H_atoms):
if hydrogen in atoms_to_remove:
continue
residue_atoms = ct.getResidueAtoms(hydrogen)
for residue_atom in residue_atoms:
atom = int(residue_atom)
atoms_to_remove.add(atom)
for hydrogen in self.target_hydrogens.difference(labile_H_atoms):
if hydrogen in atoms_to_remove:
continue
residue_atoms = ct.getResidueAtoms(hydrogen)
for residue_atom in residue_atoms:
atom = int(residue_atom)
if ct.areBound(hydrogen, atom):
atoms_to_remove.add(atom)
self.target_atoms.difference_update(atoms_to_remove |
self.frozen_hydrogens)
return
[docs] def identify_changeables(self, ct):
changeables = identify_ligand_changeables(ct, self.ligand_sts)
changeables += identify_species(ct, self.species, self.target_atoms)
changeables = filter_nonshared_rotatables(ct, changeables)
# Give an index to each changeable for bookkeeping
for n, changeable in enumerate(changeables):
changeable.index = n
return changeables
[docs] def setup(self, ct):
charge_arginine_sidechains(ct)
build.add_hydrogens(ct)
self.extend_targeted_to_hyds(ct)
self.changeables = self.identify_changeables(ct)
if self.use_propka:
try:
pkas = self.run_propka(self.changeables,
ct,
use_xtal=self.use_xtal)
except PropKaException:
report(
LOG_BASIC,
'WARNING: PropKa failed on this structure - disabling use of PropKa.'
)
self.use_propka = False
self.pH = self.pH_backup
self.propka_failed = True
ct.property['b_pa_PROPKA_failed'] = 1
self.remove_zero_order_bonds(ct)
for item in self.changeables:
item.pre_treat_1(ct)
build.add_hydrogens(ct)
for item in self.changeables:
item.pre_treat_2(ct)
annotate_atom_indices(ct)
self.enumerate_changeable_states(ct)
self.lock_protonation_states(ct)
self.cluster(ct)
if self.use_propka:
self.apply_pkas(self.changeables, pkas, self.propka_pH)
elif USE_PROTASSIGN_PKA_PREDICTOR:
# Use new internal predictor if PROPKA is not requested and we are
# using simple rules
self.empirical_pka_predictor(ct)
if self.interactive:
# Reset back to initial states.
atoms_to_delete = []
for changeable in self.changeables:
atoms = changeable.assign_state(ct,
changeable.initial_state,
add_labels=False,
label_pkas=self.label_pkas)
if atoms is not None:
atoms_to_delete.extend(atoms)
self.delete_atoms(ct, atoms_to_delete)
[docs] def empirical_pka_predictor(self, ct):
"""Predict pKa of histidine and Asp/Glu based on empirical rules"""
ct = ct.copy()
self.annotate_structure(ct)
if self.use_xtal:
try:
mates = analyze.generate_crystal_mates(ct, radius=4.0)
for mate in mates[1:]:
ct.extend(mate)
except Exception:
pass
for resname, predictor in self.pka_predictors.items():
report(LOG_BASIC, f"Calculating pKa for residue {resname.upper()}")
predictor.predict(ct)
for changeable in self.changeables:
residue = get_residue_from_changeable(ct, changeable)
if residue is None:
continue
for atom in residue.atom:
pka = atom.property.get('r_pka_prediction')
if pka is None:
continue
changeable.pka = pka
# For histidine we calculate two pKa for each of the flip states
if self.do_flips:
# Remove previous annotation for pKa prediction
for atom in ct.atom:
for key in atom.property:
if key.startswith(('b_pka_', 'r_pka_')):
del atom.property[key]
self.pka_predictors['his'].predict(ct, flip=True)
for changeable in self.changeables:
if changeable.type != 'HISTIDINE':
continue
residue = get_residue_from_changeable(ct, changeable)
if residue is None:
continue
for atom in residue.atom:
pka = atom.property.get('r_pka_prediction')
if pka is None:
continue
changeable.pka_flip = pka
# Adjust penalties
for changeable in self.changeables:
changeable.change_empirical_pka(self.pH_predictor)
[docs] def remove_zero_order_bonds(self, ct):
num_zobs = 0
for item in self.changeables:
atoms = item.get_adjustable_atoms()
for iatom in atoms:
if iatom is None:
continue
deleting = True
while deleting:
deleting = False
for bond in ct.atom[iatom].bond:
if bond.order == 0:
bond.atom1.deleteBond(bond.atom2)
bond.atom1.property['i_pa_zob%d' % num_zobs] = 1
bond.atom2.property['i_pa_zob%d' % num_zobs] = 1
num_zobs += 1
deleting = True
break
ct.property['i_pa_num_zobs'] = num_zobs
return
[docs] def extend_targeted_to_hyds(self, ct):
# If a heavy atom is targets, so are its attached hyds.
H_atoms = set(analyze.evaluate_asl(
ct,
'atom.ele H')).difference(self.frozen_hydrogens & self.target_atoms)
for H_atom in H_atoms:
for atom in ct.atom[H_atom].bonded_atoms:
if int(atom) in self.target_atoms:
self.target_atoms.add(H_atom)
return
[docs] def delete_atoms(self, ct, iatoms: List[int]):
"""Delete atoms and update stored atom indices"""
ct.deleteAtoms(iatoms)
new_indices = {
atom.property['i_pa_atomindex']: atom.index for atom in ct.atom
}
for changeable in self.changeables:
changeable.update_atom_indices(ct, new_indices)
# Update atom indices of interactors
self.acceptors = [
new_indices[iatom]
for iatom in self.acceptors
if iatom in new_indices
]
self.clashers = [
new_indices[iatom]
for iatom in self.clashers
if iatom in new_indices
]
self.donors = [(new_indices[iatom1], new_indices[iatom2])
for iatom1, iatom2 in self.donors
if iatom1 in new_indices]
annotate_atom_indices(ct)
[docs] def run_propka(self, changeables, ct, use_xtal=False):
# See if we even need to bother.
# If there's just a single carboxylate with an
# already assigned pKa, it can't have changed.
if ((len(changeables) == 1) and (changeables[0].type == 'CARBOXYL') and
(changeables[0].pka is not None)):
return {}
# If there's nothing that depends on pH.
proceed = False
for changeable in changeables:
if (changeable.type in ['HISTIDINE', 'CARBOXYL', 'AMINE'] or
(changeable.type == 'ROTATABLE' and changeable.ionizable)):
proceed = True
break
if not proceed:
return {}
report(LOG_BASIC, ' Running PROPKA to get pKas')
if use_xtal:
propka_ct = self.generate_mates(ct)
else:
propka_ct = ct
pkas = {}
try:
calculations = ['pka']
calculator = protein.PropertyCalculator(propka_ct,
'hbondopt_propkajob')
results = {}
for res, data in calculator.calculateOverResidues(*calculations):
if data['pka'] is not None:
results[str(res)] = float(data['pka'])
except:
raise PropKaException('PROPKA failed')
for changeable in changeables:
# Need to convert to same format as PropKa uses.
name = changeable.name.split(':')[0] + ':' + changeable.name.split(
':')[1][4:]
name = name.rstrip()
if name in results:
pkas[name] = (changeable.pka, results[name])
return pkas
[docs] def generate_mates(self, ct):
chains = [
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'1', '2', '3', '4', '5', '6', '7', '8', '9', '0'
]
for chain in ct.chain:
if chain.name in chains:
chains.remove(chain.name)
try:
mates = analyze.generate_crystal_mates(ct)
except:
report(
LOG_NONE,
'Failure in generating crystal mates. Please check crystal properties.'
)
sys.exit()
new_ct = copy.copy(ct)
for atom in new_ct.atom:
atom.property['i_pa_propka_maincell'] = 1
# Need to renumber, rechain
resnum = 1
chain = chains.pop()
for i, mate in enumerate(mates):
if i == 0:
continue
for res in mate.residue:
res.chain = chain
res.resnum = resnum
resnum += 1
if resnum > 9999:
chain = chains.pop()
resnum = 1
new_ct.extend(mate)
important_atoms = analyze.evaluate_asl(
new_ct, 'fillres within 20 (atom.i_pa_propka_maincell 1)')
truncated_new_ct = new_ct.extract(important_atoms)
build.add_hydrogens(truncated_new_ct)
return truncated_new_ct
[docs] def apply_pkas(self, changeables, changes, propka_pH):
"""Update pKa's coming from PROPKA"""
something_changed = False
for changeable in changeables:
# Need to convert to same format as PropKa uses.
name = changeable.name.split(':')[0] + ':' + changeable.name.split(
':')[1][4:]
name = name.rstrip()
if name in changes:
old_pka = changeable.pka
changed = changeable.change_pka(changes[name][1], propka_pH)
if changed:
something_changed = True
if old_pka is not None:
report(
LOG_BASIC,
' Updating protonation penalties for %s: pKa %.2f'
% (changeable.name, changeable.pka))
else:
changed = changeable.change_pka(None, propka_pH)
return something_changed
[docs] def find_protonation_state_changes(self, ct, clusters='all'):
clusters_to_redo = []
pkas = self.run_propka(self.changeables, ct, use_xtal=self.use_xtal)
for i, cluster in enumerate(self.clusters):
if clusters != 'all' and i not in clusters:
continue
something_changed = self.apply_pkas(cluster.changeables, pkas,
self.propka_pH)
if something_changed:
clusters_to_redo.append(i)
return clusters_to_redo
[docs] def annotate_structure(self, ct):
"""
Annotate atoms in structure by their interaction class and whether or
not they are static
Internally updates the acceptors, donors and clashers attributes
"""
interactors = identify_all_hbonders(ct)
self.acceptors = interactors.acceptors
self.clashers = interactors.clashers
self.donors = interactors.donors
annotate_structure_interactors(ct, self.acceptors, self.donors,
self.clashers)
# Indicate if interactor is static. This is important to know during
# generation of states.
self.remove_changeables_from_hbonders()
atom_iterator = itertools.chain(self.acceptors, self.clashers,
*self.donors)
for iatom in atom_iterator:
ct.atom[iatom].property[STATIC_PROPERTY] = True
return ct
[docs] def enumerate_changeable_states(self, ct):
"""
Enumerate all states for each changeable. Crystal symmetry mates are
taken into account if requested.
"""
working_ct = ct.copy()
self.annotate_structure(working_ct)
if self.use_xtal:
try:
mates = analyze.generate_crystal_mates(working_ct, radius=4.0)
except:
report(
LOG_NONE,
'Failure in generating crystal mates. Please check crystal properties.'
)
sys.exit()
for i, mate in enumerate(mates):
for atom in mate.atom:
atom.property['i_pa_xtalatom'] = i
if i > 0:
mates[0].extend(mate)
self.xtal_ct = mates[0]
# Atoms in primary image which are close to a crystal image (assume atom indices in ct and xtal_ct are the same).
surface_atoms = set(
analyze.evaluate_asl(
self.xtal_ct,
"(within 5.0 (atom.i_pa_xtalatom > 0)) and not (atom.i_pa_xtal_atom > 0)"
))
for atom in ct.atom:
if int(atom) in surface_atoms:
atom.property['i_pa_surfaceatom'] = 1
else:
atom.property['i_pa_surfaceatom'] = 0
# Get rid of extra atoms.
atoms_to_delete = analyze.evaluate_asl(
self.xtal_ct, "not (fillres within 4 (atom.i_pa_xtalatom 0 ) )")
self.xtal_ct.deleteAtoms(atoms_to_delete)
working_ct = self.xtal_ct
dcell = create_distance_cell(working_ct,
MAX_ORIENT_HYDROGEN_DISTANCE,
honor_pbc=False)
for changeable in self.changeables:
acceptors, donors, _ = changeable.get_close_interactors(
working_ct, dcell)
changeable.enumerate_states(working_ct,
acceptors,
donors,
self.pH,
do_flips=self.do_flips,
include_initial=self.include_initial)
[docs] def lock_protonation_states(self, ct):
if self.noprot_asl == '':
return
locked_atoms = analyze.evaluate_asl(ct, self.noprot_asl)
for changeable in self.changeables:
atoms = changeable.get_heavies()
for atom in atoms:
if atom in locked_atoms:
changeable.lock_protonation()
return
[docs] def remove_changeables_from_hbonders(self):
heavy_atom_set = set()
for changeable in self.changeables:
heavy_atom_set |= set(changeable.get_heavies())
self.acceptors = [a for a in self.acceptors if a not in heavy_atom_set]
self.donors = [d for d in self.donors if d[0] not in heavy_atom_set]
adjustable_atom_set = set()
for changeable in self.changeables:
adjustable_atom_set |= set(changeable.get_adjustable_atoms())
self.clashers = [
c for c in self.clashers if c not in adjustable_atom_set
]
@staticmethod
def _maybe_break_clusters(labels: np.ndarray, nclusters: int,
max_size: int) -> int:
""" Breaks clusters larger than max_size & returns new nclusters
This method will break clusters in place to with the limiting
their max size with max_size. It will return the new
number of clusters.
"""
for id in range(nclusters):
indices_with_id = np.flatnonzero(labels == id)
size = indices_with_id.shape[0]
# Skip the first max_size atoms, and start updating from there on.
start = max_size
while start < size:
end = start + max_size
# Existing cluster ids will be [0, ..., nclusters - 1]. So we
# are using nclusters as the next available cluster id and
# incrementing it.
labels[indices_with_id[start:end]] = nclusters
nclusters += 1
start = end
return nclusters
@staticmethod
def _get_index_clusters(
N: int, interaction_matrix: Dict[int, Set[int]],
max_cluster_size: Optional[int]) -> Iterator[np.ndarray]:
"""
Cluster interactors based on an interaction matrix
:param N: Number of changeables
:param interaction_matrix: Interaction lookup table
:param max_cluster_size: Maximum cluster size. None is no maximum.
:return: Clustered interactors
"""
interaction_matrix_sparse = dok_matrix((N, N), dtype=bool)
for i, js in interaction_matrix.items():
interaction_matrix_sparse[i, list(js)] = True
nclusters, labels = connected_components(interaction_matrix_sparse,
directed=False)
if max_cluster_size is not None:
nclusters = ProtAssign._maybe_break_clusters(
labels, nclusters, max_cluster_size)
return (
np.flatnonzero(labels == icluster) for icluster in range(nclusters))
[docs] def cluster(self, ct):
"""Cluster changeables based on their heavies."""
# Extract all heavies from the changeables as these determine whether
# they interact with other changeables. Annotate to which changeable a
# heavy belongs.
heavies = []
for changeable in self.changeables:
for heavy in changeable.get_heavies():
ct.atom[heavy].property[
'i_pa_changeable_index'] = changeable.index
heavies.append(heavy)
self.interact = calculate_interaction_matrix(ct, heavies,
CLUSTERING_DISTANCE,
self.use_xtal)
# Clean up ct
for heavy in set(heavies):
del ct.atom[heavy].property['i_pa_changeable_index']
# Create clusters based on interaction
N = len(self.changeables)
index_clusters = self._get_index_clusters(N, self.interact,
self.max_cluster_size)
self.clusters = []
# Create the hbond_cluster instances from the index clusters
for icluster in index_clusters:
new_cluster = self.hbond_cluster()
new_cluster.use_xtal = self.use_xtal
new_cluster.changeables.extend(
self.changeables[index] for index in icluster)
# Setup record of which species need xtal symmetry to interact.
new_cluster.setup_xtal(ct, self.interact, CLUSTERING_DISTANCE)
self.clusters.append(new_cluster)
# Report.
report(LOG_BASIC)
report(LOG_BASIC, 'Clusters to optimize:')
for i, cluster in enumerate(self.clusters):
report(LOG_BASIC, (" Cluster %d:" % (i + 1)))
for item in cluster.changeables:
report(LOG_BASIC, (" " + item.name))
report(LOG_BASIC)
[docs] def set_user_states(self, ct):
for choice in self.user_states:
(user_name, user_state) = choice
found_name = False
for ichangeable, changeable in enumerate(self.changeables):
# Remove the pdbres
name = changeable.name[0] + ':' + changeable.name[6:].rstrip()
if name == user_name:
found_name = True
found_state = False
for istate, state in enumerate(changeable.state_names):
if state == user_state:
self.assign_state_of_changeable(
ct, ichangeable, istate)
self.changeables[ichangeable].locked = True
found_state = True
if not found_name:
print('ERROR: did not find species %s' % user_name)
sys.exit()
elif not found_state:
print('ERROR: did not find state %s for species %s' %
(user_state, user_name))
sys.exit()
return
[docs] def assign_state_of_changeable(self, ct, ichangeable, istate):
atoms_to_delete = self.changeables[ichangeable].assign_state(
ct, istate, self.add_labels, self.label_pkas)
if atoms_to_delete is not None:
self.delete_atoms(ct, atoms_to_delete)
return
[docs] def increment_state_of_changeable(self, ct, ichangeable):
if (self.changeables[ichangeable].current_state +
1) == self.changeables[ichangeable].nstates:
self.assign_state_of_changeable(ct, ichangeable, 0)
else:
self.assign_state_of_changeable(
ct, ichangeable,
self.changeables[ichangeable].current_state + 1)
return
[docs] def decrement_state_of_changeable(self, ct, ichangeable):
if self.changeables[ichangeable].current_state == 0:
self.assign_state_of_changeable(
ct, ichangeable, self.changeables[ichangeable].nstates - 1)
else:
self.assign_state_of_changeable(
ct, ichangeable,
self.changeables[ichangeable].current_state - 1)
return
[docs] def assign_best_combinations(self, ct, last_time=False):
"""
Assign the best combinations to the ct and report output
:param ct: The structure to operate on
:type ct: schrodinger.Structure
:param last_time: Whether or not this is the last time through when we
should be extra verbose
:type last_time: bool
"""
atoms_to_delete = []
for cluster in self.clusters:
atoms = cluster.assign_combination(ct,
0,
self.add_labels,
self.label_pkas,
verbose=last_time)
atoms_to_delete.extend(atoms)
self.delete_atoms(ct, atoms_to_delete)
[docs] def assign_cluster_combination(self, ct, icluster, icombination):
atoms = self.clusters[icluster].assign_combination(
ct, icombination, self.add_labels, self.label_pkas)
self.delete_atoms(ct, atoms)
return
[docs] def single_point_cluster(self, ct, icluster):
atoms = self.clusters[icluster].single_point(ct, self.interact,
self.donors,
self.acceptors,
self.clashers)
self.delete_atoms(ct, atoms)
atoms = self.clusters[icluster].assign_combination(
ct, 1000000000, self.add_labels, self.label_pkas)
self.delete_atoms(ct, atoms)
return self.clusters[icluster].single_point_score
[docs] def optimize_cluster(self, ct, icluster, assign=True):
self.clusters[icluster].optimize(ct,
self.interact,
self.donors,
self.acceptors,
self.clashers,
self.max_comb,
self.num_sequential_cycles,
self.use_propka,
propka_pH=self.propka_pH)
if assign:
self.assign_cluster_combination(ct, icluster, 0)
return
[docs] def optimize(self, ct):
clusters_to_do = [i for i in range(len(self.clusters))]
niter = 0
niter_max = 2
if USE_PROTASSIGN_PKA_PREDICTOR and not self.use_propka:
niter_max = 20
total_score = 0.0
while (len(clusters_to_do) > 0) and (niter < niter_max):
niter += 1
total_score = 0.0
# Generate annotated structure for faster optimization, especially
# for Xtal structures.
for icluster, cluster in enumerate(self.clusters):
if icluster in clusters_to_do:
for changeable in cluster.changeables:
if not changeable.treated:
changeable.pre_treat(ct)
annotated_ct = generate_annotated_ct(ct,
self.donors,
self.acceptors,
self.clashers,
use_xtal=self.use_xtal)
for i, cluster in enumerate(self.clusters):
if i not in clusters_to_do:
total_score += cluster.combinations[0][-1]
continue
report(LOG_BASIC, 'Working on cluster #%d...' % (i + 1))
for changeable in cluster.changeables:
report(
LOG_BASIC, ' ' + changeable.name + ' : ' +
str(changeable.nstates) + ' states')
cluster.optimize(ct,
self.interact,
self.donors,
self.acceptors,
self.clashers,
self.max_comb,
self.num_sequential_cycles,
self.use_propka,
propka_pH=self.propka_pH,
annotated_ct=annotated_ct)
# Just in case something went wrong.
try:
total_score += cluster.combinations[0][-1]
except:
pass
self.assign_best_combinations(ct)
# Now see if any protonation states need to be redone.
if self.use_propka and niter < niter_max:
clusters_to_do = self.recalculate_propka_pkas(ct)
elif USE_PROTASSIGN_PKA_PREDICTOR and niter < niter_max:
clusters_to_do = self.recalculate_empirical_pkas(ct, niter)
else:
clusters_to_do = []
self.assign_best_combinations(ct, last_time=True)
if self.minimize:
report(LOG_BASIC, 'Minimizing hydrogens...\n')
self.minimize_hydrogens(ct)
report(LOG_BASIC, 'Total score for structure: ' + str(total_score))
[docs] def recalculate_empirical_pkas(self, ct, iteration):
"""Only recalculate pKa's of histidines for forced acceptor interactions"""
assert iteration > 0
report(LOG_BASIC,
"Checking for unsatisfied donors in hydrogen bond networks")
working_ct = ct.copy()
self.annotate_structure(working_ct)
# Check unsatisfied donors before creating crystal mates, as
# determining bulk solvent is not yet working with crystal mates
# present.
unsatisfied_donors = get_unsatisfied_donors(working_ct,
include_bsa=False)
unsatisfied_donors_indices = {a.index for a in unsatisfied_donors}
if self.use_xtal:
try:
mates = analyze.generate_crystal_mates(working_ct, radius=4.0)
for mate in mates[1:]:
working_ct.extend(mate)
except Exception:
pass
clusters_to_do = []
for icluster, cluster in enumerate(self.clusters):
prev_unsatisfied_donors = None
if iteration > 1:
prev_unsatisfied_donors = self.unsatisfied_donors_by_cluster[
icluster]
# Extract unsatisfied donors that are not bulk solvent accessible
# and histidines
unsatisfied_donors_in_cluster = []
hips = []
for changeable in cluster.changeables:
for iheavy in changeable.get_heavies():
if iheavy in unsatisfied_donors_indices:
unsatisfied_donors_in_cluster.append(iheavy)
if changeable.type == 'HISTIDINE':
hips.append(changeable)
self.unsatisfied_donors_by_cluster[
icluster] = unsatisfied_donors_in_cluster
if unsatisfied_donors_in_cluster:
report(LOG_BASIC,
f"Unsatisfied donors found in cluster {icluster}:")
for idonor in unsatisfied_donors_in_cluster:
atom = working_ct.atom[idonor]
report(
LOG_BASIC,
f" {atom.pdbres} {atom.chain}{atom.resnum}{atom.inscode} {atom.pdbname}"
)
# There is nothing to change, since there are no HIP residues or
# forced neutral states.
if not hips:
continue
# Penalize the charged states for a histidine. check the current
# penalties to determine which histidines have already been
# penalized.
current_unsatisfied_donors = self.unsatisfied_donors_by_cluster[
icluster]
if iteration == 1:
hip_to_penalize_index = 0
else:
hip_to_penalize_index = None
redo_cluster = False
for n, hip in enumerate(hips):
if hip.force_neutral:
redo_cluster = True
hip.force_neutral = False
hip_to_penalize_index = n + 1
if len(current_unsatisfied_donors) < len(
prev_unsatisfied_donors):
# Forced accepter interaction found for histidine. pKa
# is acutally lowered cause the shift is a negative number.
if hip.current_state in (0, 1):
hip.pka += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
elif hip.current_state in (3, 4):
hip.pka_flip += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
report(
LOG_BASIC,
f"Found forced acceptor state for {hip.name}. Lowering pKa to {hip.pka:.2f}"
)
# This will reset the pH penalties etc.
hip.change_empirical_pka(self.pH_predictor)
break
if hip.force_charged:
redo_cluster = True
hip.force_charged = False
hip_to_penalize_index = n + 1
pkas = [
pka for pka in (hip.pka, hip.pka_flip)
if pka is not None
]
# Don't apply forced acceptor rule if pKa is already very
# low or if we still get the neutral form.
if hip.current_state not in [2, 5] or min(pkas) < 4.0:
hip.change_empirical_pka(self.pH_predictor)
break
if len(current_unsatisfied_donors) > len(
prev_unsatisfied_donors):
if hip.current_state == 2:
hip.pka += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
elif hip.current_state == 5:
hip.pka_flip += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
report(
LOG_BASIC,
f"Found forced acceptor state for {hip.name}. Lowering pKa to {hip.pka:.2f}"
)
# Reset the number of unsatisfied donors to the lower
# number as we had to force this histidine to be
# charged and it would be neutral in the non-forced
# state.
self.unsatisfied_donors_by_cluster[
icluster] = prev_unsatisfied_donors
# This will reset the pH penalties etc.
hip.change_empirical_pka(self.pH_predictor)
break
if redo_cluster:
clusters_to_do.append(icluster)
if hip_to_penalize_index is None or hip_to_penalize_index >= len(
hips):
continue
# Don't force neutral or charged state of histidine if the pKa is
# already very low.
for i in range(hip_to_penalize_index, len(hips)):
hip = hips[i]
pkas = [
pka for pka in (hip.pka, hip.pka_flip) if pka is not None
]
if min(pkas) > 4.0:
break
report(
LOG_BASIC,
f"Skipping forced acceptor state search for {hip.name} due to already low pKa."
)
else:
# If we reach the end we are done, and can check the next cluster.
continue
if hip.current_state in [2, 5]:
hip.force_neutral = True
report(
LOG_BASIC,
f"Recalculating hydrogen bond network using forced neutral state for {hip.name}"
)
elif hip.pka:
hip.force_charged = True
report(
LOG_BASIC,
f"Recalculating hydrogen bond network using forced charged state for {hip.name}"
)
# This will update the penalties
hip.change_empirical_pka(self.pH_predictor)
clusters_to_do.append(icluster)
return sorted(set(clusters_to_do))
[docs] def recalculate_propka_pkas(self, ct):
report(LOG_BASIC, '')
report(
LOG_BASIC,
'Recalculating pKas to identify changes in protonation penalties...'
)
self.assign_best_combinations(ct)
try:
clusters_to_do = self.find_protonation_state_changes(ct)
except PropKaException:
report(
LOG_NONE,
'WARNING: PropKa failed when looking for changes in protonation state penalties.'
)
clusters_to_do = []
if len(clusters_to_do) > 0:
report(
LOG_BASIC,
' The following clusters have new protonation state penalties, and will be re-run:'
)
report(LOG_BASIC,
' ' + ','.join([str(i + 1) for i in clusters_to_do]))
else:
report(LOG_BASIC, ' No changes in protonation penalties found.')
report(LOG_BASIC, '')
return clusters_to_do
[docs] def minimize_hydrogens(self, ct):
hydrogens = []
for cluster in self.clusters:
for changeable in cluster.changeables:
if changeable.type in ['ROTATABLE', 'WATER']:
changeable_sites = changeable.get_state_sites(
ct, changeable.current_state)
hydrogens += [i[1] for i in changeable_sites[1]]
minimizer = minimize.Minimizer(min_method=minimize.MinCG)
minimizer.setStructure(ct)
for atom in ct.atom:
if int(atom) not in hydrogens:
minimizer.addPosFrozen(int(atom))
minimizer.minimize()
[docs] def restore_zobs(self, ct):
num_zobs = ct.property.get('i_pa_num_zobs')
if num_zobs is None:
return
for i in range(num_zobs):
atoms = analyze.evaluate_asl(ct, '(atom.i_pa_zob%d = 1)' % i)
if len(atoms) == 2:
ct.addBond(atoms[0], atoms[1], 0)
for iatom in atoms:
del ct.atom[iatom].property['i_pa_zob%d' % i]
del ct.property['i_pa_num_zobs']
return
[docs] def cleanup(self, ct):
self.restore_zobs(ct)
atoms_to_delete = analyze.evaluate_asl(
ct, "withinbonds 1 (atom.i_pa_xtalatom 1)")
for atom in ct.atom:
for name in [
's_pa_ligand_id', 'i_pa_ligand_het', 'i_pa_ligand_rotatable'
]:
atom.property.pop(name, None)
if len(atoms_to_delete) == 0:
ct.retype()
return
ct.deleteAtoms(atoms_to_delete)
for atom in ct.atom:
if 'i_pa_atomindex' in atom.property:
del atom.property['i_pa_atomindex']
if 'i_pa_xtalatom' in atom.property:
del atom.property['i_pa_xtalatom']
if 'i_pa_surfaceatom' in atom.property:
del atom.property['i_pa_surfaceatom']
ct.retype()
return
[docs] def summarize_pkas(self):
first = True
for changeable in self.changeables:
if changeable.pka is None:
continue
if first:
report(LOG_BASIC, '\nFinal pKas:')
first = False
pka = changeable.pka
# Histidine has two pKa's, one for each flip state
if changeable.type == 'HISTIDINE' and changeable.current_state > 2:
if changeable.pka_flip is not None:
pka = changeable.pka_flip
report(LOG_BASIC, f' {changeable.name} : {pka:.2f}')
[docs]def annotate_atom_indices(ct):
"""Add atom index as an atom property to each atom"""
for atom in ct.atom:
atom.property['i_pa_atomindex'] = atom.index
[docs]def identify_species(ct, species: List[Type],
target_atoms) -> List[ProtAssign.changeable]:
"""
Identify all changeables, i.e. elements in the network that can be changed
apart from ligand changeables.
"""
# Now identify the other changeable types
changeables = []
for species in species:
iatoms = analyze.evaluate_asl(ct, species.asl)
for iatom in iatoms:
if iatom not in target_atoms:
continue
new_changeable = species(ct, iatom)
if not new_changeable.valid:
continue
changeables.append(new_changeable)
return changeables
[docs]def identify_ligand_changeables(
ct, ligand_sts: List[Structure]) -> List[ProtAssign.ligand_changeable]:
"""
Identify ligand changeables given a list of structures.
Ligand changeables are identified by comparing the residue ID of given
ligand structures with residues that are present in the Structure.
"""
changeables = []
if not ligand_sts:
return changeables
# Create a lookup table from ligand id to the ligand structure.
ligand_id_to_sts = defaultdict(list)
for ligand_st in ligand_sts:
residues = list(ligand_st.residue)
if len(residues) != 1:
name = ' - '.join([residue.pdbres.strip() for residue in residues])
report(
LOG_BASIC,
f"Only ligand consisting of a single residue are currently supported. Skipping ligand ({name})"
)
continue
residue = list(ligand_st.residue)[0]
ligand_id = (residue.chain, residue.resnum, residue.inscode,
residue.pdbres)
ligand_id_to_sts[ligand_id].append(ligand_st)
# Check for each its residue if it's a ligand by generating a ligand-id
# and if so, generate a ligand_changeable
for residue in ct.residue:
residue_id = (residue.chain, residue.resnum, residue.inscode,
residue.pdbres)
ligand_sts = ligand_id_to_sts[residue_id]
if not ligand_sts:
continue
changeable = ProtAssign.ligand_changeable(ct, residue.atom[1])
for ligand_st in ligand_sts:
changeable.add_protonation_state(ligand_st)
changeables.append(changeable)
return changeables
[docs]def identify_nonshared_rotatable(ct, ligand_changeables: List[
ProtAssign.ligand_changeable], changeable: ProtAssign.changeable) -> bool:
"""
Check if changeable is part of ligand and whether it can be its own
changeable or needs to be sampled within the ligand states.
:return: True if changeable is a non-shared rotatable across
ligand states, False otherwise
"""
if not ligand_changeables or changeable.type != 'ROTATABLE':
return False
atom1 = ct.atom[changeable.parent]
ligand_id = atom1.property.get('s_pa_ligand_id')
if ligand_id is None:
return False
# Get the specific ligand changeable this atom is part of.
ligand_changeable = None
for tmp in ligand_changeables:
if tmp.name == ligand_id:
ligand_changeable = tmp
break
if ligand_changeable is None:
raise RuntimeError(
"Could not identify ligand_changeable corresponding to given changeable."
)
# Check if the atom is available in all protonation states.
# If so it will be its own changeable, a
# rotatable_changeable. If not, the hydrogen position will
# be sampled within the ligand changeable.
identical_atoms = []
for ligand_st in ligand_changeable.protonation_states:
for atom2 in ligand_st.atom:
if atom2.atom_type != atom1.atom_type:
continue
d = max(abs(x1 - x2) for x1, x2 in zip(atom1.xyz, atom2.xyz))
if d > 0.01:
continue
identical_atoms.append(atom2)
break
if len(identical_atoms) == len(ligand_changeable.protonation_states):
# Register the shared changeable to the ligand_changeable
# for bookkeeping.
ligand_changeable.shared_changeables.append(changeable)
for atom in identical_atoms:
atom.property['i_pa_ligand_shared_changeable_index'] = 1
return False
return True
[docs]def update_rotatable_changeable_indices(
ct, iatoms: List[int],
changeables: List[ProtAssign.rotatable_changeable]):
"""
Update the atom indices of the shared changeables, in this case
specifically for rotatables.
:param ct: Structure
:current_state_indices: Ligand atom indices of current state
"""
parent = 'parent'
attr_names = (parent * 3, parent * 2, parent)
for changeable in changeables:
for attr in attr_names:
iatom1 = getattr(changeable, attr)
xyz = ct.atom[iatom1]
for iatom2 in iatoms:
d = ct.measure(iatom1, iatom2)
if d < 0.02:
setattr(changeable, attr, iatom2)
# Update hydrogen which can have moved
parent = changeable.parent
for ba in ct.atom[parent].bonded_atoms:
if ba.atomic_number == 1:
changeable.hydrogen = ba.index
[docs]def filter_nonshared_rotatables(
ct, changeables: List[ProtAssign.changeable]
) -> List[ProtAssign.changeable]:
"""
Given a list of changeables, filter rotatable_changeable instances if they
are not shared across all ligand states, as these nonshared rotatables will
be dealed with by the ligand_changeable itself.
"""
ligand_changeables = [
changeable for changeable in changeables if changeable.type == 'LIGAND'
]
return [
changeable for changeable in changeables
if not identify_nonshared_rotatable(ct, ligand_changeables, changeable)
]
[docs]def annotate_ligand_atoms(ligand_sts: List[Structure], ligand_id: str):
"""Annotate atoms of ligand structures with an ID and state ID"""
for n, ligand_st in enumerate(ligand_sts):
for atom in ligand_st.atom:
atom.property['s_pa_ligand_id'] = ligand_id
atom.property['i_pa_ligand_het'] = n
[docs]def update_ligand_rotatables_annotation(ct, iatoms: List[int]):
"""
Update atom properties of rotatable hydrogens in the ligand reindexing them
"""
n = 0
for iatom in iatoms:
atom = ct.atom[iatom]
if atom.property.get('i_pa_ligand_rotatable') is not None:
atom.property['i_pa_ligand_rotatable'] = n
n += 1
[docs]def add_ligand_sts(ct, ligand_sts: List[Structure]):
"""
Extend structure with ligand states
Checks if a ligand state is already part of the structure to determine
whether to add based on specific atom properties.
"""
# Check if certain ligand protonation states are already part of
# the structure
ligand_id = ligand_sts[0].atom[1].property['s_pa_ligand_id']
used_ligand_hets = set()
for atom in ct.atom:
if atom.property.get('s_pa_ligand_id') != ligand_id:
continue
ligand_het = atom.property.get('i_pa_ligand_het')
if ligand_het is not None:
used_ligand_hets.add(ligand_het)
for n, ligand_st in enumerate(ligand_sts):
if n not in used_ligand_hets:
ct.extend(ligand_st)
[docs]def get_ligand_atom_indices(ct, ligand_id: str) -> List[int]:
"""Return atom indices annotated by ligand_id"""
return analyze.evaluate_asl(ct, f"atom.s_pa_ligand_id '{ligand_id}'")
[docs]def further_annotate_ligand_atoms(ct, iatoms: List[int]):
"""
Further annotate ligand atoms notably hydrogen
Assumes ligand atoms are annotated already with s_pa_ligand_id and
i_pa_ligand_het
"""
for iatom in iatoms:
atom = ct.atom[iatom]
if atom.atomic_number == 1:
continue
atom.property['i_pa_atomindex'] = iatom
# Annotate the hydrogens too, as they might have been added
# since last annotating the ligand.
for neighbor in atom.bonded_atoms:
if neighbor.atomic_number != 1:
continue
neighbor.property.update({
's_pa_ligand_id': atom.property['s_pa_ligand_id'],
'i_pa_ligand_het': atom.property['i_pa_ligand_het'],
'i_pa_atomindex': neighbor.index,
})
[docs]def group_ligand_het_states(ct, iatoms: List[int]) -> Dict[int, List[int]]:
"""Return ligand atom indices grouped by het"""
# Group atoms by their het-state
iatoms_by_het = defaultdict(list)
for iatom in iatoms:
het = ct.atom[iatom].property['i_pa_ligand_het']
iatoms_by_het[het].append(iatom)
return iatoms_by_het
[docs]def get_ligand_interactors(ct, iatoms: List[int]):
"""Get ligand interactors while excluding shared acceptors/donors"""
sub_st = ct.extract(iatoms, copy_props=True)
mapper = {
atom.index: atom.property.get('i_pa_atomindex') for atom in sub_st.atom
}
interactors = identify_all_hbonders(sub_st)
interactors.remap(mapper)
# Filter out shared changeable donors as these interactions are accounted
# for by that specific shared changeable.
interactors.donors = [
(a1, a2)
for a1, a2 in interactors.donors
if 'i_pa_ligand_shared_changeable_index' not in ct.atom[a1].property
]
# Alcohols can also accept donors, so add oxygen to acceptor list.
for iheavy, ihydrogen in interactors.donors:
if ct.atom[iheavy].atomic_number == 8:
interactors.acceptors.append(iheavy)
return interactors
[docs]def annotate_ligand_rotatable_hydrogens(ct, ligand_iatoms: List[int]):
"""
Annotate rotatable hydrogens of the ligand.
These are either hydrogens that are shared with a rotatable_changeable or
rotatable in a subset of ligand states
"""
for iatom in ligand_iatoms:
atom = ct.atom[iatom]
# Check if we are dealing with an alcohol/hydroxyl group. If so obtain
# the hydrogen.
if atom.atomic_number != 8:
continue
hydrogens = [ba for ba in atom.bonded_atoms if ba.atomic_number == 1]
if len(hydrogens) != 1:
continue
# Now check whether the hydrogen is part of an independent
# rotatable_changeable.
hydrogen = hydrogens[0]
changeable_index = atom.property.get(
'i_pa_ligand_shared_changeable_index')
if changeable_index is None:
try:
dihedral_atoms = get_dihedral_atoms(ct, hydrogen)
if len(dihedral_atoms) == 4:
hydrogen.property['i_pa_ligand_rotatable'] = 1
except Exception:
pass
else:
hydrogen.property[
'i_pa_ligand_shared_changeable_index'] = changeable_index
[docs]def identify_all_hbonders(ct):
"""Identify all acceptor, donors and clashers in a structure"""
donors = []
donor_Hs = analyze.evaluate_asl(ct, "(atom.ele H AND not /C0-H0/)")
donor_Hs += analyze.evaluate_asl(
ct,
"(res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \") AND (atom.ptype \" HD2\" or atom.ptype \" HE1\")"
)
for H in donor_Hs:
H_atom = ct.atom[H]
if H_atom.bond_total == 1:
for atom in H_atom.bonded_atoms:
donors.append((int(atom), H))
acceptors = analyze.evaluate_asl(ct, "not atom.ele H AND not atom.ele C")
# Move metal ions from acceptor list to donor list (ion is both heavy atom and "hydrogen").
atoms_to_remove = []
for acceptor in acceptors:
atom = ct.atom[acceptor]
if atom.formal_charge > 0 and atom.atomic_number not in [7, 8]:
donors.append((acceptor, acceptor))
atoms_to_remove.append(acceptor)
for atom in atoms_to_remove:
acceptors.remove(atom)
# Remove any atoms accounted for in donors from list of acceptors.
for donor in donors:
if donor[0] in acceptors:
acceptors.remove(donor[0])
# All of the non-polar hydrogens
clashers = analyze.evaluate_asl(ct, "atom.ele H AND /C0-H0/")
return _Interactors(acceptors=acceptors, donors=donors, clashers=clashers)
[docs]def get_dihedral_atoms(ct: Structure, h: int) -> List[int]:
"""
Get atoms to define a dihedral angle given a hydrogen atom index
Return list is smaller than 4 elements if dihedral cannot be determined.
"""
ret_list = []
# The first atom will be H itself:
ret_list.append(h)
# Now find the O or S attached to the H. Note that bonds (like everything
# associated with structures) are indexed from "1" so we are actually
# looking for the first bond:
Xatom = ct.atom[h].bond[1].atom2
ret_list.append(int(Xatom))
# Now find a suitable non-H atom bonded to the X:
C1atom = -1
for b in Xatom.bond:
conn_atom = b.atom2
# This is probably the easiest way to find a non-H atom:
if not conn_atom.atomic_number == 1:
C1atom = int(conn_atom)
ret_list.append(C1atom)
# Leave the loop:
break
# Check to see that we did find a C1 atom:
if C1atom < 0:
return ret_list
# Now look for the final atom we need:
for b in ct.atom[C1atom].bond:
conn_atom = b.atom2
if int(conn_atom) != ret_list[1]:
# This is probably the easiest way to find a non-H atom:
C2atom = int(conn_atom)
ret_list.append(C2atom)
# Leave the loop:
break
return ret_list
if __name__ == '__main__':
if len(sys.argv) != 3:
print("Please specify 2 arguments: input and output file names.")
sys.stdout.flush()
sys.exit(1)
inputfile_name = sys.argv[1]
outputfile_name = sys.argv[2]
try:
structure_ct = Structure.read(inputfile_name)
except:
print("ERROR: unable to open file " + inputfile_name)
sys.stdout.flush()
sys.exit(1)
print("Structure extracted from file: " + inputfile_name)
print("Output structure will be written to: " + outputfile_name)
sys.stdout.flush()
instance = ProtAssign(
structure_ct,
interactive=False,
asl='',
noprot_asl='',
use_xtal=False,
do_flips=True,
freeze_existing=False,
include_initial=False,
max_comb=500000,
num_sequential_cycles=30,
max_cluster_size=None,
add_labels=True,
label_pkas=True,
pH=pH_neutral,
use_propka=True,
propka_pH=7.0,
user_states=[],
minimize=False,
)
structure_ct.write(outputfile_name)
print("\nComplete. Output file: %s" % outputfile_name)
sys.stdout.flush()