"""
A module to assign bond orders based on molecular geometry.
Assigns double and triple bonds to structures based on molecular geometry
(bond length, bond angles, and dihedral angles). Useful when importing
ligands from PDBs into Maestro. Please check the output structure for
errors and compare it to the molecular formula. If this script assigns
bond orders incorrectly to a reasonable structure, please email the
maestro file of the structure to help@schrodinger.com.
There is a single public function::
assignbondorders.assign_st(input_st, atoms=None, neutralize=False,
problem_only=False, skip_assigned_residues=True,
_logger=None)
Assigns bond orders to atom list [atoms] of structure input_st and returns a
list of bonds that were adjusted. Bond orders are assigned for all atoms if the
atoms list is empty or not specified.
Copyright (c) Schrodinger, LLC. All rights reserved.
"""
import io
import os
import warnings
import zipfile
import numpy
from rdkit import Chem
from schrodinger import structure
from schrodinger.comparison import atom_mapper
from schrodinger.infra import structure as infrastructure
from schrodinger.job import util
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.thirdparty import rdkit_adapter
from schrodinger.utils import log
# Check whether SCHRODINGER_PYTHON_DEBUG is set for debugging:
DEBUG = (log.get_environ_log_level() <= log.DEBUG)
logger = log.get_output_logger("schrodinger.structutils.assignbondorders")
if DEBUG:
logger.setLevel(log.DEBUG)
DEBUG_AROMATIC = True
DEBUG_BOND_SCORE = True
DEBUG_GROUP = True
# WARNING: Tampering with this value may break something else!!!
DOUBLE_BOND_THRESHOLD = 5.0
ASSIGNED_AROMATIC, NOT_AROMATIC, NOT_ASSIGNED = list(range(3))
# Maximum number of bonds that can be made to an atom (by element):
MAX_VALENCE = [
0, 1, 1, 1, 2, 6, 4, 4, 2, 1, 1, 1, 2, 8, 8, 8, 8, 1, 1, 1, 2, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 1, 1, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 8, 1, 1, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9
]
# FIXME Update these values with a dict for actual bond lengths
# generated by python/test/assignbondorders_test/calc_bond_lengths.py
ATOM_RADII_DICT = {
1: 0.23, # H
5: 0.83, # B
6: 0.68, # C
7: 0.68, # N
8: 0.68, # O
9: 0.64, # F
14: 1.20, # Si
15: 1.05, # P
16: 1.02, # S
17: 0.99, # Cl
33: 1.21, # As
34: 1.22, # Se
35: 1.21, # Br
52: 1.47, # Te
53: 1.40, # I
# NOTE: Not all of these are used
}
CCD_FILE = os.path.join(util.hunt('mmshare', dir='data'), 'mmpdbx',
'cif_components.zip')
assert os.path.isfile(CCD_FILE)
CCD_CACHE = {}
CCD_ATOM_PROPERTY = 's_ppw_CCD_assignment_status'
[docs]def debug(txt):
"""
For general debug messages
"""
logger.debug(txt)
[docs]def debuggroup(msg):
"""
For debugging group bond assignments
"""
if DEBUG_GROUP:
debug(msg)
[docs]def debugbonders(msg):
"""
For debugging aromatic ring assignments
"""
if DEBUG_AROMATIC:
debug(msg)
[docs]def debugbond(msg):
"""
For debugging code that calculates bond "scores"
"""
if DEBUG_BOND_SCORE:
debug(msg)
[docs]def warning(msg):
# NOTE: For now, print in debug mode only:
logger.debug(msg)
[docs]def get_single_bond_length(a1, a2):
"""
Returns ideal length of a single-order bond between specified atoms.
If either of the atoms is non-common, returns 0.0.
"""
radius1 = ATOM_RADII_DICT.get(a1.atomic_number)
radius2 = ATOM_RADII_DICT.get(a2.atomic_number)
if not radius1 or not radius2:
return 0.0
else:
return radius1 + radius2 + 0.1
[docs]def get_neighbors(st, atom):
""" Returns a list of atoms that <atom> is bound to."""
return [b.atom2.index for b in st.atom[int(atom)].bond]
[docs]def calculate_average_angle(st, atom_num):
"""
Calculates the average of all bond angles around the input atom.
Returns 0 if the angle can not be calculated.
Close to 109 degrees = tetrahedral
Close to 120 degrees = planar
Close to 180 degrees = linear
"""
neighbors = get_neighbors(st, atom_num)
num_n = len(neighbors)
if num_n <= 1:
return 0
elif num_n >= 4:
return 0
elif num_n == 2:
return st.measure(neighbors[0], atom_num, neighbors[1])
elif num_n == 3:
angle1 = st.measure(neighbors[0], atom_num, neighbors[1])
angle2 = st.measure(neighbors[0], atom_num, neighbors[2])
angle3 = st.measure(neighbors[1], atom_num, neighbors[2])
return (angle1 + angle2 + angle3) / 3
[docs]def calculate_dihedral(st, a1, a2):
"""
Calculates the average dihedral angle of the bond between the specified
atoms. Close to 0 (zero) is likely to be double bond.
"""
a1 = int(a1)
a2 = int(a2)
num_angles = 0
dihedral_angle_sum = 0.0
substituents1 = get_neighbors(st, a1)
substituents2 = get_neighbors(st, a2)
if a2 not in substituents1 or a1 not in substituents2:
return -1
substituents1.remove(a2)
substituents2.remove(a1)
if len(substituents1) < 1 or len(substituents2) < 1:
return -1
for sub1 in substituents1:
for sub2 in substituents2:
num_angles += 1
angle = st.measure(sub1, a1, a2, sub2)
if angle < 0:
angle = -angle
if angle > 90:
angle = 180.0 - angle
dihedral_angle_sum += angle
return dihedral_angle_sum / num_angles
[docs]def order_ring_or_chain(st, atoms):
"""
This function orders the atoms in the ring or chain by bonding order.
"""
atoms_ordered = []
input_num = len(atoms)
if len(atoms) <= 2:
return atoms
# if chain, look for terminal atom
terminal = atoms[0]
is_ring = 1
for atom_num in atoms:
neighbors = get_neighbors(st, atom_num)
bondable_neighbors = []
for n in neighbors:
if n in atoms:
bondable_neighbors.append(n)
num_n = len(bondable_neighbors)
if num_n == 1:
terminal = atom_num
is_ring = 0
break
atoms_ordered.append(terminal)
current_atom = st.atom[terminal]
atoms.remove(terminal)
if is_ring == 1:
if st.areBound(atoms[0], terminal):
atoms_ordered.append(atoms[0])
current_atom = st.atom[atoms[0]]
atoms.remove(atoms[0])
current_atom_save = 0
while atoms != []: # goes through this code once for every atom
next_atom = 0
for ibond in current_atom.bond:
atom2 = ibond.atom2
if atom2.index in atoms and atom2.index not in atoms_ordered:
atoms_ordered.append(atom2.index)
current_atom = atom2
atoms.remove(current_atom.index)
next_atom = 1
break
if next_atom == 0:
if current_atom == current_atom_save and len(atoms) > 0:
# sorted the current chain. Move on to the next:
atoms_ordered.append(atoms[0])
current_atom = st.atom[atoms[0]]
atoms.remove(atoms[0])
current_atom_save = current_atom
if input_num != len(atoms_ordered):
print(" INPUT AND OUTPUT HAVE DIFFERENT ATOM #!")
return atoms_ordered
[docs]def do_rings_share_atoms(r1, r2):
for a1 in r1:
for a2 in r2:
if a1 == a2:
return True
return False
[docs]def get_rings_in_group(ring, rings_to_check):
group = []
group.append(ring)
for n1 in rings_to_check:
if n1 not in group and do_rings_share_atoms(ring, n1):
group.append(n1)
for n2 in rings_to_check:
if n2 not in group and do_rings_share_atoms(n1, n2):
group.append(n2)
for n3 in rings_to_check:
if n3 not in group and do_rings_share_atoms(n2, n3):
group.append(n3)
for n4 in rings_to_check:
if n4 not in group and do_rings_share_atoms(
n3, n4):
group.append(n4)
return group
[docs]class CCDAtomMapper(atom_mapper.ConnectivityAtomMapper):
"""
Class for determining most ideal mapping of a CCD SMILES structure to a
3D ligand conformation.
"""
[docs] def score_mapping(self, ccd_st, st, atset):
"""
This method is called to "score" a given mapping. Here, both input
structures have been re-ordered to have the same atom ordering, and
<atset> is a set of atoms that are "shared" between the 2 structures.
"""
st = st.copy()
# Copy bond orders and formal charges from CCD to ST:
het_heavy_atoms = list(atset)
_assign(ccd_st, het_heavy_atoms, st, het_heavy_atoms)
# For now, we only consider valence violations and differences in
# chiralities. In the future, bad geometries will be penalized as well,
# and eventually perhaps bad interactions with the protein.
num_issues = 0
for anum in atset:
# Make sure to consider bonds to hydrogens (which are not in atset)
# and the protein (covalent bonds), metals, etc.
atom = st.atom[anum]
total_orders = sum(bond.order for bond in atom.bond)
if total_orders > MAX_VALENCE[atom.atomic_number]:
num_issues += 1
ccd_ch = ccd_st.atom[anum].chirality
st_ch = st.atom[anum].chirality
if ccd_ch != st_ch and ccd_ch not in (None, 'undef'):
num_issues += 1
return num_issues
[docs]class AssignBondOrders(object):
[docs] def __init__(self, st, atoms, neutralize):
"""
This is a private class; use assign_st() function instead.
"""
self.st = st
self.neutralize_groups = neutralize
# Optimize by making a SET of atoms. Also exclude any hydrogens:
self.atoms = set()
for a in atoms:
if self.st.atom[a].atomic_number != 1:
self.atoms.add(a)
self.amide_nitrogens = []
debug("Assigning orders to atoms: %s" % self.atoms)
self.all_rings = []
self.aromatic_rings = []
self.aromatic_ring_atoms = set()
self.aromatic_ring_groups = []
self.bonds_in_6mem_rings = set()
# Set of (atom1, atom2). Atom with lowest index appears first
self.bonds_in_5mem_rings = set()
self.changed = False
self.assigned_bonds = []
[docs] def assign(self):
debug("Number of atoms in the structure: %i" % self.st.atom_total)
# Find atoms with zero-order bonds:
# FIXME put this into a separate method eventually?
self.zero_bonded_atoms = set()
for bond in self.st.bond:
if bond.order == 0:
self.zero_bonded_atoms.add(bond.atom1.index)
self.zero_bonded_atoms.add(bond.atom2.index)
debug("assigning templated groups")
self.assignTemplatedSubstructures()
debug("getting rings")
self.findRings()
self.findAromaticRings()
debug("identifying groups by connectivity")
self.identifyGroups()
self.assignTripleBonds()
self.groupAromaticRings()
self.assignAromaticRingOrders()
self.assignChainOrders()
self.fixKetones()
self.fix_N4s()
self.st.retype()
self.st.retype() # Needs to be run twice to work properly
debug("Assigning structure is complete")
return self.assigned_bonds
[docs] def assignTemplatedSubstructures(self):
"""
Force assignment of known functional groups by SMARTS
"""
groups_list = [
("Staurosporine core",
"[O-0X1][C-0X3]1[N-0X2][C-0X2][C-0X3]2[C-0X3]1[C-0X3]1[C-0X3]3[C-0X2][C-0X2][C-0X2][C-0X2][C-0X3]3[N-0X3][C-0X3]1[C-0X3]1[N-0X3][C-0X3]3[C-0X2][C-0X2][C-0X2][C-0X2][C-0X3]3[C-0X3]12",
{
(0, 1): 2,
(2, 3): 1
}, {}),
(
"Heme core", # 1hho
"[C-0X3]1[C-0X3][C-0X3]2[C-0X2][C-0X3]3[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]4[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]5[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]1[N-0X3]2)[N-0X3]5)[N-0X3]4)[N-0X3]3",
{
(0, 1): 2,
(2, 3): 2,
(4, 5): 2,
(6, 7): 2,
(8, 9): 2,
(10, 11): 2,
(12, 22): 2,
(13, 14): 2,
(15, 16): 2,
(17, 18): 2,
(19, 20): 2,
},
{
21: -1,
23: -1
},
# FIXME Add the Fe to the SMARTS pattern! Otherwise non-templated
# structures will not get a +2 charge on the iron.
),
(
"Siroheme core (with ZOBs and Iron)", # 1aop
"[C-0X4]1[C-0X3][C-0X3]2[C-0X2][C-0X3]3[C-0X4][C-0X3][C-0X3]4[C-0X2][C-0X3]5[C-0X3][C-0X3][C-0X3]6[C-0X2][C-0X3]7[C-0X3][C-0X3][C-0X3]8[C-0X2][C-0X3]1[N-0X3]2_[Fe-0X4](_[N-0X3]78)(_[N-0X3]34)_[N-0X3]56",
{
(2, 20): 2,
(3, 4): 2,
(7, 8): 2,
(9, 23): 2,
(10, 11): 2,
(12, 13): 2,
(14, 15): 2,
(16, 17): 2,
(18, 19): 2,
},
{
22: -1,
23: -1,
21: +2
},
# FIXME Consider adding support for other metals.
),
(
"Siroheme core (without ZOBs nor Iron)", # 1aop, during PPW workflow
"[C-0X4]1[C-0X3][C-0X3]2[C-0X2][C-0X3]3[C-0X4][C-0X3][C-0X3]([C-0X2][C-0X3]4[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]5[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]1[N-0X3]2)[N-0X3]5)[N-0X3]4)[N-0X3]3",
{
(2, 20): 2,
(3, 4): 2,
(7, 8): 2,
(9, 22): 2,
(10, 11): 2,
(12, 13): 2,
(14, 15): 2,
(16, 17): 2,
(18, 19): 2,
},
{
21: -1,
23: -1
},
),
(
"Cobalamin core", # 7req
"[C-0X3]1[C-0X4][C-0X3]2[C-0X3][C-0X3]3[C-0X3][C-0X4][C-0X4]([N-0X3]3)[C-0X3]3[C-0X3][C-0X4][C-0X3]([C-0X3][C-0X3]4[C-0X3][C-0X4][C-0X3]([C-0X2][C-0X3]1[N-0X3]2)[N-0X3]4)[N-0X3]3",
{
(2, 3): 2,
(4, 8): 2,
(12, 13): 2,
(14, 21): 2,
(17, 18): 2,
(19, 20): 2,
},
{
22: -1
},
),
]
regenerate_tmp_st = True
for name, smarts, bonds, charges in groups_list:
if regenerate_tmp_st:
# Create a CT with only the atoms that need to be assigned:
assigned_atoms = []
for anum in range(1, self.st.atom_total + 1):
if anum not in self.atoms:
assigned_atoms.append(anum)
tmp_st = self.st.copy()
renumber_map = tmp_st.deleteAtoms(assigned_atoms,
renumber_map=True)
# Invert the renumber map dict:
reverted_renumber_map = {}
for anum in range(1, self.st.atom_total + 1):
new_atomnum = renumber_map[anum]
if new_atomnum is not None:
reverted_renumber_map[new_atomnum] = anum
regenerate_tmp_st = False
# NOTE: To use evaluate_smarts_canvas() here, we need to rewrite
# some of the SMARTS patterns.
with warnings.catch_warnings():
warnings.filterwarnings("ignore",
message=".*evaluate_smarts()",
category=DeprecationWarning)
sets = analyze.evaluate_smarts(tmp_st, smarts, unique_sets=True)
for atom_set in sets:
debug("MATCHED %s: %s" % (name, atom_set))
# To make implementing new patterns easier:
if DEBUG and False:
for i, anum in enumerate(atom_set):
self.st.atom[anum].pdbname = str(i)
for index_atom, value in charges.items():
atom = reverted_renumber_map[atom_set[index_atom]]
if atom not in self.atoms:
# This atom already matched this pattern, and was assigned
# (import for heme-like groups)
continue
self.setFormalCharge(atom, value)
# Make sure this atom does not get re-assigned:
self.atoms.remove(atom)
regenerate_tmp_st = True
for (index_atom1, index_atom2), value in bonds.items():
atom1 = reverted_renumber_map[atom_set[index_atom1]]
atom2 = reverted_renumber_map[atom_set[index_atom2]]
if atom1 not in self.atoms or atom2 not in self.atoms:
# These atoms already matched this pattern, and were assigned
# (import for heme-like groups)
continue
# if atom1 in self.atoms and atom2 in self.atoms:
self.setBondOrder(atom1, atom2, value)
# Make sure these atoms do not get re-assigned
# (importnat especially if the bond was forced to be
# single):
self.atoms.remove(atom1)
self.atoms.remove(atom2)
regenerate_tmp_st = True
[docs] def isOnlySingleBonded(self, atom):
"""
Returns False if atom has double or triple bonds.
"""
for bond in self.st.atom[int(atom)].bond:
if bond.order > 1:
return False
return True
[docs] def getNeighbors(self, atom):
""" Returns a list of atoms that <atomnum> is bound to."""
if type(atom) == type(1):
atom = self.st.atom[atom]
return [n.index for n in atom.bonded_atoms]
[docs] def getNeighborsObj(self, atom):
""" Returns a list of atoms that <atomnum> is bound to."""
if type(atom) == type(1):
atom = self.st.atom[atom]
return list(atom.bonded_atoms)
[docs] def getNumNeighbors(self, atom):
"""
Returns a the number of atoms that <atom> is bound to.
EXCLUDING zero-order bonds
"""
# Ev:100421
if type(atom) == type(1):
# Convert atom number to atom object
atom = self.st.atom[atom]
n = 0
for bond in atom.bond:
if bond.order > 0:
n += 1
return n
[docs] def getNumBondOrders(self, atom):
if type(atom) == type(1):
atom = self.st.atom[atom]
n = 0
for bond in atom.bond:
n += bond.order
return n
[docs] def getElement(self, atomnum):
return self.st.atom[atomnum].element
[docs] def setBondOrder(self, atom1, atom2, bond_order):
""" Sets the bond order between atom1 and atom2 to bond_order."""
atom1 = int(atom1)
atom2 = int(atom2)
bond = self.st.getBond(atom1, atom2)
if not bond:
raise RuntimeError("setBondOrder(): Invalid bond: %s %s" %
(atom1, atom2))
bond.order = bond_order
debug(" Set bond %i-%i order to %i" % (atom1, atom2, bond_order))
if atom1 < atom2:
self.assigned_bonds.append((atom1, atom2, bond_order))
else:
self.assigned_bonds.append((atom2, atom1, bond_order))
# Ev:69974
if bond_order == 3: # For triple bonds only for now
# Charge of one may depend on charge of the other, so need 3:
self.fixFormalCharge(atom1)
self.fixFormalCharge(atom2)
self.fixFormalCharge(atom1)
# PPREP-1047 If any of these atoms is a zero-order bonded carbon with 3
# total orders, then assign a negative charge to it:
for atom in (atom1, atom2):
if atom in self.zero_bonded_atoms \
and self.st.atom[atom].element == 'C' \
and self.getNumBondOrders(atom) == 3:
self.setFormalCharge(atom, -1)
[docs] def getAtomsRings(self, atom):
"""
Return a list of rings that this atom is part of.
"""
atom_rings = []
anum = int(atom)
for ring in self.all_rings:
if anum in ring:
atom_rings.append(ring)
return atom_rings
[docs] def isAtomInRing(self, atom):
anum = atom.index
for ring in self.all_rings:
if anum in ring:
return True
return False
[docs] def isAtomInSmallRing(self, atom_num):
""" Returns 1 if atom_num is a ring with 3, 4, or 5 members. """
debug_small_ring = False
if debug_small_ring:
print(" isAtomInSmallRing() atom =", atom_num)
for n1 in self.getNeighbors(atom_num):
if debug_small_ring:
print(" n1 =", n1)
for n2 in self.getNeighbors(n1):
if n2 == atom_num:
continue
if debug_small_ring:
print(" n2 =", n2)
for n3 in self.getNeighbors(n2):
if n3 == n1:
continue
if debug_small_ring:
print(" n3 =", n3)
if n3 == atom_num:
return True
for n4 in self.getNeighbors(n3):
if n4 == n2:
continue
if debug_small_ring:
print(" n4 =", n4)
if n4 == atom_num:
return True
for n5 in self.getNeighbors(n4):
if n5 == n3:
continue
if debug_small_ring:
print(" n5 =", n5)
if n5 == atom_num:
return True
return False
[docs] def forEdgeInRing(self, ring):
"""
Iterates over (atom1, atom2) in the given ring
"""
# Because -1 is the last item in the list.
for i, a1 in enumerate(ring, start=-1):
yield a1, ring[i]
[docs] def calcBondScore(self, a1, a2, distance_weight=1.0, ring_angle_weight=0.0):
""" Calculates the probability that the two atoms are double bonded to each other. """
st = self.st
a1 = int(a1)
a2 = int(a2)
atom1 = st.atom[a1]
atom2 = st.atom[a2]
a1_neighbors = self.getNeighbors(atom1)
a2_neighbors = self.getNeighbors(atom2)
atom1_nn = len(a1_neighbors)
atom2_nn = len(a2_neighbors)
# Ev:88246 and Ev:122006 Decrease the distance weight for 5 membered rings:
mem5ring = tuple(sorted((a1, a2))) in self.bonds_in_5mem_rings
if mem5ring:
distance_weight -= 0.4
# Decrease the distance weight for 6 membered rings slightly:
mem6ring = tuple(sorted((a1, a2))) in self.bonds_in_6mem_rings
if mem6ring:
distance_weight -= 0.2
bond_score = 0.0
debugbond('\n CALCULATING BOND SCORE FOR BOND: %s %s' % (a1, a2))
if not st.areBound(a1, a2):
debugbond(
" ERROR! calcBondScore(): The input atoms %s & %s are not bonded!!!!!"
% (a1, a2))
return -1
a1_angle = calculate_average_angle(st, a1)
a2_angle = calculate_average_angle(st, a2)
# If one of the atoms is already double bonded to something, then it
# can not double bond to anything else unless it is sp hybridized, in
# which case the bond angle wll be close to 180 degrees:
if not self.isOnlySingleBonded(a1):
if a1_angle < 155: # exception C=C=C and N=N+=N
debugbond(
' ** bond score is 0, because average angle of atom %s is < 155'
% a1)
return 0
if not self.isOnlySingleBonded(a2):
if a2_angle < 155: # exception C=C=C and N=N+=N
debugbond(
' ** bond score is 0, because average angle of atom %s is < 155'
% a2)
return 0
# If Oxygen or Sulfur is already bound twice, then they can't bond again.
if atom1.element in ['O', 'S']:
if len(a1_neighbors) == 2: # bonded to the fullest
if not mem5ring:
# Ev:88246
debugbond(
' ** bond score is 0, because atom %s (%s) is already bonded twice'
% (a1, atom1.element))
return 0
if atom2.element in ['O', 'S']:
if len(a2_neighbors) == 2: # bonded to the fullest
if not mem5ring:
# Ev:88246
debugbond(
' ** bond score is 0, because atom %s (%s) is already bonded twice'
% (a2, atom2.element))
return 0
# Punish Amides:
amideN_present = False
amideN_is_tertiary = False # Whether this amide is tertiary
if atom1.element == 'N':
for n in a1_neighbors:
if n != a2 and st.atom[n].element == 'C':
nns = self.getNeighbors(n)
for nn in nns:
if nn != a1 and st.atom[nn].element == 'O':
if not self.isOnlySingleBonded(nn):
amideN_present = True
if self.getNumBondOrders(atom1) == 3:
amideN_is_tertiary = True
break
if atom2.element == 'N' and not amideN_present:
for n in a2_neighbors:
if n != a1 and st.atom[n].element == 'C':
nns = self.getNeighbors(n)
for nn in nns:
if nn != a2 and st.atom[nn].element == 'O':
if not self.isOnlySingleBonded(nn):
amideN_present = True
if self.getNumBondOrders(atom2) == 3:
amideN_is_tertiary = True
break
if amideN_present:
delta = -4.0
if amideN_is_tertiary:
delta -= 3.0
bond_score += delta
debugbond(" Delta accounting for amideN presence: %.2f" %
delta)
# Account for the bond length:
bond_dist = st.measure(a1, a2)
# the sum of the radii of atom1 and atom2.
# If the bond length is less than this value, the bond is likely double
# Zero is invalid atom elements:
dist_threshold = get_single_bond_length(atom1, atom2)
if dist_threshold == 0.0:
print("WARNING: get_single_bond_length() failed for atoms:", a1, a2)
print(" Atom elements:", atom1.element, atom2.element)
debugbond(" dist= %s " % bond_dist +
" threshold= %s" % dist_threshold)
# dist_threshold - bonds longer than this are likely to be single bonds;
# bonds shorter are likely to be double bonds.
dist_diff = bond_dist - dist_threshold
if dist_diff < -0.2:
# Do not over-reward very short bonds:
extra_diff = -(dist_diff + 0.2)
delta = (0.2 * 70.0 + extra_diff * 30.0)
elif dist_diff < -0.1:
# Reward short bonds:
delta = -(dist_diff * 70.0)
# FIXME penalize the first 0.1A less
elif dist_diff < 0.0:
# Smaller reward for slightly short bonds:
delta = -(dist_diff * 60.0)
else:
# Penalize long bonds:
delta = -(dist_diff * 80.0)
delta *= distance_weight
bond_score += delta
debugbond(" Delta accounting for bond length: %.2f" % delta)
# FIXME
# Decrease the weight if abs_diff is small
# Increase the weight if abs_diff is large
# Account for the dihedral angle
bond_dihedral = calculate_dihedral(st, a1, a2)
delta = 0.0
if bond_dihedral == -1:
debugbond(
" Could not calculate the dihedral angle for bond %i-%i" %
(a1, a2))
elif bond_dihedral < 10.0:
delta += (10.0 - bond_dihedral) / 2
elif bond_dihedral > 10.0:
delta -= (bond_dihedral - 10) / 3
if delta:
bond_score += delta
debugbond(" Delta accounting for dihedral: %f" % delta)
tertiary_nitrogen_punishment = 7.0 # See PPREP-940
terminal_nitrogen_punishment = 6.5 # See PPREP-715
terminal_nitrogen_on_ring_punishment = 5.0 # See PPREP-715
secondary_nitrogen_punishment = 0.5
# Punishing bonds with nitrogens (especially tertiary, which would create
# a nitrogen with a positive charge):
delta = 0.0
if atom1.element == 'N' and atom2.element == 'N':
if not self.isBondRingEdge(atom1, atom2) \
and not self.isAtomBoundToAromaticRing(atom1) \
and not self.isAtomBoundToAromaticRing(atom2):
# Double bonds between nitrogens are not very stable (unless they are
# in an aromatic ring, see PPREP-1004) or bound to aromatic rings,
# see PPREP-1014.
delta = -10.0
# NOTE: Do not increase this penalty (test will fail)
else:
if atom1.element == 'N':
n = atom1
n_count = atom1_nn
other = atom2
elif atom2.element == 'N':
n = atom2
n_count = atom2_nn
other = atom1
else:
n = None
if n:
# One of the atoms in this bond is a nitrogen
if n_count == 3:
delta -= tertiary_nitrogen_punishment
elif n_count == 1:
# Fine-tuning for PPREP-715
if self.isAtomInRing(other):
delta -= terminal_nitrogen_on_ring_punishment
else:
delta -= terminal_nitrogen_punishment
else:
delta -= secondary_nitrogen_punishment
if delta:
bond_score += delta
debugbond(
" Delta accounting for penalizing of special nitrogens %s "
% delta)
# The bond angle is more reliable by this factor if the atom has 3 neighbors rather than 2:
angle_3n_weight = 1.7
angle_2n_weight = 0.1
# If the bond angle can be calculated, reward or penalize it:
for anum, angle, nn in ((a1, a1_angle, atom1_nn), (a2, a2_angle,
atom2_nn)):
if not angle:
continue
diff = angle - 117.0
# Standard reward for positive <diff> and penalty for negative:
if nn == 3: # Angle is more reliable if atom has 3 neighbors:
if diff > 2.5:
# This atom is pretty flat, very likely it's involved in a
# double bond, so increase the weight of diff after the
# first 2.5:
extra_diff = diff - 2.5
diff += (extra_diff * 3.0)
angle_reward = diff * angle_3n_weight
elif self.isAtomInSmallRing(anum):
angle_reward = diff * angle_2n_weight * ring_angle_weight
else:
angle_reward = diff * angle_2n_weight
bond_score += angle_reward
debugbond(" Delta accounting for atom %s angle (%f): %f" %
(anum, angle, angle_reward))
debugbond(" bond_dist=" + str(bond_dist) + " dihedral=" +
str(bond_dihedral) + " a1_nn=" + str(atom1_nn) + " a2_nn=" +
str(atom2_nn))
debugbond(" a1_angle=" + str(a1_angle) + " a2_angle=" +
str(a2_angle))
debugbond(" --- Bond_score for %d-%s = %s" %
(a1, atom2.index, bond_score))
return bond_score
[docs] def getBestDoubleBond(self, a1, a2, a3):
"""
Returns a list of two atoms that are the best candidates.
Returns [] if neither bond is potential double bond.
"""
st = self.st
debug("\ngetBestDoubleBond(%i, %i, %i):" % (a1, a2, a3))
# If both bonds are under DOUBLE_BOND_THRESHOLD, [] is returned
bond1_score = self.calcBondScore(a1, a2)
bond2_score = self.calcBondScore(a2, a3)
debug(" bond %s-%s score: %.2f" % (a1, a2, bond1_score))
debug(" bond %s-%s score: %.2f" % (a2, a3, bond2_score))
if bond1_score == -1:
msg = " ABO ERROR! getBestDoubleBond(): Atoms are not bonded: %i, %i" % (
a2, a3)
raise ValueError(msg)
if bond2_score == -1:
msg = " ABO ERROR! getBestDoubleBond(): Atoms are not bonded: %i, %i" % (
a2, a3)
raise ValueError(msg)
# Positively charged nitrogen REVERSE penalty:
a1_posnit = st.atom[a1].element == 'N' and self.getNumNeighbors(a1) == 3
a3_posnit = st.atom[a3].element == 'N' and self.getNumNeighbors(a3) == 3
if a1_posnit and not a3_posnit:
debug(" Penalizing bond (tertiary N present): %s-%s" % (a1, a2))
bond1_score -= 2.0
elif a3_posnit and not a1_posnit:
debug(" Penalizing bond (tertiary N present): %s-%s" % (a2, a3))
bond2_score -= 2.0
if bond1_score >= bond2_score and bond1_score > DOUBLE_BOND_THRESHOLD:
return [a1, a2]
elif bond2_score > bond1_score and bond2_score > DOUBLE_BOND_THRESHOLD:
return [a2, a3]
else:
debug(" getBestDoubleBond(): Neither is double bond")
return []
[docs] def sortFirstThreeOfGroup(self, atom_group):
""" This function sorts the atoms in the group so that the 3 (or 4) atoms that need to be worked on the first will be listed first."""
debuggroup("\nsortFirstThreeOfGroup(): %s" % atom_group)
st = self.st
terminals = []
linears = []
branches = []
sorted_group = []
neighbors_dict = {}
input_len = len(atom_group)
if len(atom_group) < 2:
return []
# Remove any atoms that are disconneted from the rest of the atoms in
# the group:
_group_excluding_singles = []
for a in atom_group:
neighbors = self.getNeighbors(a)
bondable_neighbors = []
for n in neighbors:
if n in atom_group:
bondable_neighbors.append(n)
if len(bondable_neighbors) != 0:
_group_excluding_singles.append(a)
atom_group = _group_excluding_singles
if len(atom_group) <= 1:
debuggroup(
" ERROR in sortFirstThreeOfGroup()!!! small atom_group!")
return []
for atom_num in atom_group:
neighbors = self.getNeighbors(atom_num)
bondable_neighbors = []
for n in neighbors:
if n in atom_group:
bondable_neighbors.append(n)
neighbors_dict[atom_num] = bondable_neighbors
num_n = len(bondable_neighbors)
if num_n == 1:
terminals.append(atom_num)
elif num_n == 2:
linears.append(atom_num)
elif num_n == 3:
branches.append(atom_num)
elif num_n == 0:
debug(" ERROR!! num_n =0 of atom :" + str(atom_num))
else:
debug(" ERROR in sortFirstThreeOfGroup()!!!!!!!")
return []
# Sort linears
if len(atom_group) <= 2:
return atom_group
if terminals == [] and branches == []:
debuggroup(" the atoms are in a ring!!!")
ring = order_ring_or_chain(st, atom_group)
return ring
# Search for a terminal next to a linear. If exists, add them to the list first
# finds only the first occurance.
for terminal in terminals:
if terminal in sorted_group:
break
for linear in neighbors_dict[terminal]:
if linear in linears and linear not in sorted_group:
sorted_group.append(terminal)
sorted_group.append(linear)
linears_neighbors = neighbors_dict[linear]
for linear_n in linears_neighbors:
if linear_n != terminal and linear_n not in sorted_group:
sorted_group.append(linear_n)
break
break
if not sorted_group:
debuggroup("No linears next to terminal!")
# search for branch bound to two terminals. Make branch go first
for branch in branches:
neighbors = neighbors_dict[branch]
if neighbors[0] in terminals and neighbors[1] in terminals:
sorted_group.append(branch)
sorted_group.append(neighbors[0])
sorted_group.append(neighbors[1])
sorted_group.append(neighbors[2])
break
elif neighbors[1] in terminals and neighbors[2] in terminals:
sorted_group.append(branch)
sorted_group.append(neighbors[1])
sorted_group.append(neighbors[2])
sorted_group.append(neighbors[0])
break
elif neighbors[0] in terminals and neighbors[2] in terminals:
sorted_group.append(branch)
sorted_group.append(neighbors[0])
sorted_group.append(neighbors[2])
sorted_group.append(neighbors[1])
break
if not sorted_group:
# search for a branch surrounded by one terminal:
terminal_n = None
for branch in branches:
neighbors = neighbors_dict[branch]
if len(neighbors):
for n in neighbors:
if n in terminals:
terminal_n = n
break
if terminal_n:
break
if terminal_n:
sorted_group.append(branch)
sorted_group.append(n)
for other_n in neighbors:
if other_n != n:
sorted_group.append(other_n)
debuggroup(" Added branch: %i (%i is terminal)" % (branch, n))
if not sorted_group and len(branches) > 0:
debuggroup(
"Only branches bonded to linears and other branches are left!!!!"
)
# search for any other branch...
branch = branches[0]
neighbors = neighbors_dict[branch]
sorted_group.append(branch)
sorted_group.append(neighbors[0])
sorted_group.append(neighbors[1])
sorted_group.append(neighbors[2])
if not sorted_group and len(atom_group) > 4:
debug(
" ERROR !!!! len(sorted_group) == 0 and len(atom_group) > 4!!!"
)
# Append other atoms to the group:
for atom_num in atom_group:
if atom_num not in sorted_group:
sorted_group.append(atom_num)
if input_len < len(sorted_group):
debug(
"ERROR in sortFirstThreeOfGroup()!!!! Input length < output length !!!"
)
return sorted_group
[docs] def resolveTriangularGroup(self, atoms):
"""
Assign a double bond to "best" bond pair in the set. If any bond score is under
the double bond threshold and is also "terminal", remove it from further
consideration for double bonding.
Input atom list must be sorted: the first atom in the list must be central, and
the three next atoms are bonded to it in a star-like arrangement.
If all three neighbors have bonds scores above the threshold, the one with the
highest score is turned into a double bond. Else, the one with the lowest score
is removed from the set to avoid infinite recursion.
In case of symmetrical inputs, 2 or more bonds may have the same length, etc,
and ties may happen. We need a non-geometrical criterion to resolve ties in a
reproducible manner. For this reason, use atom indexes as a secondary criterion.
"""
DOUBLE_BONDING_SCORE_THRESHOLD = 4
central_atom = atoms[0]
bond_scores = []
for neighbor in atoms[1:4]:
bond_score = self.calcBondScore(central_atom, neighbor)
bond_scores.append((bond_score, neighbor))
if bond_score < DOUBLE_BONDING_SCORE_THRESHOLD:
# If this bond has low score, and is not bound to the rest of the
# atoms in the group (it's "terminal") leave this bond a single bond.
if set(self.getNeighbors(neighbor)).intersection(atoms) == {
central_atom
}:
atoms.remove(neighbor)
return atoms
# If all 3 bonds are "good enough" to be considered for a double
# bond, pick the one with the best score and remove both ends:
bond_scores.sort()
lowest_score, lowest_neighbor = bond_scores[0]
if lowest_score >= DOUBLE_BONDING_SCORE_THRESHOLD:
# All bonds in list are suited to be made double.
# Choose the one with highest score:
highest_score, highest_neighbor = bond_scores[-1]
self.setBondOrder(central_atom, highest_neighbor, 2)
atoms.remove(central_atom)
atoms.remove(highest_neighbor)
else:
# None of the bonds are "good enough" to be made a double bond.
# Remove neighbor that would form the lowest-scoring double bond
# from consideration for the next iteration.
atoms.remove(lowest_neighbor)
return atoms
[docs] def assignGroupDoubleBonds(self, atom_set):
""" Assigns double bonds where appropriate for the atoms in the supplied group. """
if len(atom_set) < 2:
return
atom_set = self.sortFirstThreeOfGroup(atom_set)
debug(
" self.assignGroupDoubleBonds() input after sorting first 3: %s"
% atom_set)
if len(atom_set) < 2: # One sp2 atom by itself. Don't bond.
return
elif len(atom_set) == 2: # 2 potential double-bonding atoms together
bond_score = self.calcBondScore(atom_set[0], atom_set[1])
if bond_score > DOUBLE_BOND_THRESHOLD:
debug(" Adding double bond between " + str(atom_set[0]) +
" and " + str(atom_set[1]))
self.setBondOrder(atom_set[0], atom_set[1], 2)
return
elif len(atom_set) == 3: # 3 potential double-bond atoms together.
best_bond = self.getBestDoubleBond(atom_set[0], atom_set[1],
atom_set[2])
if best_bond != []:
self.setBondOrder(best_bond[0], best_bond[1], 2)
return
else: # 4 or more atoms
if len(atom_set) < 4:
debug("ERROR !!! len(atoms) < 4 !!!!")
return
st = self.st
# Triangular arrangement
if st.areBound(atom_set[0], atom_set[1]) and \
st.areBound(atom_set[0], atom_set[2]) and \
st.areBound(atom_set[0], atom_set[3]):
atom_set = self.resolveTriangularGroup(atom_set)
# The first four atoms in the set have been processed. Now, recurse
# into this method again to move down the atom set
# (THIS CODE MAY HAVE MAJOR PROBLEMS)
self.assignGroupDoubleBonds(atom_set)
return
# NOT triangular arrangement:
# Check to see if all of these atoms form a ring.
in_ring = 1
for atom_num in atom_set:
neighbors = self.getNeighbors(atom_num)
bondable_neighbors = []
for n in neighbors:
if n in atom_set:
bondable_neighbors.append(n)
num_n = len(bondable_neighbors)
if num_n == 1 or num_n == 3:
in_ring = 0
break
elif num_n <= 0 or num_n > 3:
debug(" ERROR!! num_n <=0 or > 3!: %s" % atom_num)
if in_ring:
debug(" THE ATOMS ARE IN A RING!!!")
debug(" ring=%s" % atom_set)
try:
best_bond_of_1_2 = self.getBestDoubleBond(
atom_set[0], atom_set[1], atom_set[2])
except:
debug(" ERROR from getBestDoubleBond() ")
return
atoms = atom_set[:] # modify a copy
if in_ring:
try:
best_bond_of_2_3 = self.getBestDoubleBond(
atom_set[1], atom_set[2], atom_set[3])
if in_ring and best_bond_of_1_2 == best_bond_of_2_3:
self.setBondOrder(best_bond_of_1_2[0],
best_bond_of_1_2[1], 2)
atoms.pop(1)
atoms.pop(1)
self.assignGroupDoubleBonds(atoms)
return
except:
debug(" ERROR from getBestDoubleBond() ")
if best_bond_of_1_2 == []:
debuggroup(" getBestDoubleBond() returned []!!!")
# none are double bonds. remove first two atoms or the second atom
atoms.pop(1)
self.assignGroupDoubleBonds(atoms)
return
first2atoms = [atoms[0], atoms[1]]
if best_bond_of_1_2 == first2atoms:
if in_ring: # reorder the atoms
debuggroup(
" first two atoms could double bond, but it's a ring, therefore reordering atoms"
)
atom0 = atoms[0]
atoms.pop(0)
atoms.append(atom0)
else:
debuggroup(
" first two atoms should double bond! adding bond")
self.setBondOrder(best_bond_of_1_2[0], best_bond_of_1_2[1],
2)
atoms.pop(0)
atoms.pop(0)
self.assignGroupDoubleBonds(atoms)
return
else: # the second bond should be a double bond
if in_ring:
atom0 = atoms[0]
atom1 = atoms[1]
atoms.pop(0)
atoms.pop(0)
atoms.append(atom0)
atoms.append(atom1)
debuggroup(" first two atoms should NOT double bond")
atoms.pop(0)
self.assignGroupDoubleBonds(atoms)
return
[docs] def identifyGroups(self):
""" Identifies groups and sets bond orders appropriately. """
# TODO create a function for each group here:
def _handle_nitroso(r, n, o):
if not self.getNumNeighbors(o) == 1:
return
if self.isAtomAromatic(r):
# FIXME In the future we should expand into detecting
# nitronos in cases where aromatic rings are not involved.
self.setBondOrder(n, o, 2)
flip = 0 # for debugging purposes
debug("\nIdentifying functional groups...")
for a in self.atoms:
atom = self.st.atom[a]
element = atom.element
# Ignore atoms that are not first atoms in templates:
if element not in ['C', 'N', 'P', 'S', 'As']:
continue
# Ignore the atom if it already has double bonds:
if not self.isOnlySingleBonded(atom):
continue
neighbors = []
for natom in atom.bonded_atoms:
neighbors.append(natom)
num_bonds = len(neighbors)
# FIRST ATOM IS NITROGEN:
if element == 'N':
# Nitrate groups:
if num_bonds == 3:
oxygens = []
for natom in neighbors:
if natom.element in ['O', 'S']:
if len(natom.bond) == 1:
oxygens.append(natom.index)
if len(oxygens) == 2:
if flip: # for debugging purposes
tmp = oxygens[0]
oxygens[0] = oxygens[1]
oxygens[1] = tmp
ox0 = self.st.atom[oxygens[0]]
ox1 = self.st.atom[oxygens[1]]
debug("Group: OM oxygen")
# Decide which Oxygen to double bond to...
if ox0.formal_charge == -1 or ox0.atom_type == 18:
self.setBondOrder(a, oxygens[1], 2)
if 1: # not self.neutralize_groups:
ox0.atom_type = 18 # OM
ox0.formal_charge = -1
else:
self.setBondOrder(a, oxygens[0], 2)
if 1: # not self.neutralize_groups:
ox1.atom_type = 18 # OM
ox1.formal_charge = -1
atom.atom_type = 31
atom.formal_charge = 1
continue
if num_bonds == 2:
# R-N-R
n1 = neighbors[0]
n2 = neighbors[1]
if n1.element == 'N' and n2.element == 'N':
# Azide R--N=N=N
debug("Group Azide R--N=N=N")
angle = self.st.measure(n1, a, n2)
if angle > 150:
self.setBondOrder(n1, a, 2)
self.setBondOrder(a, n2, 2)
# PPREP-917 Correct the formal charges:
self.setFormalCharge(atom, +1)
# Add negative charges to side nitrogens (if single-bonded):
for nn in (n1, n2):
# NOTE: atoms ZOB-bonded are NOT considered neighbors
if self.getNumNeighbors(nn) == 1:
self.setFormalCharge(nn, -1)
elif n1.element == 'O':
_handle_nitroso(n2, a, n1)
elif n2.element == 'O':
_handle_nitroso(n1, a, n2)
# FIRST ATOM IS CARBON
elif element == 'C':
# Caboxylic Acids:
if num_bonds <= 3:
c_num = a
# find all OX1's
oxygens = []
for n in neighbors:
if n.element == 'O':
if self.getNumNeighbors(n) == 1:
oxygens.append(n.index)
if len(oxygens) >= 2:
debug("Group: Carboxylic Acid")
if flip: # for debugging purposes
tmp = oxygens[0]
oxygens[0] = oxygens[1]
oxygens[1] = tmp
# Carboxylic Acid - C(=O)-OH ###
ox0 = self.st.atom[oxygens[0]]
ox1 = self.st.atom[oxygens[1]]
# Decide which Oxygen to double bond to...
if ox0.formal_charge == -1 or ox0.atom_type == 18:
self.setBondOrder(c_num, oxygens[1], 2)
if not self.neutralize_groups:
ox0.atom_type = 18
ox0.formal_charge = -1
else:
self.setBondOrder(c_num, oxygens[0], 2)
if not self.neutralize_groups:
ox1.atom_type = 18
ox1.formal_charge = -1
continue
# Amides/Esters
no2s = []
for natom in neighbors:
if natom.element in ['O', 'S']:
if len(natom.bond) > 1:
no2s.append(natom.index)
elif natom.element == 'N':
no2s.append(natom.index)
if len(oxygens) == 1 and len(no2s) >= 1:
if not self.isAtomAromatic(c_num):
o_num = oxygens[0]
no2_num1 = no2s[0]
c_bond_angle = calculate_average_angle(
self.st, c_num)
length = self.st.measure(c_num, o_num)
if (length < 1.36 and
c_bond_angle > 109) or (c_bond_angle > 115):
# See Ev:118556
debug("Group: Amide/ester")
self.setBondOrder(c_num, o_num, 2)
n = self.st.atom[no2_num1]
if n.element == 'N':
self.amide_nitrogens.append(n.index)
continue
# Searching for Amidines/Guanidines:
# R
# \
# C=NH
# /
# NH2
terminal_nitrogen_neighbors = []
for n in self.getNeighbors(a):
if self.st.atom[n].element == 'N':
nonhcount = 0
for nn in self.getNeighbors(n):
if self.st.atom[nn].atomic_number != 1:
nonhcount += 1
if nonhcount == 1:
terminal_nitrogen_neighbors.append(n)
if len(terminal_nitrogen_neighbors) >= 2:
if flip: # For debugging purposes.
dn = terminal_nitrogen_neighbors[1]
else:
dn = terminal_nitrogen_neighbors[0]
debug("Group: Amidine/Guanidine")
self.setBondOrder(c_num, dn, 2)
if self.getNumNeighbors(dn) == 1:
# Ev:120911 This nitrogen is not bound to anything;
# add a hydrogen to it:
self.st.atom[dn].formal_charge = 1
continue
# Ketones
if len(oxygens) == 1:
ox_atom = self.st.atom[oxygens[0]]
the_bond = ox_atom.bond[1]
bond_length = self.st.measure(the_bond.atom1,
the_bond.atom2)
c_bond_angle = calculate_average_angle(self.st, c_num)
bond_likely_double = False
if c_bond_angle > 115 and c_bond_angle < 150:
if bond_length < 1.3:
bond_likely_double = True
c_is_aromatic = self.isAtomAromatic(c_num)
if c_is_aromatic == 0 and bond_likely_double:
debug("Group: Ketone")
self.setBondOrder(c_num, ox_atom, 2)
continue
# Phosphates
# For all single bonded Oxygens, make one double bonded and rest to OM (O-)
elif element == 'P':
# atom (a) is a Phosphorus without double bonds to anything
p_atom = a
one_set_to_double = False
for n in atom.bonded_atoms:
if n.element == 'O' and self.getNumNeighbors(n) == 1:
if not one_set_to_double:
debug("Group: Phosphate")
self.setBondOrder(p_atom, n, 2)
one_set_to_double = True
else:
n.atom_type = 18 # OM
n.formal_charge = -1
# Arsenate
# For all single bonded Oxygens, make one double bonded and rest to OM (O-)
elif element == 'As' and num_bonds == 4:
# atom (a) is a Arsenic without double bonds to anything
as_atom = a
one_set_to_double = False
for n in atom.bonded_atoms:
if n.element == 'O' and self.getNumNeighbors(n) == 1:
if not one_set_to_double:
debug("Group: Arsenate")
self.setBondOrder(as_atom, n, 2)
one_set_to_double = True
else:
n.atom_type = 18 # OM
n.formal_charge = -1
elif element == 'S':
# Sulfones and Thioamides (Ev:53894):
# R
# \
# C=S
# /
# N
s_atom = a
o_neighbors = []
for n in neighbors:
if n.element == 'O' and self.getNumNeighbors(n) == 1:
o_neighbors.append(n.index)
if len(o_neighbors) == 2:
# R
# O=S=O
# R
debug("Group: Sulfone")
self.setBondOrder(s_atom, o_neighbors[0], 2)
self.setBondOrder(s_atom, o_neighbors[1], 2)
continue
elif len(o_neighbors) == 3:
# O
# O=S=O
# R
debug("Group: Sulfone")
# Ev:128301 Do not add double bond to the oxygen that has a negative charge
num_bonds_incremented = 0
for oatom in o_neighbors:
if self.st.atom[oatom].formal_charge == 0:
self.setBondOrder(s_atom, oatom, 2)
num_bonds_incremented += 1
if num_bonds_incremented == 2:
break # 2 dobule bonds is enough
continue
elif len(o_neighbors) == 1:
if self.getNumNeighbors(s_atom) > 2:
debug("Group: R-S(=O)-R")
self.setBondOrder(a, o_neighbors[0], 2)
elif num_bonds == 1 and neighbors[0].element == 'C':
carbon = neighbors[0]
# Go through each neighbor of the carbon that's bonded to the sulfur:
for ns_n in carbon.bonded_atoms:
if ns_n.element == 'N':
if self.getNumNeighbors(carbon) == 2:
# Arrangement: S-C-N
c_bond_angle = calculate_average_angle(
self.st, carbon)
if c_bond_angle > 160:
# Ev:117941
debug("Group: S=C=N (linear), angle: %.2f" %
c_bond_angle)
self.setBondOrder(carbon, ns_n, 2)
self.setBondOrder(s_atom, carbon, 2)
if len(ns_n.bond) == 1:
# Ev:117941
debug(
" Nitrogen is terminal (Thiocynate)"
)
ns_n.formal_charge = -1
break
debug("Group: Thioamide - NC(=S)R")
self.setBondOrder(s_atom, carbon, 2)
break
debug("Identify_groups() has completed.")
return
[docs] def findRings(self):
"""
Returns a list of all rings in the st within the atoms range.
"""
debug("Creating a list of rings...")
raw_rings = self.st.find_rings(sort=True)
postprocessed_rings = []
for ring in raw_rings:
keep_ring = True
for a1, a2 in self.forEdgeInRing(ring):
# Remove rings that contain atoms that we are excluding
# from the assignment:
if a1 not in self.atoms or a2 not in self.atoms:
keep_ring = False
break
# Remove rings that contain ZOBs are their edges:
if self.st.getBond(a1, a2).order == 0:
keep_ring = False
break
if keep_ring:
postprocessed_rings.append(ring)
self.all_rings = postprocessed_rings
debug("all_rings = %s" % self.all_rings)
self.bonds_in_5mem_rings = set()
self.bonds_in_6mem_rings = set()
for ring in self.all_rings:
if len(ring) == 5:
for a1, a2 in self.forEdgeInRing(ring):
self.bonds_in_5mem_rings.add(tuple(sorted((a1, a2))))
elif len(ring) == 6:
for a1, a2 in self.forEdgeInRing(ring):
self.bonds_in_6mem_rings.add(tuple(sorted((a1, a2))))
debug("bonds_in_5mem_rings = %s" % self.bonds_in_5mem_rings)
debug("bonds_in_6mem_rings = %s" % self.bonds_in_6mem_rings)
[docs] def isRingAromatic(self, ringatoms):
"""
Returns a boolean - whether the specified ring is aromatic or not.
Aromatic ring is defined in this context as a 5 or 6 membered ring
that is likely, based on bond lengths and angles, etc. to be an
aromatic ring.
ringatoms are assumed to be in connectivity order.
"""
debug("\nisRingAromatic(): ring: %s" % ringatoms)
st = self.st
ring = structure._Ring(st, 0, ringatoms, None)
num_mem = len(ringatoms)
if num_mem not in [5, 6]:
debug(
" Not aromatic. Only 5 and 6 atom rings are considered aromatic by this code"
)
return False # NOT 5 or 6 membered; go to the next ring
planarity = calculate_planarity(self.st, ringatoms)
debug(" planarity: %.2f" % planarity)
# Look for atoms that can't be present in aromatic rings
bad_atoms_present = False
for atom in ring.atom:
element = atom.element
angle = calculate_average_angle(st, atom)
num_n = self.getNumNeighbors(atom)
if num_n < 2 or num_n > 3:
bad_atoms_present = True
debug(" NON-AROMATIC ATOM: < 2 or > 3 neighbors for atom %i" %
atom)
elif angle < 115 and num_n == 3:
bad_atoms_present = True
debug(
" NON-AROMATIC ATOM: small ave angle for atom %i angle: %.3f"
% (atom, angle))
elif element not in ['C', 'N', 'O', 'S',
'Si']: # atom other than C and N
debug(
" NON-AROMATIC ATOM: Bad element found! Atom %i, element: %s"
% (atom, element))
bad_atoms_present = True
if bad_atoms_present:
debug(" Not aromatic because unallowable atoms are present")
return False
debug(" No bad atoms present. Checking angles...")
# IMPORTANT a non-aromatic ring can have double bonds in it also
# So it is BAD dihedral & distances that we need to punish.
# 5 membered rings are more sensitive to dihedrals & lengths:
if num_mem == 5:
distance_weight = 1.4
score_cutoff = 5.0
score_weight = 0.5
penalty_threshold = 4.0
ring_angle_weight = 0.5
score_top_cutoff = 17.0
planarity_weight = 0.8
elif num_mem == 6:
distance_weight = 0.8
score_cutoff = 5.0 # penalize bonds with scores less than this
score_weight = 0.01 # how important is bond score
penalty_threshold = 5.0 # penalty more than this = non-aromatic
ring_angle_weight = 0.3
score_top_cutoff = 20.0
planarity_weight = 1.0
penalty = 0
# Fix for Ev:107169
num_high_scoring_bonds = 0
for edge in ring.edge:
atom1 = edge.atom1
atom2 = edge.atom2
score = self.calcBondScore(atom1, atom2, distance_weight,
ring_angle_weight)
debug(' BOND (%i, %i) SCORE: %f' %
(atom1.index, atom2.index, score))
if score < score_cutoff: # Likely a single bond
debug(" bond score is below cutoff of %f" % score_cutoff)
penalty += (score_cutoff - score) * score_weight
debug(" increasing penalty by %f" %
((score_cutoff - score) * score_weight))
elif score > score_top_cutoff: # Likely a double bond
debug(" bond score is above top cutoff of %f" %
score_top_cutoff)
num_high_scoring_bonds += 1
if num_high_scoring_bonds >= 4:
# Fix for Ev:107169
# Likely to be an aromatic ring in 4 out of 5 or 6 bonds are high-scoring.
debug(
" Decreasing penalty by 1.0 due to high number of high scoring bonds"
)
penalty -= 1.0
# Rings less planar will be penalized:
# NOTE: It is possible for a 5-member ring to be completely planar, yet
# not be aromatic.
p_penalty = planarity * planarity_weight
debug(" penalty due to non-planarity: %f" % p_penalty)
penalty += p_penalty
debug(' RING PENALTY: %s THRESHOLD: %s' % (penalty, penalty_threshold))
if penalty < penalty_threshold:
debug(' aromatic')
return True
else:
debug(' non aromatic')
return False
[docs] def findAromaticRings(self):
""" Returns a list of aromatic rings in st within all_rings."""
aromatic_rings = []
bu_level = logger.level
self.aromatic_ring_atoms = set()
for ringatoms in self.all_rings:
if self.isRingAromatic(ringatoms):
aromatic_rings.append(ringatoms)
self.aromatic_ring_atoms.update(ringatoms)
logger.setLevel(bu_level)
self.aromatic_rings = aromatic_rings
[docs] def isAtomAromatic(self, atom):
""" Returns True if the given atom is part of an aromatic ring """
anum = int(atom)
for r in self.aromatic_rings:
if anum in r:
return True
return False
[docs] def areAtomsOrtho(self, ring, atom1, atom2):
r1_index = ring.index(atom1)
r2_index = ring.index(atom2)
return abs(r1_index - r2_index) == 3
[docs] def isBondRingEdge(self, a1, a2, rings=None):
a1 = a1.index
a2 = a2.index
if rings is None:
rings = self.all_rings
for r in rings or rings:
try:
index1 = r.index(a1)
index2 = r.index(a2)
except ValueError:
continue
else:
if abs(index1 - index2) == 1 or (index1 == 0 and
index2 == len(r) - 1) or (
index2 == 0 and
index2 == len(r) - 1):
return True
return False
[docs] def isAtomBetweenTwoRings(self, atom):
"""
Returns True if this atom is bound to two rings, but is not part
of a ring itself. Like this: Ring-X-Ring.
Such an atom may get a double bond to one of the rings if needed.
"""
anum = atom.index
num_rings = 0
num_others = 0
# Need to make sure this atom is not bound to anything else
# other than these two rings (for now). In the future, we could
# check whether the geometry is planar (if bound to 3 other atoms).
for bond in atom.bond:
if bond.order == 1:
nnum = bond.atom2.index
n_in_ring = False
for ring in self.all_rings:
if nnum in ring and anum not in ring:
n_in_ring = True
break
if n_in_ring:
num_rings += 1
else:
num_others += 1
return (num_rings == 2) and (num_others == 0)
# FIXME perhaps 3 rings is okay also (if the geometry is planar)?
[docs] def isAtomBoundToAromaticRing(self, atom):
"""
Returns True if at least one of atoms bound to it is a member of an
aromatic ring.
"""
for bond in atom.bond:
if bond.atom2.index in self.aromatic_ring_atoms:
return True
return False
[docs] def assignAromaticBonders(self, ring, bonders):
"""
Assigns bond orders for the supplied 5 or 6-membered aromatic ring.
Returns True if successful, False if not.
<bonders> is a list of atoms that MUST get double-bonded.
"""
if len(ring) != 5 and len(ring) != 6:
debug(" EXITING - require 5 or 6 membered ring")
return False
debug(" assignAromaticBonders() ring: %s, bonders: %s" %
(ring, bonders))
if len(bonders) == 6:
# 3 bonds from this ring must be double.
# We need to pick one bond which is the most ideal canditate for a
# double bond, and then assign the other 4 later.
# If this ring shares an edge with ONE 5-membered aromatic ring (as in
# 1fvt), make that bond double. This makes the assignment of that ring
# more flexible later (more options).
# Do NOT do this if the 6-membered ring is stuck between 2 opposing
# 5-membered rings (such as in 3coh), as that can cause a condition
# where the 2 remaining bonders are not bonded to each other.
shared_edges = []
for a1, a2 in self.forEdgeInRing(ring):
if tuple(sorted((a1, a2))) in self.bonds_in_5mem_rings:
shared_edges.append((a1, a2))
# Pick one "shared" edge to make a double bond:
# FIXME ideally we should add logic to pick the "best" bond if more than
# one shared edge is found.
if shared_edges:
atom1, atom2 = shared_edges[0]
self.setBondOrder(atom1, atom2, 2)
# Must have the "if" checks, since each atom can be in 2 edges
if atom1 in bonders:
bonders.remove(atom1)
if atom2 in bonders:
bonders.remove(atom2)
# Ev:63487 Put the atoms into groups by connectivity:
bonders_groups = []
curr_group = []
for anum in ring:
if anum in bonders:
curr_group.append(anum)
else:
if curr_group:
bonders_groups.append(curr_group)
curr_group = []
if curr_group:
bonders_groups.append(curr_group)
# Join first and last bonders_groups if first and last atoms in the
# ring are both bonders:
if len(bonders_groups) > 1:
if ring[0] in bonders and ring[-1] in bonders:
bonders_groups[0].extend(bonders_groups[-1])
bonders_groups.remove(bonders_groups[-1])
debug(" Bonders groups: %s" % bonders_groups)
for group in bonders_groups:
debug(' Analyzing group: %s' % group)
if len(group) == 1:
debug(" ONLY 1 not doubled atom in ring group!")
elif len(group) == 2:
if self.st.areBound(group[0], group[1]):
debug(" adding aromatic bond: %i-%i" %
(group[0], group[1]))
self.setBondOrder(group[0], group[1], 2)
elif len(group) == 3:
debug(" THREE not doubled in ring group!")
elif len(group) == 4:
group = order_ring_or_chain(self.st, group)
if self.st.areBound(group[0], group[1]):
if not self.st.areBound(group[2], group[3]):
raise ValueError("Ring atoms %i and %i are not bound" %
(group[2], group[3]))
debug(" adding aromatic bonds: %i-%i, %i-%i" %
(group[0], group[1], group[2], group[3]))
self.setBondOrder(group[0], group[1], 2)
self.setBondOrder(group[2], group[3], 2)
else:
if not self.st.areBound(group[1], group[2]):
raise ValueError("Ring atoms %i and %i are not bound" %
(group[1], group[2]))
if not self.st.areBound(group[0], group[3]):
raise ValueError("Ring atoms %i and %i are not bound" %
(group[0], group[3]))
debug(" adding aromatic bonds: %i-%i, %i-%i" %
(group[1], group[2], group[0], group[3]))
self.setBondOrder(group[1], group[2], 2)
self.setBondOrder(group[0], group[3], 2)
elif len(group) == 5:
debug(" assignAromaticBonders(): FIVE bonders given!!!")
elif len(group) == 6 and len(ring) == 6:
debug(" adding aromatic bonds: %i-%i, %i-%i, %i-%i" %
(ring[1], ring[2], ring[3], ring[4], ring[5], ring[0]))
self.setBondOrder(ring[1], ring[2], 2)
self.setBondOrder(ring[3], ring[4], 2)
self.setBondOrder(ring[5], ring[0], 2)
[docs] def determineAromaticAtomType(self, ring, a, ring_group_atoms):
""" Determines the type of the atom in the aromatic ring. """
def get_ring_neighbors(ring_atom):
neighbors = []
for bond in self.st.atom[ring_atom].bond:
ni = bond.atom2.index
if ni in ring:
neighbors.append(ni)
return neighbors
aatom = self.st.atom[a]
a_element = aatom.element
# search for REQUIRED non-bonders ###############
# X --X
# | \
# | O/S
# | /
# X---X
if a_element in ['O', 'S']:
return ("RNB", 0)
# search for REQUIRED non-bonders ###############
# X---X X===X
# | \ | |
# | C==X and X X
# | / \ /
# X---X X
if not self.isOnlySingleBonded(aatom):
return ("RNB", 0)
if a_element == 'C':
neighbors = self.getNeighbors(a)
# search for: ############
# X --X
# | \
# | C <-----
# | /
# X---X
if len(neighbors) == 2: # carbon only
return ("RB", 0)
# It is a carbon bound to 3 neighbors
for n in neighbors:
if n not in ring:
natom = self.st.atom[n]
n_element = natom.atomic_number
n_element_name = natom.element
# search for: ############
# X --X
# | \
# | C==O <-----
# | /
# X---X
if n_element == 8 or n_element == 16:
if len(natom.bond) == 1:
# If the atom already has a negative charge, don't
# add the double bond:
if natom.formal_charge == -1:
return ("RB", 0)
else:
return ("CO", 0)
# search for atoms bound to other aromatic rings:
# X---X X---X
# | \ / \
# | O C---n O X
# | / \ /
# X---X X---X
# Whether (n) is part of another aromatic ring
other_ring = None
na_same_r = False
for r in self.aromatic_rings:
if n in r:
other_ring = r
na_same_r = (a in r)
break
if other_ring:
# If (n) is part of an aromatic ring
if n not in ring_group_atoms:
# The other atom (n) is NOT in this aromatic group
# Ev:127473 (1e9h) FIXME This code is not ideal:
# Also see PPREP-940, which this section of code
# initially created
if len(other_ring) == 5 and len(ring) == 5:
debugbonders(" Bound to another aromatic ring")
if self.st.getBond(a, n).length < 1.37:
return ("RNB", 0)
return ("RB", 0)
if na_same_r:
# The other atom (n) and (a) are in ANOTHER
# aromatic ring together
# XDXAR ####################################
# X----X a = Carbon
# | \
# | O a---n <- Carbon or nitrogen : MAY NEED TO BE CHANGED!
# | / \
# X----X O X
# \ /
# X---x
if (n_element == 6 or n_element == 7):
return ("XDXAR", 0)
# XDXAR ####################################
# X----X
# | \
# | O a---N (terminal)
# | /
# X----X
# The neighbor is terminal Nitrogen
if len(natom.bond) == 1 and n_element == 7:
pass # return ("XDXAR",points)
# search for REQUIRED BONDERS: ############
# X---X
# | \
# | C---X (non-double-bond that can't potentially double bond)
# | / X != C,N,O,S except -O- and -S-
# X---X
if n_element != 6 and n_element != 8 and n_element != 7 and n_element != 16:
return ("RB", 0)
if n_element == 8 or n_element == 16:
if len(natom.bond) >= 2:
return ("RB", 0)
else: # CO
break
# search for REQUIRED BONDERS: ############
# X---X
# | \
# | C---C/N (non-double-bond that can't potentially double bond)
# | /
# X---X
# bound to carbon outside of ring
if n_element == 6 or n_element == 7:
score = self.calcBondScore(a, n)
debugbonders(" outside score for %s-%s = %s" %
(a, n, score))
if score < 5.0:
return ("RB", 0)
if self.st.measure(a, n) > 1.45:
# "Longer" bonds can be considered double
# only if they are next to a ketone carbon.
# (1r78)
ketone_neighbor = False
for n in get_ring_neighbors(a):
if self.isKetoneCarbon(n):
ketone_neighbor = True
break
if not ketone_neighbor:
return ("RB", 0)
break
debugbonders(" XDX[AR][CH2AR]...")
# search for POTENTIAL systems: ############
# (exocyclic double-bond) ##############
# X --X
# | \
# | C==X <-----
# | /
# X---X
if a_element == 'C' and len(neighbors) == 3: # carbon
n = self.getExtracyclicAtom(ring, aatom)
score = self.calcBondScore(a, n)
debugbonders(" Score to exocyclic atom: %s->%s = %.2f" %
(a, n, score))
# PPREP-956 Must be only single bonded:
if self.isOnlySingleBonded(
n) and score >= 6.0 and n_element_name == 'C':
# XDX and XDXCH2AR##########################
# X----X ( nn1 ) <- Optional
# | \ /
# | a(C)-n(C)
# | / \
# X----X ( nn2 ) <- Optional
for n2 in self.getNeighbors(n):
if n2 == a: # n2 in ring:
continue
# XDXCH2AR ##################
# X----X
# | \
# | O a---n(CH2)
# | / \
# X----X n2==X
# / \
# X Arom. X
# \ /
# X---X
ns_neighbors = self.getNeighbors(n2)
if self.st.atom[
n2].element == 'C' and self.isOnlySingleBonded(
n):
exit = False
for n3 in ns_neighbors:
if n3 in ring:
exit = True
if exit:
break
if self.isAtomAromatic(
n2) and not self.isAtomAromatic(n):
return ("XDXCH2AR", 0)
return ("XDX", score)
if a_element == 'N':
# FIXME move to top of function:
neighbors = self.getNeighbors(a)
if len(neighbors) == 2:
# X --X
# | \
# | N <-----
# | /
# X---X
return ("N2", 0)
elif len(neighbors) == 3:
for n in neighbors:
if n not in ring:
natom = self.st.atom[n]
if natom.element == 'O' and len(natom.bond) == 1:
# X --X
# | \
# | N--O <-----
# | /
# X---X
return ("N3O", 0)
# Ev:96621
# Check whether the neighbor is in another aromatic
# ring with the nitrogen:
for r in self.aromatic_rings:
if n in r and a in r:
# X --X
# | \
# | N--X
# | / \ <- another aromatic ring
# X---X X
# \.../
return ("NAr", 0)
# search for: ###############
# X --X
# | \
# | N -- X <-----
# | /
# X---X
return ("N3", 0)
# If the atom does not meet any of the criteria above...
return ("U", 0)
[docs] def areCOsPara(self, ring, c1, c2):
if len(ring) != 6:
return False
# Determine whether the carbonyls are in in para arrangement:
c1i = c2i = None
for i, ratom in enumerate(ring):
if ratom == c1:
c1i = i
if ratom == c2:
c2i = i
return (c1i == (c2i + 3) or c2i == (c1i + 3))
[docs] def determineAromaticAtomTypes(self, ring, ring_group_atoms):
""" Goes through every atom in the supplied aromatic ring, and determines what type of atom it is, then adds the atom to that category. It returns a dictionary (list) of atoms in different groups."""
debug(" Determining aromatic atom types in the ring %s" % ring)
unknowns = []
required_non_bonders = []
required_bonders = []
CO_members = []
XDXAR_members = []
XDXCH2AR_members = []
N3_members = []
N3O_members = []
NAr_members = []
N_members = []
XDX_members_dict = {}
# Break existing in-ring double bonds, so that potentially they could be
# moved to a different location. Example Ev:135383
ring_obj = structure._Ring(self.st, 0, ring, None)
for edge in ring_obj.edge:
if edge.order == 2:
required_bonders.append(edge.atom1.index)
required_bonders.append(edge.atom2.index)
debugbonders(
" Existing double bond added as required_bonders: %s, %s"
% (edge.atom1.index, edge.atom2.index))
edge.order = 1
for a in ring:
if a in required_bonders:
continue
(type,
points) = self.determineAromaticAtomType(ring, a, ring_group_atoms)
if type == "RNB":
required_non_bonders.append(a)
elif type == "RB":
required_bonders.append(a)
elif type == "CO":
CO_members.append(a)
elif type == "XDXCH2AR":
XDXCH2AR_members.append(a)
elif type == "XDXAR":
XDXAR_members.append(a)
elif type == "XDX":
debugbonders("XDX found!")
XDX_members_dict[a] = points
elif type == "N3":
N3_members.append(a)
elif type == "N3O":
N3O_members.append(a)
elif type == "NAr":
NAr_members.append(a)
elif type == "N2":
N_members.append(a)
else:
unknowns.append(a)
XDX_members = list(XDX_members_dict)
ringatomtypes_dict = {}
ringatomtypes_dict["RNB"] = required_non_bonders
ringatomtypes_dict["RB"] = required_bonders
ringatomtypes_dict["CO"] = CO_members
ringatomtypes_dict["XDX"] = XDX_members
ringatomtypes_dict["XDXAR"] = XDXAR_members
ringatomtypes_dict["XDXCH2AR"] = XDXCH2AR_members
ringatomtypes_dict["N3"] = N3_members
ringatomtypes_dict["N3O"] = N3O_members
ringatomtypes_dict["NAr"] = NAr_members
ringatomtypes_dict["N2"] = N_members
ringatomtypes_dict["U"] = unknowns
# Print a sorted list list:
if DEBUG:
atom_types_list = []
for type, atoms in ringatomtypes_dict.items():
for atomnum in atoms:
atom_types_list.append("%i:%s" % (atomnum, type))
atom_types_list.sort()
debug(" Atom types: %s" % ", ".join(atom_types_list))
return ringatomtypes_dict
[docs] def assignGroupTripleBonds(self, atom_group):
""" Assigns triple bonds as appripriateley to the atoms in the supplied group. Returns a list of atoms which now have assigned bond orders."""
if len(atom_group) == 1:
pass
elif len(atom_group) == 2:
if self.st.measure(atom_group[0], atom_group[1]) < 1.33:
self.setBondOrder(atom_group[0], atom_group[1], 3)
elif len(atom_group) == 3:
len1 = self.st.measure(atom_group[0], atom_group[1])
len2 = self.st.measure(atom_group[1], atom_group[2])
len3 = self.st.measure(atom_group[0], atom_group[2])
if len1 <= len2 and len1 <= len3:
self.setBondOrder(atom_group[0], atom_group[1], 3)
if len2 <= len1 and len2 <= len3:
self.setBondOrder(atom_group[1], atom_group[2], 3)
if len3 <= len1 and len3 <= len2:
self.setBondOrder(atom_group[0], atom_group[2], 3)
else:
debug(
" 4 or more potential triple bond atoms in one group!!")
[docs] def assignAromaticRing(self, ring, ring_group_atoms):
"""
Returns ASSIGNED_AROMATIC if everything is okay.
Returns NOT_AROMATIC if the ring is not aromatic.
Returns NOT_ASSIGNED if this ring should be considered later.
"""
debug("\n------ assignAromaticRing(): ring: %s ---" % ring)
ringatomtypes_dict = self.determineAromaticAtomTypes(
ring, ring_group_atoms)
try:
bonders = self.findAromaticBonders(ring, ringatomtypes_dict)
except RuntimeError as err:
if "Not aromatic" in str(err):
return NOT_AROMATIC
elif "Not assigned" in str(err):
return NOT_ASSIGNED
raise
if bonders:
self.assignAromaticBonders(ring, bonders)
return ASSIGNED_AROMATIC
return NOT_ASSIGNED
[docs] def findAromaticBonders(self, ring, ringatomtypes_dict):
"""
Returns a list of suggested bonding atoms in the specified aromatic ring.
Returns [] if no bonds should be made.
If "required_only" is set, return only the REQUIRED BONDERS and REQUIRED NON-BONDERS
Raises RuntimeError("Not aromatic") if the ring is not aromatic.
Raises RuntimeError("Not assigned") if the ring could not be assigned.
"""
# FIXME Consider caching the list of exocyclic bonds
def get_exocyclic_bond(ring_atom):
for bond in self.st.atom[ring_atom].bond:
if bond.atom2.index not in ring:
return bond
return None
def get_ring_neighbors(ring_atom):
neighbors = []
for bond in self.st.atom[ring_atom].bond:
ni = bond.atom2.index
if ni in ring:
neighbors.append(ni)
return neighbors
# Key: bonder atom number; value: score from 0.0 (required non-bonder)
# to 1.0 (required-bonder)
bonder_scores = {}
def get_required_bonders():
return ([a for a, score in bonder_scores.items() if score == 1.0])
def get_required_non_bonders():
return ([a for a, score in bonder_scores.items() if score == 0.0])
def get_potential_bonders():
return ([a for a, score in bonder_scores.items() if score > 0.0])
def set_required_bonder(a):
bonder_scores[a] = 1.0
def set_required_non_bonder(a):
bonder_scores[a] = 0.0
def get_sorted_bonders_and_scores():
decorated = [(score, a) for (a, score) in bonder_scores.items()]
return sorted(decorated)
def log_bonders(msg):
""" Report bonders and their scores, sorted by the score """
if DEBUG_AROMATIC:
decorated = get_sorted_bonders_and_scores()
bonder_strings = [
"%i: %.1f" % (a, score) for (score, a) in decorated
]
debugbonders(msg + " " + ", ".join(bonder_strings))
if not len(ring) in [5, 6]:
debug(" ERROR! not 5 or 6 membered ring!")
raise RuntimeError("Not aromatic")
# Initialize bonder scores:
for a in ring:
if a in ringatomtypes_dict["RNB"]:
bonder_scores[a] = 0.0
elif a in ringatomtypes_dict["RB"]:
bonder_scores[a] = 1.0
elif a in ringatomtypes_dict["N2"]:
bonder_scores[a] = 0.7
else:
bonder_scores[a] = 0.5
if len(get_required_non_bonders()) >= len(ring) - 1:
debug("All req-non-b: Bond orders are already assigned.")
return [] # No need to do further assignment
# Handle special cases when all are required bonders:
# for a in ringatomtypes_dict["RB"]:
# set_required_bonder(a)
# FIXME this will not work for 5-membered rings!
if len(get_required_bonders()) == len(ring):
return get_required_bonders()
if len(get_required_bonders()) == 5 and len(ring) == 6 and len(
get_required_non_bonders()) == 0:
# The whole 6-membered ring should get bonds added.
return ring
if len(get_required_bonders()) == 4 and len(ring) == 5 and len(
get_required_non_bonders()) == 0:
# 4 of the atoms in the 5-membered ring should get bonds added.
return get_required_bonders()
num_os = 0
for a in get_required_non_bonders():
if self.st.atom[a].element in ['O', 'S']:
num_os += 1
if num_os > 1 and len(ring) == 5:
debug(" MORE THAN 1 O or S in the ring! Not aromatic!!")
raise RuntimeError("Not aromatic")
CO_members = ringatomtypes_dict["CO"]
XDXAR_members = ringatomtypes_dict["XDXAR"]
XDXCH2AR_members = ringatomtypes_dict["XDXCH2AR"]
XDX_members = ringatomtypes_dict["XDX"]
N3_members = ringatomtypes_dict["N3"]
NAr_members = ringatomtypes_dict["NAr"]
for n in N3_members[:]:
if self.st.atom[n].formal_charge == 1:
set_required_bonder(n)
# Do not consider for the amide search, below:
N3_members.remove(n)
N_members = ringatomtypes_dict["N2"] + ringatomtypes_dict[
"N3O"] # + ringatomtypes_dict["NAr"]
# OBJECTIVE: make it so that there are 2, 4, or 6 bonders.
# If there are 2 bonders, they have to be next to each other
# If there are 4 bonders, each bonder needs to have another neighbor in
# potential_bonders.
# FIXME !!!!!!!!!
for a in XDXCH2AR_members:
set_required_non_bonder(a)
for n in self.getNeighbors(a):
if n not in ring:
debug("Group XDXCH2AR")
self.setBondOrder(a, n, 2)
log_bonders("Initial bonder scores:")
# Fix for 2g01, part of PPREP-940
# FIXME we need to make this rule more general
if len(get_potential_bonders()) != 4:
for n3o in ringatomtypes_dict["N3O"]:
set_required_non_bonder(n3o)
# Ev:96621 Search for nitrogens which are in 2 (or more) aromatic
# rings:
for n in NAr_members:
# Do not make any double-bonds to this nitrogen (to avoid N+)
# arrangements in bicyclic systems.
if n in get_potential_bonders():
set_required_non_bonder(n)
# Search for amides within the aromatic ring:
amide_n = []
amide_c = []
for co in CO_members:
for n in N3_members:
if self.st.areBound(co, n):
set_required_non_bonder(n)
for n in N_members:
if self.st.areBound(co, n):
if n not in amide_n:
amide_n.append(n)
if co not in amide_c:
amide_c.append(co)
for c in amide_c:
for n1 in self.getNeighbors(c):
if n1 in ring and n1 in get_required_bonders():
for n2 in self.getNeighbors(n):
if n2 != c and n2 in amide_c:
debug("!!!!! CO--R.B.--CO FOUND!")
len1 = 0
len2 = 0
for c1n in self.getNeighbors(c):
if c1n not in ring:
len1 = self.st.measure(c, c1n)
for c2n in self.getNeighbors(n2):
if c2n not in ring:
len2 = self.st.measure(n2, c2n)
debug("len1= %s" % len1)
debug("len2= %s" % len2)
if len(ring) == 6:
if len1 > len2 and len1 > 1.3:
amide_c.remove(c)
break
elif len2 > 1.3:
amide_c.remove(n2)
break
else:
if len1 > len2:
amide_c.remove(c)
break
else:
amide_c.remove(n2)
break
for co in amide_c:
for atom in self.getNeighbors(co):
natom = self.st.atom[atom]
if (natom.element in ['O', 'S']) and len(natom.bond) == 1:
debug("Functional group amide_c O/S with one neighbor")
self.setBondOrder(co, atom, 2)
set_required_non_bonder(co)
break
set_required_non_bonder(co)
# Made Amide nitrogens non-bonders when the N is surrounded by 2 ketone Cs:
for n in amide_n:
neighbors = self.getNeighbors(n)
co_count = 0
for a in neighbors:
if a in amide_c:
co_count += 1
if co_count > 1:
try:
set_required_non_bonder(n)
except:
pass
# PPREP-739 If 2 ortho carbonyls are present in the ring,
# make them both double bonds, if they are short enough:
if len(CO_members) == 2 and \
self.areAtomsOrtho(ring, CO_members[0], CO_members[1]):
# If both are pretty short, make them carbonyls:
both_short = True
for co in CO_members:
if get_exocyclic_bond(co).length > 1.4:
both_short = False
if both_short:
for co in CO_members:
exo_bond = get_exocyclic_bond(co)
self.setBondOrder(co, exo_bond.atom2, 2)
set_required_non_bonder(co)
# Ev:121918 (1w4l) Determine if any C-O bonds should be double, no matter what:
for co in CO_members:
exo_bond = get_exocyclic_bond(co)
exo_atom = exo_bond.atom2
if exo_bond.length < 1.3 and len(get_potential_bonders()) != 6:
self.setBondOrder(co, exo_atom, 2)
set_required_non_bonder(co)
N2N3CO = []
for i in N_members:
if i in get_potential_bonders():
N2N3CO.append(i)
for i in N3_members:
if i in get_potential_bonders():
N2N3CO.append(i)
for i in CO_members:
if i in get_potential_bonders():
N2N3CO.append(i)
# SEARCH FOR N2N3COs (atoms that can either bond or non-bond)
# THAT ARE SURROUNDED BY NON-BONDERS
# Go through potential bonders:
bonders2 = []
for a in get_potential_bonders():
bonders2.append(a)
for a in bonders2:
if a in get_required_bonders():
continue
non_bonder_neighbors = 0
for n in self.getNeighbors(a):
if n in ring and n not in get_potential_bonders():
non_bonder_neighbors += 1
if non_bonder_neighbors > 1:
if a in N2N3CO:
N2N3CO.remove(a)
set_required_non_bonder(a)
if (len(N_members) + len(CO_members)) > 0:
debugbonders(
" making N3s non potential_bonders, because N2s or COs are present"
)
for a in N3_members:
if a in get_potential_bonders(
) and a not in get_required_bonders():
set_required_non_bonder(a)
# Look for a required bonder surrounded by a non_bonder and a potential
# bonder, and make the potential bonder a bonder
log_bonders("Bonder scores after adding XDXs:")
if XDX_members:
LN2CO = len(N_members) + len(CO_members)
add_all = False
if len(N_members) + len(CO_members) == 0:
fut_bonders = len(get_potential_bonders()) - len(XDX_members)
if fut_bonders in [2, 4, 6]:
add_all = True
else:
add_all = False
elif LN2CO == 1 and len(ring) == 6:
add_all = False
elif LN2CO == 0 and len(ring) == 5:
add_all = False
else:
add_all = True
if not add_all:
debug(
" Can add all but last potential non_bonder, because there is no N2, N3, or CO present"
)
if len(XDX_members) > 0:
last = XDX_members[len(XDX_members) - 1]
for i in XDX_members:
# the XDX we are going through is not the one least
# likely to double bond
if i != last and i not in get_required_bonders():
set_required_non_bonder(i)
for n in self.getNeighbors(i):
if n not in ring:
debug("Ring group XDX")
self.setBondOrder(i, n, 2)
break
elif len(ring) == 5:
# This code is needed to render heme-like groups correctly:
# More specifically: Ar-C-Ar, where the carbon needs to get
# a double bond to one of the aromatic rings.
# (In other words, "heme-like" here refers to an arrangement where
# there is a ring that is double-bonded to a CH2, which is
# single-bonded to another ring. This occurs in hemes).
for i in XDX_members:
if i not in get_required_bonders():
for n in self.getNeighborsObj(i):
if n.index not in ring:
# This check is a fix for PPREP-1004
if self.isAtomBetweenTwoRings(n):
debug("Ring group XDX")
self.setBondOrder(i, n, 2)
set_required_non_bonder(i)
break
log_bonders("Bonder scores after subtracting XDXs:")
# Add another non_bonders if needed: 1, 3, or 5 potential_bonders!
if len(get_potential_bonders()) in [1, 3, 5]:
# Even number of potential_bonders. Try to eliminate an N3 atom:
non_required_N3s = []
bonders_N3s = []
for a in N3_members:
if a in get_potential_bonders():
if a not in get_required_bonders():
non_required_N3s.append(a)
else:
bonders_N3s.append(a)
# Ev:101645 Only don't bond N3 members if there is an odd number of them:
if len(non_required_N3s) >= 1:
# There is at least one non-required N3 bonder. Pick one arbitrarily to remove:
set_required_non_bonder(non_required_N3s[0])
elif len(bonders_N3s) >= 1:
# There are no non-required N3 potential_bonders. Pick any one arbitrarily to remove:
set_required_non_bonder(N3_members[0])
# Make all non-bondable CO's double bonds:
for c in CO_members:
if c not in get_potential_bonders():
for n in self.getNeighbors(c):
natom = self.st.atom[n]
if natom.element == 'O' and len(natom.bond) == 1:
debug("Functional group CO found")
self.setBondOrder(c, n, 2)
set_required_non_bonder(c)
break
# Make all double-bonds that are currently present in the ring required bonders:
# Ev:127473 (1fvt)
if len(ring) == 5:
for a1, a2 in self.forEdgeInRing(ring):
bond = self.st.getBond(a1, a2)
if bond.order == 2:
bond.order = 1
set_required_bonder(a1)
set_required_bonder(a2)
debugbonders(
" Existing double bond added as required_bonders: %i, %i"
% (a1, a2))
if len(get_potential_bonders()) == 0:
debug(" NO BONDERS!")
raise RuntimeError("Not aromatic")
# If the number of potential_bonders is odd and there is a nitrogen
# that can be made a non=bonder, do so, giving priority to non-amide
# nitrogens.
if len(get_potential_bonders()) in [1, 3, 5]:
# First try to remove any amide nitrogens from bonders:
for a in N_members:
if a in amide_n:
# For each amide
# If there is more than one, bond the one that is not an amide
if a in get_potential_bonders(
) and a not in get_required_bonders():
set_required_non_bonder(a)
break
# Then try to remove any nitrogens from bonders:
# NOTE: Removing the check for 5-membered ring will break 1ej3
if len(ring) == 5 and len(get_potential_bonders()) in [1, 3, 5]:
for a in N_members:
# There are no amide nitrogens. Add anyone that is left.
if a not in get_required_bonders():
set_required_non_bonder(a)
break
if len(ring) == 6 and len(get_required_non_bonders()) == 1:
nb = get_required_non_bonders()[0]
warning("WARNING: Ring %s has 1 non-bonder (%s)" % (ring, nb))
# Check if one of the neighbors is part of another ring,
# if so, make it a non-bonder (if both match, "randomly" pick
# one). This help especially when the "other" ring is
# in reality aromatic, but is not assigned as such by ABO.
# For example 3a2c (see PPREP-945).
for n in get_ring_neighbors(nb):
num_rings = len(self.getAtomsRings(n))
if num_rings > 1:
set_required_non_bonder(n)
break
if len(get_potential_bonders()) == 5 and len(ring) == 5:
debug(" RING IS NOT AROMATIC!!! NO NON-BONDERS!")
raise RuntimeError("Not aromatic")
if len(get_potential_bonders()) == 3 and len(
get_required_bonders()) == 2:
atom1, atom2 = tuple(get_required_bonders())
if self.st.areBound(atom1, atom2):
return get_required_bonders()
if len(get_potential_bonders()) == 5:
# PPREP-963 If one of these potential bonders is part of
# another aromatic ring, then throw it out (hopefully it
# will get a double bond when that ring will be assigned)
decorated = get_sorted_bonders_and_scores()
while len(decorated) > 4:
decorated.pop(0)
return [a for score, a in decorated]
return get_potential_bonders()
[docs] def groupAromaticRings(self):
# return a list of (ring lists)
all_groups = []
unsorted_rings = self.aromatic_rings[:]
while unsorted_rings != []:
# new group
entry_set = []
ring = unsorted_rings[0]
unsorted_rings.remove(ring)
entry_set = get_rings_in_group(ring, unsorted_rings)
for r in entry_set:
if r in unsorted_rings:
unsorted_rings.remove(r)
# SORT THE GROUP SET:
# put the all carbon rings first in the order:
# SEARCH FOR: ####################################
# X---X
# / \
# X O X---X
# \ / \
# X---X O X
# / \ /
# X O X---X
# \ /
# X---X
sorted_set = []
# ADD MORE CODE
# add the rest of the rings.
for r in entry_set:
if r not in sorted_set:
sorted_set.append(r)
all_groups.append(sorted_set)
self.aromatic_ring_groups = all_groups
[docs] def getNeighboringRings(self, ring, rings):
"""
Return a list of rings that share atoms with the specified ring.
"""
neighbor_rings = []
for nr in rings:
if ring != nr:
if do_rings_share_atoms(ring, nr):
if nr not in neighbor_rings:
neighbor_rings.append(nr)
return neighbor_rings
[docs] def isKetoneCarbon(self, atom):
if self.getElement(atom) == "C":
for natom in self.getNeighbors(atom):
if self.getElement(natom) == "O" and self.getNumNeighbors(
natom) == 1:
return True
return False
[docs] def assignAromaticRingGroup(self, group):
"""
Assigns bond orders of aromatic rings (5 & 6 membered) which are
arranged in a group (multi-cyclic systems).
"""
debug("\nWorking on aromatic ring group: %s" % group)
# Make a list of all atoms in this ring group:
ring_group_atoms = set()
for r in group:
ring_group_atoms.update(r)
debug("RING GROUP ATOMS: %s" % ring_group_atoms)
# Sort rings by priority:
def get_sort_key(ring):
priority = 0 # lower number = higher priority (needs to be assigned first)
ringatomtypes_dict = self.determineAromaticAtomTypes(
ring, ring_group_atoms)
num_required_bonders = len(ringatomtypes_dict["RB"] +
ringatomtypes_dict["XDXAR"])
num_required_non_bonders = len(ringatomtypes_dict["RNB"])
# Assign 5-membered rings that have only one way they can be assigned (3bki)
if len(ring) == 5:
if num_required_non_bonders == 1 or num_required_bonders == 4:
priority -= 30
# Assign other rings that have only one way they can be assinged (1p2a):
if len(ring) in [5, 6]:
if (num_required_bonders +
num_required_non_bonders) == len(ring):
priority -= 20
# This is needed to get 1i6v right:
if num_required_bonders >= 5:
debug("RING has %i required non-bonders; increasing priority" %
num_required_bonders)
priority -= num_required_bonders
# Give rings that have ketones higher priority:
has_ketone = False
for atomnum in ring:
has_ketone = self.isKetoneCarbon(atomnum)
if has_ketone:
break
if has_ketone:
priority -= 10
debug("RING HAS KETONE; increasing priority")
debug(' returning priority of %i' % priority)
return priority
sorted_rings = sorted(group, key=get_sort_key)
debug('RINGS SORTED BY PRIORITY: %s' % sorted_rings)
rings_not_done = sorted_rings
# Take care of the more complicated cases:
num_times_tried = {}
while rings_not_done != []:
ring = rings_not_done[0]
out = self.assignAromaticRing(ring, ring_group_atoms)
if out == ASSIGNED_AROMATIC:
rings_not_done.remove(ring)
elif out == NOT_AROMATIC:
rings_not_done.remove(ring)
else:
# Did not assign the ring successfully
if len(rings_not_done) == 1:
# This was the last ring; do not try any more assignments:
rings_not_done.remove(ring)
else:
try:
num_times_tried[ring[0]] += 1
if num_times_tried[ring[0]] > 2:
# Give up, the ring is not aromatic
rings_not_done.remove(ring)
else:
# Append ring to the end of the list:
rings_not_done.remove(ring)
rings_not_done.append(ring)
except KeyError:
num_times_tried[ring[0]] = 1
[docs] def assignAromaticRingOrders(self):
"""
Assign all aromatic rings
"""
debug("\nassignAromaticRingOrders(): aromatic rings: %s" %
self.aromatic_ring_groups)
for group in self.aromatic_ring_groups:
self.assignAromaticRingGroup(group)
debug("Done assigning aromatic ring bond orders.")
[docs] def fixKetones(self):
""" This function adds double bonds to ketones and aldehydes when necesary. It should be run only after ALL other bond orders have been assigned. """
debug("Fixing ketones...")
for a in self.atoms:
atom = self.st.atom[a]
if atom.element == 'O':
neighbors = self.getNeighbors(atom)
if len(neighbors) != 1 or not self.isOnlySingleBonded(atom):
continue
catom = self.st.atom[neighbors[0]]
if catom.element != 'C':
continue
if not self.isOnlySingleBonded(catom):
continue
c_num_n = len(catom.bond)
if c_num_n > 3:
continue
the_bond = atom.bond[1]
bond_length = self.st.measure(the_bond.atom1, the_bond.atom2)
c_bond_angle = calculate_average_angle(self.st, catom)
# FIXME: Create a more flexible scoring system....
if (c_num_n == 3 and c_bond_angle >= 119) \
or (c_num_n == 3 and c_bond_angle > 118 and bond_length < 1.4) \
or (c_num_n == 3 and c_bond_angle >= 117 and bond_length < 1.3) \
or (c_num_n < 3 and bond_length < 1.3):
self.setBondOrder(atom, catom, 2)
[docs] def fix_N4s(self):
"""
Sets the macromodel atom type of Positively Charged Nitrognes to N4(31/sp2) or N5(32/sp3).
"""
for a in self.atoms:
atom = self.st.atom[a]
if atom.element == 'N':
n_len = len(atom.bond)
if n_len not in [3, 4]:
continue
# Skip nitrogens with ZOBs:
# See 2bzh, PPREP-940
non_single_bonds_present = False
zobs_present = False
higher_orders_present = False
for bond in atom.bond:
if bond.order > 1:
higher_orders_present = True
elif bond.order == 0:
zobs_present = True
if zobs_present:
continue
if n_len == 3:
# 3 bonds, and one is a double bond:
if higher_orders_present:
atom.atom_type = 31
atom.formal_charge = 1
elif n_len == 4:
# 4 bonds to this nitrogen, tetrahedral:
atom.atom_type = 32
atom.formal_charge = 1
[docs] def sortBondableAtomsByGroups(self, potential_double_bond_atoms):
""" Takes the list of input atoms, and puts them into groups, and
returns a list of those groups. Atoms are considerred in the same group
if they are bonded together (in the same chain).
"""
group_set = []
my_copy = list(potential_double_bond_atoms) # make a copy
while my_copy:
atom_num = my_copy.pop()
entry_set = [atom_num]
new_atoms = [atom_num]
while new_atoms != []: # while didn't just do last atom
for atom in new_atoms:
for n in self.getNeighbors(atom):
if n in my_copy and n not in entry_set:
entry_set.append(n)
my_copy.remove(n)
new_atoms.append(n)
new_atoms.remove(atom)
group_set.append(entry_set)
return group_set
[docs] def assignTripleBonds(self):
potential_triple_bond_atoms = self.findPotentialTripleBondAtoms()
# This code puts the atoms in the same group if they are bound to each other
while potential_triple_bond_atoms != []:
atom_num = potential_triple_bond_atoms[0]
potential_triple_bond_atoms.remove(atom_num)
entry_set = [atom_num]
new_atoms = [atom_num]
while new_atoms != []: # while didn't just do last atom
for atom in new_atoms:
for n in self.getNeighbors(atom):
if n in potential_triple_bond_atoms and n not in entry_set:
entry_set.append(n)
potential_triple_bond_atoms.remove(n)
new_atoms.append(n)
new_atoms.remove(atom)
if len(entry_set) >= 2:
self.assignGroupTripleBonds(entry_set)
[docs] def assignChainOrders(self):
debug("\n --- assignChainOrders(): ---")
potential_double_bond_atoms = self.getPotentialDoubleBondAtoms()
group_set = self.sortBondableAtomsByGroups(potential_double_bond_atoms)
for group in group_set:
if len(group) > 1:
self.assignGroupDoubleBonds(group)
potential_double_bond_atoms = self.getPotentialDoubleBondAtoms()
group_set2 = self.sortBondableAtomsByGroups(potential_double_bond_atoms)
if group_set2 != []:
for group in group_set2:
if group not in group_set and len(group) > 1:
self.assignGroupDoubleBonds(group)
# Go through atoms that HAVE to double bond, but are not double bonded
# at this point.
# if a single required bonder atom is found, look for something that it
# can double bond to...
[docs] def findPotentialTripleBondAtoms(self):
"""
Returns a list of atoms that can potentially triple-bond.
"""
potential_triple_bond_atoms = []
for atom_num in self.atoms:
iatom = self.st.atom[atom_num]
if not self.isOnlySingleBonded(iatom):
continue
element = iatom.element
num_n = self.getNumNeighbors(iatom)
if num_n == 2:
bond_angle = calculate_average_angle(self.st, atom_num)
if bond_angle > 145 and element in ['C', 'N', 'Si']:
potential_triple_bond_atoms.append(atom_num)
elif num_n == 1 and element in ['C', 'N', 'Si', 'O']:
# Terminal single-bonded atom Ev:69974
potential_triple_bond_atoms.append(atom_num)
return potential_triple_bond_atoms
[docs] def getPotentialDoubleBondAtoms(self):
"""
Returns a list of atoms that can potentially double bond.
Only bond angles are considered.
"""
# Ev:119181 Make a list of atoms that are part of small rings
# (5 members). The minimum sp2 angle is smaller for
# these atoms.
atoms_in_small_rings = set()
for ringatoms in self.all_rings:
if len(ringatoms) <= 5:
atoms_in_small_rings.update(ringatoms)
potential_double_bond_atoms = []
# Not sure why we have a maximum angle. Perhaps for triple bonds?
max_sp2_angle = 155.0
min_sp2_angle = 105.0 # !! CHANGING THIS MAY BREAK SOMETHING ELSE !!
# Loosen up the rule if the atom is in a small ring (constrainted):
# Not considering bond lengths !!! CHANGING THIS MAY BREAK SOMETHING
# ELSE!
min_sp2_5mem_nodist_angle = 104.3
min_sp2_5mem_withdist_angle = 100.0 # Considering bond lengths
for a in self.atoms:
iatom = self.st.atom[a]
if not self.isOnlySingleBonded(iatom):
continue
if iatom.element not in ['C', 'N', 'Si']:
continue
num_n = self.getNumNeighbors(iatom)
if num_n in (2, 3):
bond_angle = calculate_average_angle(self.st, a)
if bond_angle < max_sp2_angle:
# If the atom is NOT in a small ring, only consider angles:
if bond_angle > min_sp2_angle:
potential_double_bond_atoms.append(a)
elif a in atoms_in_small_rings:
# Ev:119181 5 membered rings are treated specially, since
# bond angles do not tell much there:
if bond_angle > min_sp2_5mem_nodist_angle:
potential_double_bond_atoms.append(a)
elif bond_angle > min_sp2_5mem_withdist_angle:
# Check whether at least one bond is short enough:
short_bonds_present = False
for bond in iatom.bond:
dist_threshold = get_single_bond_length(
iatom, bond.atom2)
if bond.length < (dist_threshold - 0.1):
short_bonds_present = True
break
if short_bonds_present:
potential_double_bond_atoms.append(a)
elif num_n == 1:
# Terminal atoms with unknown hybridization
if iatom.bond[1].order == 1:
potential_double_bond_atoms.append(a)
return potential_double_bond_atoms
[docs]def get_ring_atom_coords(st, ring_atoms):
"""
Get the coordinates of the ring atoms as a numpy array.
If `st` has periodic boundary conditions (PBC), the coordinates returned
will all be in the frame of reference of the first atom in ring_atoms.
"""
try:
pbc = infrastructure.PBC(st)
if not ring_atoms:
return numpy.array([])
return numpy.array(pbc.getNearestImages(st, ring_atoms, ring_atoms[0]))
except IndexError:
# The structure does not have PBC properties associated with it.
return numpy.array([st.atom[a].xyz for a in ring_atoms])
[docs]def get_edge_normals(ring_atom_coords):
"""
Get vectors normal to the planes described by each ring-edge and the center
of the ring.
Assumes that atoms are in order.
"""
center = numpy.mean(ring_atom_coords, 0)
edge_vectors = []
shifted = numpy.roll(ring_atom_coords, 1, 0)
for a1, a2 in zip(ring_atom_coords, shifted):
# Calculate the ring-center -> atom vectors:
atom1_vector = a1 - center
atom2_vector = a2 - center
normal_vector = numpy.cross(atom1_vector, atom2_vector)
normal_unit_vector = normal_vector / numpy.linalg.norm(normal_vector)
edge_vectors.append(normal_unit_vector)
return edge_vectors
[docs]def calculate_planarity(st, ring_atoms):
"""
Calculate the ring planarity.
Very planar rings have a planarity of < 1.
Assumes that atoms are in order.
"""
ring_atom_coords = get_ring_atom_coords(st, ring_atoms)
edge_vectors = get_edge_normals(ring_atom_coords)
# Calculate the ring (average) vector:
ring_vector = numpy.average(edge_vectors, 0)
# Calculate average deviation from the ring vector:
diffs = []
for v in edge_vectors:
d = numpy.absolute(numpy.subtract(ring_vector, v))
diffs.append(d)
ave_diff = numpy.average(diffs, 0)
planarity = numpy.average(ave_diff) * 100
return planarity
def _residue_atom_subset(res, atoms):
"""
Return atoms of the residue residue which are also in the specified <atoms>
list. Will return an empty list if this residue already has bond orders
assigned.
"""
res_atoms = []
for a in res.atom:
ai = a.index
# Skip this residue if any atom has double bonds:
total_orders = 0
for bond in a.bond:
order = bond.order
total_orders += order
if order > 1:
debug("WARNING: Residue %s has bond orders. Skipping..." % res)
return [] # do not assign this residue
# Give a warning if an atom has too many bonds (due to PDB error):
# If so, this residue will be skipped
if total_orders > MAX_VALENCE[a.atomic_number]:
print("WARNING: Assign Bond Orders: Atom %i (%s) has too many bonds (%i)" \
% (ai, a.element, total_orders))
print(" Skipping", res)
return [] # do not assign this residue
# If <atoms> specified, assign only if this atom is in it:
if not atoms or ai in atoms:
res_atoms.append(ai)
return res_atoms
[docs]class CCDStructureMismatchError(RuntimeError):
"""raise this error when structure in CCD template doesn't match HET"""
[docs]class SmilesGenerateError(RuntimeError):
"""raise this error when RDKit fails to generate SMILES"""
def _assign(ccd_st, ccd_order, st, st_order):
# Assign bond orders and formal charges based on the mapping:
# Assign bond orders and formal charges based on the mapping:
assigned_bonds = []
for bond in ccd_st.bond:
if bond.atom1.index not in ccd_order or bond.atom2.index not in ccd_order:
# This atom is present in <st> only.
continue
a1_index = ccd_order.index(bond.atom1.index)
a2_index = ccd_order.index(bond.atom2.index)
order = bond.order
if order != 1:
st_atom1 = st_order[a1_index]
st_atom2 = st_order[a2_index]
st_bond = st.getBond(st_atom1, st_atom2)
if st_bond is None:
# These 2 atoms are not bonded in the original CT; e.g. 3QCR
pass
else:
st_bond.order = order
_a1, _a2 = sorted([st_atom1, st_atom2])
assigned_bonds.append((_a1, _a2, order))
for het_ai, ccd_ai in zip(st_order, ccd_order):
st_atom = st.atom[het_ai]
ccd_atom = ccd_st.atom[ccd_ai]
st_atom.formal_charge = ccd_atom.formal_charge
return assigned_bonds
def _find_best_mapping(st, het_atoms, ccd_st):
"""
Find the best way that <het_atoms> subset of <st> can be mapped to <ccd_st>,
considering all symmetrical combinations.
"""
# Save original atom indices:
for atom in st.atom:
atom.property['i_abo_original_index'] = atom.index
het_heavy_atoms = [ai for ai in het_atoms if st.atom[ai].atomic_number != 1]
# Create a substructure CT with only this het in it, plus few atoms of the
# receptor for covalently bound ligands (if present):
het_asl = analyze.generate_asl(st, het_atoms)
delete_atoms = analyze.evaluate_asl(st,
"NOT (withinbonds 2 (%s))" % het_asl)
if delete_atoms:
het_st = st.copy()
rmap = het_st.deleteAtoms(delete_atoms, renumber_map=True)
het_heavy_atoms = [rmap[anum] for anum in het_heavy_atoms]
else:
# Het is not making any covalent bonds
het_st = st
mapper = CCDAtomMapper(use_chirality=False)
mapper.optimize_mapping = True
ccd_heavy_atoms = [a.index for a in ccd_st.atom if a.atomic_number != 1]
if len(ccd_heavy_atoms) != len(het_heavy_atoms):
raise CCDStructureMismatchError(
'CCD and het structures have different number of atoms.')
# Re-order both structures by most ideal combination:
try:
_, reordered_st = mapper.reorder_structures(ccd_st, ccd_heavy_atoms,
het_st, het_heavy_atoms,
ccd_st)
except atom_mapper.ConformerError:
raise CCDStructureMismatchError(
'CCD and het structure graphs are not isomorphic.')
# Since <st> was now re-ordered, and we want to have the original CT
# atom order unmodified, we will only need to use the identity mapping
# between the <ccd_st> and <st> and not the structure itself. Only consider
# heavy atoms, to handle cases where het has hydrogens or covalently bonded
# protein atoms.
ordered_orig_heavy = (a.property['i_abo_original_index']
for a in reordered_st.atom
if a.atomic_number != 1)
ordered_orig_st_atoms = [ai for ai in ordered_orig_heavy if ai in het_atoms]
# Sanity check:
ccd_elements = [ccd_st.atom[ai].element for ai in ccd_heavy_atoms]
st_elements = [st.atom[ai].element for ai in ordered_orig_st_atoms]
if ccd_elements != st_elements:
raise CCDStructureMismatchError(
"Atom element mismatch between the CCD and het record:\n%s\n%s" %
(ccd_elements, st_elements))
for atom in st.atom:
del atom.property['i_abo_original_index']
return ccd_heavy_atoms, ordered_orig_st_atoms
def _assign_from_smiles(st, atoms, smiles):
"""
Assign the bond orders for the <atoms> in <st> based on the <smiles>.
:param st: Structure containing the het.
:type st: `structure.Structure`
:param atoms: Atoms in the het group.
:type atoms: list of ints
:return: Bonds that were assigned, as a list of (atom1 index, atom2 index,
bond order).
:rtype: list of (int, int, int)
:raises RuntimeError if het could not be assigned from CCD record for
any reason.
"""
# Create a CT from the CCD SMILES:
try:
ccd_mol = Chem.MolFromSmiles(smiles)
ccd_st = rdkit_adapter.from_rdkit(ccd_mol)
except Exception as err:
msg = "Failed to generate a CT from SMILES: %s" % smiles
msg += '\n---RDkit error message:---\n%s' % str(err).strip()
raise SmilesGenerateError(msg)
assert atoms
ccd_order, st_order = _find_best_mapping(st, atoms, ccd_st)
assigned_bonds = _assign(ccd_st, ccd_order, st, st_order)
return assigned_bonds
def _get_ccd_smiles(hetname):
"""
Return the SMILES string for the given het name in the CCD database.
:param hetname: 3-letter het name
:type hetname: str
:return: SMILES string or None
:rtype: str or None
"""
if hetname in CCD_CACHE:
return CCD_CACHE[hetname]
filename = hetname + '.cif'
try:
fh = zipfile.ZipFile(CCD_FILE).open(filename)
except KeyError:
# There is no CCD record for this residue.
return None
# Read the SMILES for the corresponding CCD record:
for line in io.TextIOWrapper(fh).readlines():
if line[0] in ('#', ';'):
continue
words = line.split()
if len(words) < 5:
continue
if words[1] == 'SMILES_CANONICAL' and words[2] == 'CACTVS':
# strip any leading and trailing double-quotes, if present:
smiles = words[4].strip('"')
CCD_CACHE[hetname] = smiles
return smiles
# No SMILES in CCD entry for some reason
return None
[docs]def assign_st(st,
atoms=None,
neutralize=False,
problem_only=False,
skip_assigned_residues=True,
use_ccd=False,
_logger=None):
"""
Perform the assign-bond-order algorithm on the supplied structure.
If the atom list is specified, only those atoms are fixed.
:param st: Structure to assign bond orders in
:type st: L{structure.Structure)
:param atoms: List of atoms to assign. By default all atoms as assigned.
:type atoms: list of ints
:param neutralize: Whether to protonate carboxylic acids.
:type neutralize: bool
:param problem_only: Whether to assign only to atoms with PDB convert
problem of 4 (orange atoms). Not compatible with <atoms> argument.
:type problem_only: bool
:param skip_assigned_residues: If True, bond orders are not assigned to
residues that have double or triple bonds, even if that residue's atoms
are in <atoms>. Not compatible with <problem_only> option.
:type skip_assigned_residues: bool
:param use_ccd: Whether to use the Chemical Component Dictionary for hets
that have records there. Not supported for covalent ligands.
:type use_ccd: bool
:param _logger: logger to send messages to
:type _logger: logger
:return: List of (atom1, atom2, order) for every bond whose order was
altered.
:rtype: list of (int, int, int)
"""
if problem_only:
unassigned_asl = "(atom.i_m_pdb_convert_problem 4)"
atoms = analyze.evaluate_asl(st, unassigned_asl)
if not atoms: # None have problem
return
# For record, set of (atom1, atom2, order) for every bond whose order was
# changed, where atom1 < atom2.
all_assigned_bonds = set()
atoms_to_assign = []
for res in st.residue:
pdbres = res.pdbres
# Skip waters:
if pdbres == "HOH " and len(res.atom) in (1, 3):
continue
if skip_assigned_residues:
if DEBUG:
print('Checking residue:', res)
# Get a list of atoms from this residue to assign orders to:
res_atoms_to_assign = _residue_atom_subset(res, atoms)
if not res_atoms_to_assign:
# Residue has double bonds
continue
else:
# This residue had no double bonds (is unassigned)
# res_atoms_to_assign = all residue atoms that are also in <atoms>
res_assign_atoms = res_atoms_to_assign
else:
res_assign_atoms = [
atom.index
for atom in res.atom
if atoms is None or atom.index in atoms
]
if use_ccd:
assigned_bonds = attempt_ccd_assignment(res, res_assign_atoms,
_logger)
if assigned_bonds:
all_assigned_bonds.update(assigned_bonds)
continue
atoms_to_assign.extend(res_assign_atoms)
# Assign bond orders based on geometry for the remaining atoms:
abo = AssignBondOrders(st, atoms_to_assign, neutralize)
assigned_bonds = abo.assign()
for item in assigned_bonds:
all_assigned_bonds.add(item)
# Return a list of bonds that have been assigned:
return all_assigned_bonds
[docs]def attempt_ccd_assignment(res, res_assign_atoms=None, _logger=None):
"""
Attempt to use the CCD record to assign the bond orders. Returns list
of bonds that were assigned. Returns empty list if assignment was not
successful.
:param res: Residue for the het to assign orders to.
:type res: structure._Residue
:param res_assign_atoms: List of atoms (substructure from the residue)
to assign bond orders to. By default all residue atoms are assigned.
:type res_assign_atoms: list(int)
:param _logger: Logger
:type _logger: logging.Logger
:return: List of assigned bonds, as (index1, index2, bond order)
:rtype: list of (int, int, int)
"""
if res_assign_atoms is None:
res_assign_atoms = res.getAtomIndices()
assigned_bonds = []
ccd_smiles = _get_ccd_smiles(res.pdbres.strip())
if not ccd_smiles:
if _logger:
_logger.info('No CCD template for HET \"%s\"' % res.pdbres)
_logger.info(' Falling back to bond order assignment '
'based on geometry.')
ccd_assigned_status = 'HETNAM not in CCD'
else:
try:
assigned_bonds = _assign_from_smiles(res._st, res.getAtomIndices(),
ccd_smiles)
except (CCDStructureMismatchError, SmilesGenerateError) as err:
if _logger:
_logger.warning('CCD template assignment failed for HET '
'\"%s\" due to:\n %s' % (res.pdbres, err))
_logger.warning(' Falling back to bond order assignment '
'based on geometry.')
if isinstance(err, SmilesGenerateError):
ccd_assigned_status = 'CCD failed'
elif isinstance(err, CCDStructureMismatchError):
ccd_assigned_status = 'CCD mismatch'
else:
if _logger:
_logger.info(
'CCD template assignment successfully used for HET '
'\"%s\"' % res.pdbres)
# Het as assigned successfully (even if all bonds are
# single); skip this het.
ccd_assigned_status = 'CCD match'
for atom in res.atom:
atom.property[CCD_ATOM_PROPERTY] = ccd_assigned_status
return assigned_bonds
def _genBondsDict(st, atoms):
"""
Generates the state dictionary for the specified st.
"""
d = {}
for a in st.atom:
anum = a.index
if atoms and anum not in atoms:
continue
if a.atomic_number == 1:
continue
d[anum] = {}
for bond in a.bond:
nnum = bond.atom2.index
if atoms and nnum not in atoms:
continue
if bond.atom2.atomic_number == 1:
continue
d[anum][nnum] = bond.order
return d
[docs]def orders_assigned(st, atoms=None, all=False):
"""
Returns True if all bond orders are OK in the specified structure.
Can be given a list of atoms to check bond orders of. If not list is
specified and all flag is set to True, all atoms are checked; otherwise
only atoms with a PDB convert error (appear orange in Maestro) are checked.
NOTE: atoms and all options are mutually exclusinve.
"""
if all:
atoms = None # Signifies all atoms
elif not atoms:
unassigned_asl = "(atom.i_m_pdb_convert_problem 4)"
atoms = analyze.evaluate_asl(st, unassigned_asl)
if not atoms:
# empty list
return True
bonds_before = _genBondsDict(st, atoms)
st2 = st.copy() # Do not change the original st
assign_st(st2, atoms, neutralize=False)
bonds_after = _genBondsDict(st2, atoms)
return bonds_before == bonds_after
if __name__ == '__main__':
# For testing purposes only
import sys
input_file = sys.argv[1]
output_file = sys.argv[2]
print("Input file:", input_file)
print("Output file:", output_file)
st_num = 0
for st in structure.StructureReader(input_file):
st_num += 1
print("Working on structure %i..." % st_num)
assign_st(st)
build.add_hydrogens(st)
if st_num == 1:
st.write(output_file)
else:
st.append(output_file)
print('Done.')
# EOF