"""
This module contains classes and functions for Core Hopping.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Pat Lorton
import sqlite3
from past.utils import old_div
from struct import unpack
from scipy.special import comb
from schrodinger.infra import mm
from schrodinger.structutils import block_data
from schrodinger.structutils import smiles as smiles_mod
# Default value for the atnum, numbonds and fcharge parameters of
# AttachmentPoint.
ANY = 999 # Same as ANY constant of vcs-src/chsr/data_struct_macros.h
# Name of the maestro block that attachment point rows are written to.
ATTACHMENT_BLOCK = 'm_attachment'
def _convert_blob_to_int_array(blob):
"""
This is used to convert a sqlite3 blob into an int array
"""
cb = []
idx = 0
while idx < len(blob):
cb.append(blob[idx:idx + 4])
idx += 4
return [unpack("<L", x)[0] for x in cb]
class SubstCoreDatabase:
    """
    This class can be used to calculate substitution scores, where a core
    hopping database has been used to generate passed ligands.
    """
    frag_handle = None  # Class static variable, shared by all instances

    def __init__(self, database_fname):
        """
        The class is initialized with the database used for core hopping, then
        it is queried with ligands which that core hopping run produced.

        :param database_fname: path of the sqlite3 core hopping database
        :type database_fname: str
        """
        self.database_fname = database_fname
        self.conn = sqlite3.connect(database_fname)
        self.cursor = self.conn.cursor()
        self.smiles_gen = smiles_mod.SmilesGenerator(
            stereo=smiles_mod.STEREO_FROM_ANNOTATION_AND_GEOM)
        # Only need one frag handle for all instances of the class
        if SubstCoreDatabase.frag_handle is None:
            mm.mmbuild_initialize(mm.error_handler)
            mm.mmfrag_initialize(mm.error_handler)
            SubstCoreDatabase.frag_handle = mm.mmfrag_new("organic")
            mm.mmfrag_set_fragment_name(SubstCoreDatabase.frag_handle,
                                        "Hydrogen")
            mm.mmfrag_set_direction(SubstCoreDatabase.frag_handle, "forward")

    @staticmethod
    def _get_from_atoms(st):
        """
        Collect the base 'from' atom numbers of every non-sidechain atom that
        is bonded to a sidechain atom.

        Sidechain atoms are those carrying a false 'b_cg_iscore' property.

        :param st: structure produced by the core hopping run
        :type st: structure.Structure

        :return: base from-atom numbers parsed from the atoms' grownames
        :rtype: set(int)
        """
        sidechain_atoms = {
            int(atom)
            for atom in st.atom
            if 'b_cg_iscore' in atom.property and
            not atom.property['b_cg_iscore']
        }
        from_atoms = set()
        for atom in st.atom:
            if int(atom) in sidechain_atoms:
                continue
            if any(int(ba) in sidechain_atoms for ba in atom.bonded_atoms):
                # Get the base from in case this atom is a linker
                gn_split = atom.growname.replace(' ', '').split('.')
                from_atoms.add(int(gn_split[0]))
        return from_atoms

    def get_subst_score(self, st):
        """
        Query the database using the provided Structure to attain a subst score.

        :param st: structure carrying the 's_cgch_core_smiles' property and
            per-atom 'b_cg_iscore' properties
        :type st: structure.Structure

        :return: the accumulated substitution score (0 if no pattern matches)
        :rtype: float or int

        :raises IndexError: if the core smiles is not present exactly once in
            the 'core' table of the database
        """
        smiles = st.property['s_cgch_core_smiles']
        from_atoms = self._get_from_atoms(st)

        # Make sure the smiles exists inside the database. The value is bound
        # as a query parameter so that any character in it is handled safely,
        # and the check is an explicit comparison so it survives "python -O".
        self.cursor.execute(
            "select COUNT(smiles) from core where smiles=?;", (smiles,))
        count_row = self.cursor.fetchone()
        if count_row is None or count_row[0] != 1:
            raise IndexError("Smiles: '%s' not found in Database: '%s'" %
                             (smiles, self.database_fname))

        self.cursor.execute(
            "select n_smiles_matches, smiles_matches FROM core "
            "WHERE smiles==?", (smiles,))
        # a list of lists containing all smiles matches maps
        smiles_matches = []
        for n_smiles_matches, smiles_match_blob in self.cursor:
            smiles_matches_whole = _convert_blob_to_int_array(smiles_match_blob)
            # The blob stores n_smiles_matches equally sized maps back to back
            subset_size = len(smiles_matches_whole) // n_smiles_matches
            for i in range(0, len(smiles_matches_whole), subset_size):
                smiles_matches.append(smiles_matches_whole[i:i + subset_size])

        self.cursor.execute(
            "SELECT times_found, subst_atoms, parent_smiles FROM "
            "subst_pattern where core_smiles==?;", (smiles,))
        score = 0
        for times_found, subst_atoms_blob, _parent_smiles in self.cursor:
            subst_atoms = _convert_blob_to_int_array(subst_atoms_blob)
            for smiles_match in smiles_matches:
                # The pattern counts only when every substituted atom maps
                # onto one of this structure's from atoms.
                if all(smiles_match[subst_atom] in from_atoms
                       for subst_atom in subst_atoms):
                    unweighted_score = len(subst_atoms) / round(
                        comb(len(from_atoms), len(subst_atoms)))
                    score += unweighted_score * (1 - pow(0.5, times_found))
                    # Each pattern contributes at most once
                    break
        return score
class AttachmentPoint:
    """
    Holds the parameters of a single attachment point and maps them to the
    corresponding columns of a row in the 'm_attachment' maestro block. Used
    to write attachment blocks for core hopping.
    """

    def __init__(self,
                 from_anum,
                 to_anum,
                 name='',
                 atnum=ANY,
                 numbonds=ANY,
                 fcharge=ANY):
        """
        :param from_anum: index of the first atom of the pair (must be >= 1)
        :type from_anum: int
        :param to_anum: index of the second atom of the pair (must be >= 1)
        :type to_anum: int
        :param name: attachment name
        :type name: str
        :param atnum: value stored in the 'i_m_ap_atnum' column
        :type atnum: int
        :param numbonds: value stored in the 'i_m_ap_numbonds' column
        :type numbonds: int
        :param fcharge: value stored in the 'i_m_ap_fcharge' column
        :type fcharge: int

        :raises ValueError: if either atom index is smaller than 1
        """
        has_invalid_index = from_anum < 1 or to_anum < 1
        if has_invalid_index:
            message = (f"Attachment pair {from_anum},{to_anum} contains an "
                       "invalid (zero or negative) atom index")
            raise ValueError(message)
        self.from_anum, self.to_anum = from_anum, to_anum
        self.name = name
        self.atnum, self.numbonds, self.fcharge = atnum, numbonds, fcharge

    def makeRowObject(self):
        """
        Build the dictionary describing this attachment point, with one key
        per column of the 'm_attachment' attachment block used to specify core
        hopping attachment points.

        :return: dictionary for block_data to easily append a row to
            'm_attachment' with.
        :rtype: dict
        """
        # Atom pair and the fixed zero-valued columns
        row = dict(
            i_m_atom1=self.from_anum,
            i_m_atom2=self.to_anum,
            i_m_num_reagents=0,
            i_cgch_minlinker=0,
            i_cgch_maxlinker=0,
        )
        # Per-attachment atom settings
        row['i_m_ap_atnum'] = self.atnum
        row['i_m_ap_numbonds'] = self.numbonds
        row['i_m_ap_fcharge'] = self.fcharge
        # String columns; reagent path and functional group are left empty
        row['s_m_attachment_name'] = self.name
        row['s_m_reagent_path'] = ''
        row['s_m_functional_group'] = ''
        return row
def write_attachment_block(st, attachments):
    """
    Store the attachment points chosen for a core hopping job on the
    template core structure so that downstream steps can read them.

    Every attachment is renamed to 'Attachment <n>', where n is its 1-based
    position in the given list, before its row is generated.

    :param st: template core structure for core hopping search
    :type st: structure.Structure

    :param attachments: list specifying attachment points
    :type attachments: list(corehop.AttachmentPoint)
    """
    blocks = {}
    position = 0
    for point in attachments:
        position += 1
        point.name = f'Attachment {position}'
        block_data.append_row_to_data(blocks, ATTACHMENT_BLOCK,
                                      point.makeRowObject())
    block_data.write_blocks(st, blocks, truncate=True)