"""
Calculate structural interaction fingerprints.
The fingerprints are similar to those described in Deng et al, J. Med Chem
(2004), 47, 337.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Quentin McDonald, Woody Sherman, Blaine Bell
import csv
import os
import subprocess
import numpy
from schrodinger.infra import canvas
from schrodinger.structutils import analyze
from schrodinger.structutils import measure
from schrodinger.structutils.interactions import hbond
from schrodinger.utils import csv_unicode
from schrodinger.utils import fileutils
[docs]class StructuralInteractionFingerprintGenerator(object):
"""
A class which generates Structural Interaction Fingerprints.
Based on the work
described in:
J. Med. Chem., 47 (2), 337-344, 2004.
'Structural Interaction Fingerprint (SIFt):
A Novel Method for Analyzing Three-Dimensional
Protein-Ligand Binding Interactions'
"""
# ASL definitions for various residue sets:
POLAR_RESIDUES = \
"res. ARG, ASP, GLU, HIS, ASN, GLN, LYS, SER, THR, ARN, ASH, GLH, HID, HIE, LYN"
HYDROPHOBIC_RESIDUES = \
"res. PHE, LEU, ILE, TYR, TRP, VAL, MET, PRO, CYS, ALA, CYX"
AROMATIC_RESIDUES = "res. PHE, TYR, TRP, TYO"
CHARGED_RESIDUES = "res. ARG, ASP, GLU, LYS, HIP, CYT, SRO, TYO, THO"
# Number of bits per residue:
NUM_BITS_PER_RESIDUE = 9
# Bit positions for each interaction:
CONTACT_POS = 0
BACKBONE_POS = 1
SIDECHAIN_POS = 2
POLAR_POS = 3
HYDROPHOBIC_POS = 4
ACCEPTOR_POS = 5
DONOR_POS = 6
AROMATIC_POS = 7
CHARGED_POS = 8
ANY_CONTACT = "Any Contact"
BACKBONE_INTERACTION = "Backbone Interaction"
SIDECHAIN_INTERACTION = "Sidechain Interaction"
POLAR_RESIDUE = "Polar Residues"
HYDROPHOBIC_RESIDUE = "Hydrophobic Residues"
HYDROGEN_BOND_ACCEPTOR = "Hydrogen Bond Acceptor"
HYDROGEN_BOND_DONOR = "Hydrogen Bond Donor"
AROMATIC_RESIDUE = "Aromatic Residue"
CHARGED_RESIDUE = "Charged Residue"
INTERACTION_NAMES = [
ANY_CONTACT, BACKBONE_INTERACTION, SIDECHAIN_INTERACTION, POLAR_RESIDUE,
HYDROPHOBIC_RESIDUE, HYDROGEN_BOND_ACCEPTOR, HYDROGEN_BOND_DONOR,
AROMATIC_RESIDUE, CHARGED_RESIDUE
]
[docs] def __init__(self):
"""
Constructor - no args
"""
self.reset()
# The cutoff distance for automatically determining the receptor
# region for heavy atom-heavy atom contacts:
self.cutoff_dist = 4.0
# The cutoff distance for automatically determining the receptor
# region for contact involving hydrogen atoms:
self.h_cutoff_dist = 2.5
# The hydrogen bonding parameters:
self.hbond_distance = 2.5
self.hbond_donor_angle = 120.0
self.hbond_acceptor_angle = 90.0
# self.tracker is a dictionary. The keys are three-tuples of
# (residue ID, ligand Entry ID, contact type) and the values are a list
# of atom numbers of ligand ID that have "contact type" with residue ID.
self.tracker = {}
[docs] def reset(self):
"""
Clear all the lists of fingerprints and associated data
"""
# The list of ChmBitsetObjects representing the FPs:
self.fp_list = []
# The list of ligand IDs, either positions in a file or entry IDs
self.ligand_ids = []
# The list of ligand titles:
self.ligand_titles = []
# List of all CT-level properties:
self.ligand_props = []
# A dictionary which associated fingerprints with ligand IDs
self.fp_map = {}
# List of _Residue for residues associated with each FP position
# NOTE: When non-conformers are aligned, these are residues form
# the reference structure.
self.residues = []
# The list of residues IDs associated with each position in the
# fingerprints - Chain:Resnum:InsCode
self.res_ids = []
# The list of residue types associated with each position
# (from reference structure, if aligning non-conformer compelxes)
self.residue_types = []
# The minimum and maximum residue index which has actually contributed
# to setting an interaction in the fingerprint
self.min_res = None
self.max_res = None
# The receptor structure and ID:
self.receptor_st = None
self.receptor_id = None
# Interactions: start with them all on:
self.include_contact = True
self.include_backbone = True
self.include_sidechain = True
self.include_polar = True
self.include_hydrophobic = True
self.include_acceptor = True
self.include_donor = True
self.include_aromatic = True
self.include_charged = True
[docs] def setHbondParameters(self, dist, donor_angle, acceptor_angle):
"""
Set the hydrogen bonding parameters, maximum distance, minimum
donor angle and mi mum acceptor angle
"""
self.hbond_distance = dist
self.hbond_donor_angle = donor_angle
self.hbond_acceptor_angle = acceptor_angle
[docs] def setInteractionCutoff(self, cutoff=4.0, h_cutoff=2.5):
"""
Sets the maximum interaction cutoff. No atom interactions beyond
this distance will be counted
"""
self.cutoff_dist = cutoff
self.h_cutoff_dist = h_cutoff
[docs] def setIncludeBackbone(self, include):
"""
Set whether we will include the backbone contact bit
"""
self.include_backbone = include
[docs] def setIncludeSidechain(self, include):
"""
Set whether we will include the side-chain contact bit
"""
self.include_sidechain = include
[docs] def setIncludePolar(self, include):
"""
Set whether we will include the polar residue contact bit
"""
self.include_polar = include
[docs] def setIncludeHydrophobic(self, include):
"""
Set whether we will include the hydrophobic residue contact bit
"""
self.include_hydrophobic = include
[docs] def setIncludeAcceptor(self, include):
"""
Set whether we will include the hydrogen bond acceptor contact bit
"""
self.include_acceptor = include
[docs] def setIncludeDonor(self, include):
"""
Set whether we will include the hydrogen bond donor contact bit
"""
self.include_donor = include
[docs] def setIncludeCharged(self, include):
"""
Set whether we will include the charged residue contact bit
"""
self.include_charged = include
[docs] def setIncludeAromatic(self, include):
"""
Set whether we will include the aromatic residue contact bit
"""
self.include_aromatic = include
[docs] def getMinRes(self):
"""
Return the first residue index for which an interaction is detected
"""
return self.min_res
[docs] def getMaxRes(self):
"""
Return the last residue index for which an interaction is detected
"""
return self.max_res
[docs] def getLigandID(self, idx):
"""
Return the ID for the idx'th ligand
"""
return self.ligand_ids[idx]
[docs] def getLigandTitle(self, idx):
"""
Returns the title for the idx'th ligand (if it exists)
"""
ret = self.ligand_titles[idx]
if ret is not None:
return ret
else:
return ""
[docs] def getResidueType(self, idx):
"""
Returns the PDB residue type for the idx'th residue
"""
return self.residue_types[idx]
[docs] def getResidueID(self, idx):
"""
Return the ID for the idx'th residue
"""
return self.res_ids[idx]
[docs] def getReceptorID(self):
"""
Returns the ID (usually an entry ID) associated with the
structure
"""
return self.receptor_id
[docs] def setReceptorStructure(self, receptor_st, id):
"""
Set the structure to be treated as the receptor and setup some
lists and sets of atoms we'll use in calculating subsequent
interactions.
If non-conformer proteins are used, use this method to set the
reference structure.
"""
self.receptor_st = receptor_st
self.receptor_id = id
# Calculate atom sets for residue interactions:
backbone_list = analyze.evaluate_asl(self.receptor_st, "backbone")
self.backbone_set = set(backbone_list)
polar_list = analyze.evaluate_asl(self.receptor_st, self.POLAR_RESIDUES)
self.polar_set = set(polar_list)
hydrophobic_list = analyze.evaluate_asl(self.receptor_st,
self.HYDROPHOBIC_RESIDUES)
self.hydrophobic_set = set(hydrophobic_list)
aromatic_list = analyze.evaluate_asl(self.receptor_st,
self.AROMATIC_RESIDUES)
self.aromatic_set = set(aromatic_list)
charged_list = analyze.evaluate_asl(self.receptor_st,
self.CHARGED_RESIDUES)
self.charged_set = set(charged_list)
self.residues = []
self.residue_types = []
self.res_ids = []
for chain in self.receptor_st.chain:
for res in chain.residue:
res_id = "%s:%d:%s" % (res.chain, res.resnum, res.inscode)
self.res_ids.append(res_id)
self.residues.append(res)
self.residue_types.append(res.pdbres)
[docs] def generateFingerprint(
self,
ligand,
id,
receptor_region,
ligand_title=None,
nonpolar_hydrogens=False,
receptor_st=None,
aligned_residues=None,
):
"""
Generate a structural interaction fingerprint for the given ligand
with id.
The receptor_region parameter is a list of receptor atom numbers
corresponding to the receptor residues that
are to be considered as interacting with the ligand. No other
residues will be considered (their bits in the fingerprint will
all be 0). If receptor_region is None then it will be calculated
automatically
The fingerprints generated are stored in internal
lists and can be accessed via other methods (or written to
disk ).
:type ligand: Structure class object
:param ligand: the ligand structure to calculate fingerprints for
:type id: str
:param id: the entry id of the ligand in the project table
:type receptor_region: None or list
:param receptor_region: custom list of atoms in the receptor site. If
None or empty list, the receptor set in setReceptorStructure() will
be used.
:type ligand_title: str
:param ligand_title: the name of the ligand being calculated
:type nonpolar_hydrogens: bool
:param nonpolar_hydrogens: True if nonpolar hydrogens should be
included in the calculation, False if not.
:type aligned_residues: list(structure._Residue|None)
:param aligned_residues: List of residues from this complex that
correspond to
"""
if receptor_st is None:
if self.receptor_st is None:
raise Exception("The receptor structure has not yet been set.")
receptor_st = self.receptor_st
residues = self.residues
else:
# Different receptor is specified,
assert aligned_residues is not None
assert len(aligned_residues) == len(self.residues)
residues = aligned_residues
# NOTE: residues is a list of _Residue objects to consider for the
# fingerprint.
# Remove non-polar hydrogens from the ligand if requested
if nonpolar_hydrogens:
ligand_set = ligand.getAtomIndices()
else:
asl = 'not atom.mtype H1'
ligand_set = analyze.evaluate_asl(ligand, asl)
# If no region is specified then calculate one now. This will allow
# us to calculate both a receptor and speed up the calculation:
if receptor_region is None:
rec_atoms = receptor_st.atom_total
complex_st = receptor_st.copy()
complex_st.extend(ligand)
cutoff = max(self.cutoff_dist, self.h_cutoff_dist)
lig_asl = '(a.num > %i)' % rec_atoms
asl = '(fillres within %f %s) AND NOT %s' % (cutoff, lig_asl,
lig_asl)
if not nonpolar_hydrogens:
asl += ' AND NOT atom.mtype H1'
receptor_set = set(analyze.evaluate_asl(complex_st, asl))
else:
receptor_set = set(receptor_region)
self.ligand_ids.append(id)
self.ligand_titles.append(ligand_title)
self.ligand_props.append(dict(ligand.property))
on_bits = {}
for res_cnt, residue in enumerate(residues):
res_id = self.res_ids[res_cnt]
if residue is None:
# There is no residue at this position in the receptor after
# aligning it to reference receptor.
continue
res_atoms = residue.getAtomIndices()
if res_atoms[0] not in receptor_set:
# This residue is not part of the receptor (e.g. a solvent
# or ligand residue).
continue
# Examine all the ligand/residue interactions:
res_index = res_cnt * self.NUM_BITS_PER_RESIDUE
for iratom in res_atoms:
ratom = receptor_st.atom[iratom]
ratom_is_h = ratom.element == 'H'
for ligindex in ligand_set:
latom = ligand.atom[ligindex]
d = measure.measure_distance(ratom, latom)
if ratom_is_h or latom.element == 'H':
cutoff = self.h_cutoff_dist
else:
cutoff = self.cutoff_dist
if d < cutoff:
# Update the minimum and maximum residue numbers
if (self.min_res is None or res_cnt < self.min_res):
self.min_res = res_cnt
if (self.max_res is None or res_cnt > self.max_res):
self.max_res = res_cnt
# There is a contact, turn on res_index +1
bit_index = "%d" % (res_index + self.CONTACT_POS)
if self.include_contact:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.CONTACT_POS),
set()).add(ligindex)
# Check backbone:
if int(ratom) in self.backbone_set:
bit_index = "%d" % (res_index + self.BACKBONE_POS)
if self.include_backbone:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.BACKBONE_POS),
set()).add(ligindex)
else:
bit_index = "%d" % (res_index + self.SIDECHAIN_POS)
if self.include_sidechain:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.SIDECHAIN_POS),
set()).add(ligindex)
# Check residue type:
if int(ratom) in self.polar_set:
bit_index = "%d" % (res_index + self.POLAR_POS)
if self.include_polar:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.POLAR_POS),
set()).add(ligindex)
if int(ratom) in self.hydrophobic_set:
bit_index = "%d" % (res_index +
self.HYDROPHOBIC_POS)
if self.include_hydrophobic:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.HYDROPHOBIC_POS),
set()).add(ligindex)
if int(ratom) in self.aromatic_set:
bit_index = "%d" % (res_index +
self.AROMATIC_POS)
if self.include_aromatic:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.AROMATIC_POS),
set()).add(ligindex)
if int(ratom) in self.charged_set:
bit_index = "%d" % (res_index +
self.CHARGED_POS)
if self.include_charged:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.CHARGED_POS),
set()).add(ligindex)
if hbond.match_hbond(latom, ratom, self.hbond_distance,
self.hbond_donor_angle,
self.hbond_acceptor_angle):
if ratom.atomic_number == 1:
bit_index = "%d" % (res_index + self.DONOR_POS)
if self.include_donor:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.DONOR_POS),
set()).add(ligindex)
else:
bit_index = "%d" % (res_index +
self.ACCEPTOR_POS)
if self.include_acceptor:
on_bits[bit_index] = 1
self.tracker.setdefault(
(res_id, id, self.ACCEPTOR_POS),
set()).add(ligindex)
# We now have a list of "on" bits in on_bits.keys(). Convert this into
# a canvas fingerprint and save it to the list
on_string = ",".join(list(on_bits))
fp = chm_sparse_bitset_from_string(on_string)
self.fp_list.append(fp)
self.fp_map[id] = fp
[docs] def writeFPFile(self, file_name, all_props=False):
"""
Write the stored fingerprints to a Canvas FP file given by 'file_name'
:param all_props: Whether to also export all CT-level properties.
:type all_props: bool
"""
if not self.fp_list:
return
if all_props:
# First write a .csv file with the properties, as we have no way to
# directly write a binary FP file with properties from Python
csv_name = file_name + '_temp_csv'
self.writeCSVFile(csv_name, all_props=True, for_binaryFP=True)
# Now use the canvasCSV2FPBinary utility to convert csv to fp
cnvt_cmd = [
os.path.join(os.environ['SCHRODINGER'], 'utilities',
'canvasCSV2FPBinary')
]
cnvt_cmd.extend(['-icsv', csv_name, '-o', file_name])
converter = subprocess.Popen(cnvt_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True)
out, err = converter.communicate()
if converter.returncode:
msg = "Fingerprint export failed: could not convert CSV file to binary fingerprint file."
raise RuntimeError('%s\n%s\n%s' % (msg, out, err))
# Remove file left behind by the conversion process
fileutils.force_remove(csv_name)
else:
# Do not include CT-level properties
fp_out = canvas.ChmCustomOut32('1')
fp_out.open(file_name)
for i, fp in enumerate(self.fp_list):
fp_out.write(fp, str(self.ligand_ids[i]))
fp_out.close()
[docs] def writeCSVFile(self, file_name, all_props=False, for_binaryFP=False):
"""
Write the stored fingerprints to a CSV file given by `file_name`
:param all_props: Whether to also export all CT-level properties.
:type all_props: bool
:param for_binaryFP: Whether to add "BIT" prefix to the binary column
headers.
:bype for_binaryFP: bool
"""
header_strings = [
"contact", "backbone", "sidechain", "polar", "hydrophobic",
"acceptor", "donor", "aromatic", "charged"
]
if len(self.fp_list):
with csv_unicode.writer_open(file_name) as ofile:
csv_writer = csv.writer(ofile)
# Write the header row, which has the title and description
# of each bit. If non-conformer proteins are used, the residues
# listed are those of the reference structure.
row = []
row.append("Title")
ibit = 0
for res in self.res_ids:
hres = res.replace(":", "")
for i in range(self.NUM_BITS_PER_RESIDUE):
ibit += 1
if for_binaryFP:
# If CSV file will be passed onto
# canvasCSV2FPBinary, column names must be integers
# with BIT prefix, which is important in order to
# distinguish the FB columns from the CT-level
# property columns.
colname = 'BIT%06i' % ibit
else:
colname = "%s_%s" % (hres.rstrip(),
header_strings[i])
row.append(colname)
if all_props:
# Add header for the other CT-level properties:
all_prop_names = set()
for st_props in self.ligand_props:
all_prop_names.update(list(st_props))
all_prop_names = sorted(all_prop_names)
row += all_prop_names
csv_writer.writerow(row)
# Now for each ligand write out a row representing the
# on and off bits:
for ilig, fp in enumerate(self.fp_list):
row = []
row.append(self.ligand_titles[ilig])
res_index = 0
for res in self.res_ids:
for bit in range(self.NUM_BITS_PER_RESIDUE):
if fp.positionOf(res_index + bit) >= 0:
row.append("1")
else:
row.append("0")
res_index += self.NUM_BITS_PER_RESIDUE
if all_props:
st_props = self.ligand_props[ilig]
for propname in all_prop_names:
row.append(st_props.get(propname, ""))
csv_writer.writerow(row)
[docs] def writeInteractionsFile(self, file_name):
"""
Write ligand atom indices involved in interactions to a CSV file given by `file_name`
:param file_name: name of CSV file
:type file_name: str
"""
header_strings = [
"contact", "backbone", "sidechain", "polar", "hydrophobic",
"acceptor", "donor", "aromatic", "charged"
]
# CSV file won't be written if no FPs were generated.
if not self.fp_list:
return
with csv_unicode.writer_open(file_name) as fh:
csv_writer = csv.writer(fh)
# Write the header row, which has the title and description
# of each bit. If non-conformer proteins are used, the residues
# listed are those of the reference structure.
row = ["Title"]
assert len(header_strings) == self.NUM_BITS_PER_RESIDUE
for res in self.res_ids:
hres = res.replace(":", "")
for i in range(self.NUM_BITS_PER_RESIDUE):
colname = "%s_%s" % (hres.rstrip(), header_strings[i])
row.append(colname)
csv_writer.writerow(row)
# Now for each ligand write out a row representing the
# on and off bits:
for lig_id, title in zip(self.ligand_ids, self.ligand_titles):
row = [title]
for res_id in self.res_ids:
for itype in range(len(self.INTERACTION_NAMES)):
atoms_str = self.tracker.get((res_id, lig_id, itype),
'')
row.append(atoms_str)
csv_writer.writerow(row)
[docs] def __len__(self):
"""
Return the length of the fingerprint list
"""
return len(self.fp_list)
def __getitem__(self, idx):
"""
Return the fingerprint given by 'idx'. If this is an integer
then return the idx'th item. If this is a string then treat it
as an ligand ID
"""
if type(idx) == type(1):
return self.fp_list[idx]
else:
return self.fp_map[idx]
def __iter__(self):
"""
An iterator for the fingerprints
"""
for fp in self.fp_list:
yield fp
[docs] def getInteractionMatrix(self, which_interaction):
"""
Returns a tuple containing:
i) matrix of 0s and 1s for the specified interaction.
The matrix is only generated between the first and last residue
which had an interaction with any ligand
2) An array of length N where each element is the total number
of bits turned on across all ligands the current interaction and
N is the number of residues which have an interaction
3) An array of length M where each element is the number of bits
turned on for the i'th ligand across all residues and M is the
number of Ligands
Returns None, None, None if no interactions were found.
"""
num_ligands = len(self.ligand_ids)
if self.max_res is None or self.min_res is None:
return None, None, None
num_residues = (self.max_res - self.min_res) + 1
matrix = numpy.zeros((num_ligands, num_residues), numpy.double)
res_freq = numpy.zeros(num_residues, numpy.double)
lig_freq = numpy.zeros(num_ligands, numpy.double)
# Figure out the bit offset. This will be used to determine
# which interaction is being plotted:
bit_offset = self.INTERACTION_NAMES.index(which_interaction)
if bit_offset is None:
raise Exception("Unknown interaction type %s" % which_interaction)
res_index = self.min_res * self.NUM_BITS_PER_RESIDUE
for ires in range(self.min_res, self.max_res + 1):
for ilig in range(len(self.ligand_ids)):
fp = self.fp_list[ilig]
if fp.positionOf(res_index + bit_offset) >= 0:
matrix[ilig, ires - self.min_res] = 1.0
res_freq[ires - self.min_res] += 1
lig_freq[ilig] += 1
res_index += self.NUM_BITS_PER_RESIDUE
return (matrix, res_freq, lig_freq)
[docs] def getFingerprintString(self, ligand_index, residue_index):
"""
Returns a string which summarizes the interactions
between between ligand 'ligand_index' and residue 'residue_index'
"""
fp = self.fp_list[ligand_index]
ret_list = []
res_index = residue_index * self.NUM_BITS_PER_RESIDUE
if fp.positionOf(res_index) >= 0:
ret_list.append("Cont.")
if fp.positionOf(res_index + self.BACKBONE_POS) >= 0:
ret_list.append("Back.")
if fp.positionOf(res_index + self.SIDECHAIN_POS) >= 0:
ret_list.append("Side.")
if fp.positionOf(res_index + self.POLAR_POS) >= 0:
ret_list.append("Pol.")
if fp.positionOf(res_index + self.HYDROPHOBIC_POS) >= 0:
ret_list.append("Hyd.")
if fp.positionOf(res_index + self.ACCEPTOR_POS) >= 0:
ret_list.append("Acc.")
if fp.positionOf(res_index + self.DONOR_POS) >= 0:
ret_list.append("Don.")
if fp.positionOf(res_index + self.AROMATIC_POS) >= 0:
ret_list.append("Aro.")
if fp.positionOf(res_index + self.CHARGED_POS) >= 0:
ret_list.append("Chg.")
if len(ret_list):
return ",".join(ret_list)
else:
return ""
[docs]def chm_sparse_bitset_from_string(on_string):
"""
Convert the on_string to a canvas fingerprint
:param on_string: ',' or ' ' separated string of keys whose bits should
be set
:type on_string: `str`
:return: finger print bitset
:rtype: `canvas.ChmSparseBitset`
"""
# CANVAS-5675 suggested workaround for bad bit set positionOf results
# when an empty string is used.
if not on_string:
return canvas.ChmSparseBitset()
return canvas.ChmSparseBitset.fromString(on_string)