"""
This module implements interactions between Maestro and multiple sequence
viewer.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Deacon Sweeney, Piotr Rotkiewicz
import os
from past.utils import old_div
from . import constants
from .align import align
from .residue import Residue
from .sequence import Sequence
from .structure_utils import calculate_sasa_dict
try:
from schrodinger.maestro import maestro
except ImportError:
maestro = None
try:
from schrodinger.structutils.color import Color
except ImportError:
pass
try:
from schrodinger.structutils.analyze import AslLigandSearcher
from schrodinger.structutils.analyze import calculate_sasa_by_atom
from schrodinger.structutils.analyze import evaluate_asl
except:
evaluate_asl = None
try:
from schrodinger.structutils.interactionfp import \
StructuralInteractionFingerprintGenerator
except:
StructuralInteractionFingerprintGenerator = None
try:
from schrodinger.infra import mm
except ImportError:
mm = None
try:
from schrodinger import structure
except:
structure = None
def hasMaestro():
    """
    Report whether the Maestro Python API is available.

    :rtype: bool
    :return: True if the module-level `maestro` object is set (the
        `schrodinger.maestro` import succeeded), False otherwise.
    """
    return bool(maestro)
def maestroSuperposition(viewer, show_panel=True):
    """
    Superimpose structures on the C-alpha atoms selected through the viewer.

    The aligned blocks of the viewer's sequence group are selected, that
    selection is pushed to the Maestro workspace, and a "superimposeset"
    command is issued for every selected workspace atom whose PDB name
    contains "CA".

    :param viewer: Viewer owning the sequence group to superimpose.
    :type show_panel: bool
    :param show_panel: If True, the Maestro superimpose panel is shown.

    :rtype: bool
    :return: False if Maestro is unavailable or the superposition command
        fails, True otherwise.
    """
    if not maestro:
        return False

    viewer.sequence_group.selectAlignedBlocks(selected_only=True)
    maestroSelectResiduesInWorkspace(viewer)
    if show_panel:
        maestro.command("showpanel superimpose:ASL")

    picked_atoms = maestro.selected_atoms_get()
    workspace_st = maestro.workspace_get()
    ca_terms = [
        " at.n " + str(atom_index)
        for atom_index in picked_atoms
        if "CA" in workspace_st.atom[atom_index].pdbname
    ]
    if ca_terms:
        try:
            maestro.command("superimposeset " + " | ".join(ca_terms))
        except:
            return False
    return True
def renumberMaestroEntry(viewer, sequence, update_sequence=False):
    """
    Synchronize residue numbering between an MSV sequence and the Maestro
    project table entry it came from.

    By default the residue numbers and insertion codes stored in `sequence`
    are written to every atom of the matching residues of the entry. When
    `update_sequence` is True the direction is reversed: residue number,
    insertion code, atom index and color of each residue's C-alpha atom are
    copied from the entry into the sequence.

    Residues are matched by their position in
    `structure.get_residues_unsorted(chain)`, which is compared with
    `Residue.maestro_residue_index`. Entry residues without a "CA" atom are
    skipped.

    :param viewer: Viewer whose `maestro_busy` flag is raised while the
        entry is being processed. The flag is always cleared again, even if
        an exception is raised.

    :type sequence: `Sequence`
    :param sequence: Sequence whose numbering is propagated or updated.

    :type update_sequence: boolean
    :param update_sequence: Changes the renumbering direction: when False,
        assigns MSV numbers to the Maestro entry. When True, assigns Maestro
        numbers to the MSV sequence.

    :return: None if Maestro is unavailable or the sequence did not come
        from Maestro, False if the project table could not be synchronized
        or obtained, True otherwise.
    """
    if not maestro or not sequence.from_maestro:
        return

    if not maestro.project_table_synchronize():
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False

    valid_amino_acids = set(constants.AMINO_ACIDS_3_TO_1)
    target_entry_id = sequence.maestro_entry_id

    viewer.maestro_busy = True
    try:
        is_included = any(row.entry_id == target_entry_id
                          for row in project_table.included_rows)

        for row in project_table.all_rows:
            entry_id = row.entry_id
            if entry_id != target_entry_id:
                continue

            st = row.getStructure(props=True, copy=False)

            # The entry is taken out of the workspace while its structure
            # is being modified.
            if not update_sequence and is_included:
                maestro.command("entrywsexclude entry %s" % (entry_id))

            renumbered = False
            for chain in st.chain:
                # The chain name is taken from the first atom that belongs
                # to a recognised amino acid; other chains are ignored.
                chain_name = None
                for atom in chain.atom:
                    if atom.pdbres[:3] in valid_amino_acids:
                        chain_name = atom.chain
                        break
                else:
                    continue
                if chain_name != sequence.maestro_chain_name:
                    continue

                renumbered = True
                res_dict = {
                    res.maestro_residue_index: res
                    for res in sequence.residues
                }

                # Get unsorted list of residues
                residue_list = structure.get_residues_unsorted(chain)
                for res_index, residue in enumerate(residue_list):
                    ca_atom = None
                    for atom in residue.atom:
                        if "CA" in atom.pdbname:
                            ca_atom = atom
                            break
                    if not ca_atom or res_index not in res_dict:
                        continue

                    res = res_dict[res_index]
                    if update_sequence:
                        res.num = ca_atom.resnum
                        res.icode = ca_atom.inscode
                        res.maestro_atom_num = ca_atom.index
                        res.maestro_color = ca_atom.color.rgb
                    else:
                        for residue_atom in residue.atom:
                            residue_atom.resnum = res.num
                            residue_atom.inscode = res.icode

            # Set the structure so that numbering changes are preserved.
            if renumbered and not update_sequence:
                row.setStructure(st)
                maestro.project_table_update()

            # Put the entry back even when no chain matched, so an entry
            # that was excluded above never stays out of the workspace.
            if is_included:
                maestro.command("entrywsinclude entry %s" % (entry_id))
    finally:
        viewer.maestro_busy = False

    return True
def propagateColorsToMaestro(viewer, sequence, template_colors=False):
    """
    Propagate colors from the given sequence to the corresponding
    structure in the Maestro workspace.

    Only entries currently included in the workspace are considered. For
    the chain matching the sequence, every residue whose C-alpha atom index
    equals a sequence residue's `maestro_atom_num` is recolored (the residue
    and all of its atoms), the structure is stored back in the project
    table row and a workspace redraw is requested.

    :param viewer: Viewer providing the sequence group; its `maestro_busy`
        flag is raised while colors are propagated and is always cleared
        again, even if an exception is raised.

    :type sequence: `Sequence`
    :param sequence: Sequence to be used for propagating colors to Maestro.

    :type template_colors: bool
    :param template_colors: If False, each residue uses its own color, or
        its `marked_color` when one is set. If True, residues flagged as
        `model` get the template color of the sequence (taken from
        `constants.TEMPLATE_COLORS`, white if the sequence is not in the
        structure list) and all other residues are colored white.

    :return: None if Maestro is unavailable, False if the project table
        could not be synchronized or obtained, True otherwise.
    """
    sequence_group = viewer.sequence_group

    if not maestro:
        return

    if not maestro.project_table_synchronize():
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False

    valid_amino_acids = set(constants.AMINO_ACIDS_3_TO_1)
    do_redraw = False

    viewer.maestro_busy = True
    try:
        # Always defined, so the coloring loop never depends on which
        # branch was taken here.
        tr = tg = tb = 255
        if template_colors:
            structure_list = sequence_group.getStructureList(
                omit_reference=True)
            if sequence in structure_list:
                tr, tg, tb = constants.TEMPLATE_COLORS[
                    structure_list.index(sequence) %
                    constants.N_TEMPLATE_COLORS]

        if sequence.from_maestro:
            for row in project_table.included_rows:
                entry_id = row.entry_id
                if entry_id != sequence.maestro_entry_id:
                    continue

                st = row.getStructure(props=True, copy=False)
                for chain in st.chain:
                    # The chain name is taken from the first atom that
                    # belongs to a recognised amino acid.
                    chain_name = None
                    for atom in chain.atom:
                        if atom.pdbres[:3] in valid_amino_acids:
                            chain_name = atom.chain
                            break
                    else:
                        continue
                    if chain_name != sequence.maestro_chain_name:
                        continue

                    maestro.command("entrywsexclude entry %s" % (entry_id))

                    res_dict = {
                        res.maestro_atom_num: res
                        for res in sequence.residues
                    }
                    for residue in chain.residue:
                        ca_atom = None
                        for atom in residue.atom:
                            if "CA" in atom.pdbname:
                                ca_atom = atom
                                break
                        if not ca_atom or ca_atom.index not in res_dict:
                            continue

                        res = res_dict[ca_atom.index]
                        if not template_colors:
                            r, g, b = res.color
                            if res.marked_color:
                                r, g, b = res.marked_color
                        elif res.model:
                            r, g, b = tr, tg, tb
                        else:
                            r = g = b = 255

                        new_color = Color((int(r), int(g), int(b)))
                        residue.color = new_color
                        # Change color of all atoms
                        for res_atom in residue.atom:
                            res_atom.color = new_color

                    # Set the structure so that color changes are preserved.
                    row.setStructure(st)
                    maestro.command("entrywsinclude entry %s" % (entry_id))
                    do_redraw = True

        if do_redraw:
            maestro.redraw_request()
    finally:
        viewer.maestro_busy = False

    return True
def maestroCenterOnResidue(viewer, sequence_area, res):
    """
    Center the Maestro workspace on the specified residue.

    Three Maestro commands are issued in order: "fit" on the residue,
    "spotcenter" on the atom stored as `res.maestro_atom_num`, and
    "enhance3d" on the residue. The residue is addressed by entry ID, chain
    name, residue number and insertion code.

    :param viewer: Not used by this function.

    :type sequence_area: `SequenceArea`
    :param sequence_area: Sequence area that is calling this function. Not
        used by this function.

    :type res: `Residue`
    :param res: residue to be centered on

    :rtype: bool
    :return: True if all commands were issued successfully. False if
        Maestro is unavailable, the residue does not belong to a Maestro
        entry, or any of the commands fails.
    """
    if not (maestro and res and res.sequence and res.sequence.from_maestro):
        return False

    # One ASL expression is shared by "fit" and "enhance3d", so both
    # commands are guaranteed to address the same residue. (The "fit"
    # command used to be built without a space before the second "and".)
    residue_asl = "(entry.id \"%s\" and chain.name \"%s\"" \
        " and res.num \"%s\" and res.inscode \"%s\")" \
        % (res.sequence.maestro_entry_id, res.sequence.maestro_chain_name,
           res.num, res.icode)

    commands = (
        # Zoom in onto the residue.
        "fit " + residue_asl,
        # Center on alpha carbon.
        "spotcenter \"%s\"" % (res.maestro_atom_num,),
        "enhance3d " + residue_asl,
    )
    for command in commands:
        try:
            maestro.command(command)
        except Exception:
            return False
    return True
def maestroMutateResidue(viewer, res, new_code, mutate_in_workspace=True):
    """
    Mutate a single residue, optionally modifying the corresponding
    Maestro entry.

    Nothing happens unless `res` belongs to an amino acid sequence and
    `new_code` is a key of `constants.AMINO_ACIDS` different from the
    current residue code.

    If Maestro is unavailable or the sequence did not come from Maestro,
    the residue is only changed in the viewer and becomes structureless.
    Otherwise, for a non-gap residue and `mutate_in_workspace` set, the
    matching "fragment peptide" and "mutate" Maestro commands are issued and
    the sequence is then refreshed from the entry with
    `renumberMaestroEntry`.

    :note: This method doesn't update annotations. The caller should update
        the annotations according to the introduced mutation.

    :param viewer: Viewer passed on to `renumberMaestroEntry`.

    :type res: `Residue`
    :param res: residue to be mutated; a None residue is ignored.

    :type new_code: str
    :param new_code: code of the new residue.

    :type mutate_in_workspace: bool
    :param mutate_in_workspace: If False, residues of Maestro sequences are
        left untouched.

    :return: Always None.
    """
    # Check the residue before reading any of its attributes.
    if not res or not res.sequence:
        return

    # No need to mutate to the same residue.
    if new_code == res.code:
        return

    if res.sequence.type != constants.SEQ_AMINO_ACIDS:
        return
    if new_code not in constants.AMINO_ACIDS:
        return

    if not maestro or not res.sequence.from_maestro:
        # Set new residue code.
        res.code = new_code
        res.makeName()
        # If gap, just replace it by amino acid.
        res.is_gap = False
        # Make the residue structureless
        res.structureless = True
        res.structure = []
    elif not res.is_gap and mutate_in_workspace:
        # Set new residue code.
        res.code = new_code
        aa_name = constants.AMINO_ACIDS[new_code][0]

        # Pick the amino acid fragment.
        maestro.command("fragment peptide " + aa_name)

        # Mutate this residue.
        command = "mutate entry.id \"%s\" and chain.name \"%s\"" \
            " and res.num \"%s\" and res.inscode \"%s\"" \
            % (str(res.sequence.maestro_entry_id),
               str(res.sequence.maestro_chain_name), str(res.num),
               str(res.icode))
        try:
            maestro.command(command)
        except Exception:
            return

        # Now we need to update the sequence to reflect
        # atom numbering changes
        renumberMaestroEntry(viewer, res.sequence, update_sequence=True)
def maestroGetSelectedResiduesASL(sequence_group, sequence=None):
    """
    Return an ASL string corresponding to the residue selection in MSV.

    Only sequences that came from Maestro, are included in the workspace
    and have selected residues contribute. A fully selected sequence is
    expressed by entry ID and chain name alone; otherwise the selected
    residues are listed by their `maestro_atom_num` inside a `fillres(...)`
    clause. Clauses of different sequences are joined with " OR ".

    :type sequence_group: `SequenceGroup`
    :param sequence_group: Group whose sequences are inspected.

    :type sequence: `Sequence`
    :param sequence: If given, only this sequence is considered.

    :rtype: str
    :return: The ASL expression, or an empty string if nothing is selected
        or an error occurs while building it.
    """
    try:
        clauses = []
        for seq in sequence_group.sequences:
            if sequence and seq != sequence:
                continue
            if not (seq.from_maestro and seq.hasSelectedResidues() and
                    seq.maestro_included):
                continue

            chain_asl = "(entry.id \"%s\" AND chain.name \"%s\"" % \
                (seq.maestro_entry_id, seq.maestro_chain_name)

            if seq.hasAllSelectedResidues():
                clauses.append(chain_asl + ")")
                continue

            atom_numbers = [
                str(res.maestro_atom_num)
                for res in seq.residues
                if res.selected and res.maestro_atom_num is not None
            ]
            # A sequence without any addressable selected residue
            # contributes nothing.
            if atom_numbers:
                clauses.append(chain_asl + " AND fillres(atom.entrynum " +
                               ",".join(atom_numbers) + "))")
        return " OR ".join(clauses)
    except Exception:
        return ""
def maestroSelectResiduesInWorkspace(viewer):
    """
    Mirror the current MSV residue selection in the Maestro workspace.

    The ASL produced by `maestroGetSelectedResiduesASL` replaces the
    workspace selection; when that ASL is empty the workspace selection is
    cleared instead. Errors raised by the Maestro command are ignored.

    :param viewer: Viewer providing the sequence group; its `maestro_busy`
        flag is raised for the duration of the call.

    :return: False if Maestro is unavailable, otherwise None.
    """
    if not maestro:
        return False

    group = viewer.sequence_group
    viewer.maestro_busy = True

    selection_asl = maestroGetSelectedResiduesASL(group)
    if selection_asl != "":
        command = "workspaceselectionreplace " + selection_asl
    else:
        command = "workspaceselectionclear"

    try:
        maestro.command(command)
    except:
        pass

    viewer.maestro_busy = False
def maestroSynchronize(sequence_group):
    """
    Bring the Maestro bookkeeping flags of all sequences up to date.

    For every sequence of the group:

    - `_found` is set to False if the sequence claims to come from Maestro
      but its entry cannot be retrieved from the project table;
    - `maestro_included` is set according to whether any workspace atom
      carries the sequence's entry ID.

    Afterwards `from_maestro` is cleared on the Maestro sequences whose
    entry was not found.

    :type sequence_group: `SequenceGroup`
    :param sequence_group: Group whose sequences are updated in place.

    :return: False if Maestro or its project table is unavailable,
        otherwise None.
    """
    if not maestro:
        return False

    try:
        project_table = maestro.project_table_get()
    except Exception:
        return False
    if not project_table:
        return False

    # Loop over all Maestro sequences and build a list of entries.
    maestro_entries = {}
    for seq in sequence_group.sequences:
        seq._found = True
        if not seq.from_maestro:
            continue

        entry = seq.maestro_entry_id
        try:
            row = project_table.getRow(entry_id=entry)
        except Exception:
            # A failed lookup is treated like a missing row; `row` must
            # not keep the value from a previous iteration.
            row = None
        if row is None:
            seq._found = False

        maestro_entries.setdefault(entry, {})[seq.maestro_chain_name] = seq

    # Get set of workspace entry ids
    workspace = maestro.workspace_get()
    workspace_entry_ids = {atom.entry_id for atom in workspace.atom}

    for seq in sequence_group.sequences:
        seq.maestro_included = seq.maestro_entry_id in workspace_entry_ids

    # Change 'from_maestro' flag for non-existing sequences.
    for chains in maestro_entries.values():
        for seq in chains.values():
            if not seq._found:
                seq.from_maestro = False
[docs]def maestroIncorporateEntries(
sequence_group,
what="included",
entry_ids=None,
ignore=[], # noqa: M511
include=False,
incorporate_scratch_entry=False,
align_func=None,
ct=None,
use_title=False,
viewer=None):
"""
Incorporates Maestro entries into the sequence viewer.

Structures are taken either from `ct` or from rows of the Maestro
project table. For every chain that contains a recognised amino acid
residue a `Sequence` is created in `sequence_group` (or an existing
sequence with the same entry ID and chain name is reused) and filled with
one `Residue` per structure residue that has a "CA" atom, together with
secondary structure and disulfide bond child annotations.

NOTE(review): the nesting of the statements in this function is not
visible in this copy of the source; the summary above follows the most
plausible reading and should be confirmed against the formatted original.

:type sequence_group: `SequenceGroup`
:param sequence_group: Group whose `sequences` list is read and extended.

:type what: str
:param what: Project table rows to use: "included", "selected", "all" or
"custom". Any other value makes the function return False.

:param entry_ids: Entry IDs looked up with `getRow` when `what` is
"custom".

:param ignore: Entry IDs that are skipped.

:param include: Not referenced in the function body.

:param incorporate_scratch_entry: If True and no structure was collected,
the workspace structure is used under the entry ID "Scratch".

:param align_func: Optional callable invoked as
`align_func(full_seq, seq)` for chains that have a SEQRES record.

:param ct: Optional structure used instead of project table rows.

:param use_title: Passed on to `getMaestroStructureName`.

:param viewer: Optional viewer passed to
`synchronizePropertiesWithMaestro`.

:rtype: bool
:return: False if neither Maestro nor `ct` is available or `what` is not
recognised, True otherwise.
"""
# Nothing can be done without either the Maestro API or an explicit
# structure.
if not maestro and not ct:
return False
if maestro:
maestro.project_table_synchronize()
project_table = maestro.project_table_get()
maestro_entries = {}
valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1)
st_list = []
# An explicit structure is processed on its own, with no entry ID.
if ct:
st_list.append((None, ct))
else:
# Loop over all Maestro sequences and build a list of entries.
for seq in sequence_group.sequences:
if seq.from_maestro:
entry = seq.maestro_entry_id
chain = seq.maestro_chain_name
if entry not in maestro_entries:
maestro_entries[entry] = {}
maestro_entries[entry][chain] = seq
# Choose the project table rows according to `what`.
if what == "included":
rows = reversed(list(project_table.included_rows))
elif what == "selected":
rows = project_table.selected_rows
elif what == "all":
rows = project_table.all_rows
elif what == "custom":
rows = (project_table.getRow(eid) for eid in entry_ids)
else:
return False
st_list = [(row.entry_id, row.getStructure()) for row in rows]
# Record which entry IDs currently have atoms in the workspace.
workspace = maestro.workspace_get(copy=True)
workspace_entry_ids = set()
for atom in workspace.atom:
workspace_entry_ids.add(atom.entry_id)
if not st_list and workspace and incorporate_scratch_entry:
st_list.append(("Scratch", workspace))
for entry_id, st in st_list:
if entry_id in ignore:
# Ignore this entry ID.
continue
found = False
has_seqres = True
# Obtain a data handle for reading the "m_PDB_SEQRES" block; has_seqres
# is cleared when both calls fail.
try:
data_handle = mm.mmct_ct_m2io_get_unrequested_handle(st)
except:
try:
data_handle = mm.mmct_ct_get_or_open_additional_data(st, True)
except:
has_seqres = False
chain_seq_dict = {}
if has_seqres:
num_seqres_blocks = mm.m2io_get_number_blocks(
data_handle, "m_PDB_SEQRES")
if num_seqres_blocks == 0:
has_seqres = False
if has_seqres:
try:
mm.m2io_goto_block(data_handle, "m_PDB_SEQRES", 1)
except:
has_seqres = False
# Convert each SEQRES row into a one-letter sequence string keyed by chain
# ID; three-letter names that are not recognised become "X".
if has_seqres:
num_rows = mm.m2io_get_index_dimension(data_handle)
for i in range(1, num_rows + 1):
chain_strings = mm.m2io_get_string_indexed(
data_handle, i, ["s_pdb_chain_id"])
seqres_strings = mm.m2io_get_string_indexed(
data_handle, i, ["s_pdb_SEQRES"])
chain = chain_strings[0]
seqres = seqres_strings[0]
residue_list = seqres.split(" ")
seq_elements = []
for res in residue_list:
if len(res) == 3:
if res in valid_amino_acids:
seq_elements.append(
constants.AMINO_ACIDS_3_TO_1[res])
else:
seq_elements.append("X")
sequence_string = "".join(seq_elements)
if sequence_string:
chain_seq_dict[chain] = sequence_string
mm.m2io_leave_block(data_handle)
# Build or refresh one Sequence per protein chain.
for chain in st.chain:
seq = None
if entry_id in maestro_entries:
chains = maestro_entries[entry_id]
if chain.name in chains:
seq = chains[chain.name]
# A chain counts as protein if any of its residues has a recognised
# amino acid name.
is_protein = False
for residue in chain.residue:
if residue.pdbres[:3] in valid_amino_acids:
is_protein = True
break
if not is_protein:
continue
secondary = None
ssb = None
# No sequence exists yet for this entry and chain: create one.
if not seq:
seq = Sequence()
seq.type = constants.SEQ_AMINO_ACIDS
seq.chain_id = chain.name[0]
seq.color_scheme = constants.COLOR_MAESTRO
seq.has_structure = True
if entry_id:
seq.from_maestro = True
seq.maestro_included = entry_id in workspace_entry_ids
seq.maestro_entry_id = entry_id
seq.maestro_chain_name = chain.name
sequence_group.sequences.append(seq)
else:
# Reuse the secondary structure and SS-bond children of the existing
# sequence, if it has them.
for child in seq.children:
if child.type == constants.SEQ_SECONDARY:
secondary = child
break
for child in seq.children:
if child.type == constants.SEQ_ANNOTATION and \
child.annotation_type == constants.ANNOTATION_SSBOND:
ssb = child
break
if ssb is None:
ssb = seq.createSSBondAssignment()
ssb.parent_sequence = seq
seq.children.append(ssb)
# This code comes from mcpro, zmat.py
bond_list = []
asl = "a.pt SG and (res. CYS or res. CYX)"
if chain.name > ' ':
asl += " and (chain.name %s)" % str(chain.name)
s_atoms = evaluate_asl(st, asl)
found = False
# Collect bonded SG atom pairs, each residue identified by its residue
# number followed by its insertion code.
for s_atom in s_atoms:
for s2_atom in s_atoms:
if mm.mmct_is_atom_bonded(st, s_atom, s2_atom) == mm.TRUE:
found = True
if s_atom <= s2_atom:
resi1 = str(
st.atom[s_atom].resnum) + \
str(st.atom[s_atom].inscode)
resi2 = str(
st.atom[s2_atom].resnum) + \
str(st.atom[s2_atom].inscode)
bond_list.append((resi1, resi2))
if not found:
seq.children.remove(ssb)
if secondary is None:
secondary = seq.createSecondaryAssignment()
secondary.parent_sequence = seq
seq.children.append(secondary)
if entry_id:
name = getMaestroStructureName(st,
entry_id,
use_title=use_title)
else:
name = "Structure"
name = name[:]
short_name = name[:]
seq.name = name
seq.short_name = short_name
residues = []
ss_residues = []
ssb_residues = []
# Get unsorted list of residues (get residues by connectivity)
residue_list = structure.get_residues_unsorted(chain)
for res_index, residue in enumerate(residue_list):
# Residues without a "CA" atom are skipped.
ca_atom = None
for atom in residue.atom:
if "CA" in atom.pdbname:
ca_atom = atom
break
if not ca_atom:
continue
if residue.pdbres[:3] in valid_amino_acids:
code = constants.AMINO_ACIDS_3_TO_1[residue.pdbres[:3]]
else:
code = 'X'
res = Residue()
res.sequence = seq
res.code = code
res.name = residue.pdbres[:3]
res.num = residue.resnum
res.icode = residue.inscode
res.structureless = False
res.bfactor = ca_atom.temperature_factor
if entry_id:
res.maestro_color = ca_atom.color.rgb
res.maestro_atom_num = ca_atom.index
res.maestro_residue_index = res_index
ss_res = Residue()
ss_res.sequence = seq
if ssb:
ssb_res = Residue()
ssb_res.sequence = seq
ssb_res.code = ' '
ssb_residues.append(ssb_res)
# A secondary_structure value of 1 is recorded as 'H' and 2 as 'E';
# anything else is left blank.
if ca_atom.secondary_structure == 1:
ss_res.code = 'H'
res.ss_code = 'H'
elif ca_atom.secondary_structure == 2:
ss_res.code = 'E'
res.ss_code = 'E'
else:
ss_res.code = ' '
res.ss_code = ' '
residues.append(res)
ss_residues.append(ss_res)
if residues:
seq.residues = residues
secondary.residues = ss_residues
if ssb and ssb in seq.children:
ssb.residues = ssb_residues
if chain.name in chain_seq_dict and align_func:
# Use full SEQRES sequence.
full_seq = Sequence()
full_seq.appendResidues(chain_seq_dict[chain.name])
align_func(full_seq, seq)
if ssb and ssb in seq.children:
#ssb.residues = ssb_residues
# Translate the residue IDs of each bond into sequence indices and keep
# only the bonds for which both ends were found.
new_bond_list = []
for rid1, rid2 in bond_list:
ridx1 = seq.getResidueIndex(rid1)
ridx2 = seq.getResidueIndex(rid2)
if ridx1 is not None and ridx2 is not None:
new_bond_list.append((ridx1, ridx2))
seq.ssb_bond_list = new_bond_list
ssb.bond_list = new_bond_list
if not new_bond_list:
seq.children.remove(ssb)
else:
ssb.height = int(len(new_bond_list) * 0.5 + 0.5)
# Re-establish the back references of residues and child annotations.
for res in seq.residues:
res.sequence = seq
for child in seq.children:
child.parent_sequence = seq
for res in child.residues:
res.sequence = child
else:
# No residue with a "CA" atom was collected: drop the sequence.
sequence_group.sequences.remove(seq)
# NOTE(review): `structure` is the module imported at the top of the file
# (None only when that import failed), so this branch is skipped whenever
# the import succeeded -- confirm that this is the intended condition.
if not structure and viewer:
synchronizePropertiesWithMaestro(viewer, selection=True)
return True
def maestroCalculateMinimumDistance(sequence):
    """
    Compute, for every residue of the sequence, the minimum squared
    C-alpha distance to any selected residue of the same sequence.

    C-alpha atoms are looked up in the project table entry and chain the
    sequence came from and matched to residues by residue number plus
    insertion code. Note that the values are sums of squared coordinate
    differences; no square root is taken.

    :type sequence: `Sequence`
    :param sequence: Sequence whose selected residues are the reference.

    :return: False if Maestro or its project table is unavailable.
        Otherwise a list with one value per residue of the sequence;
        residues for which no distance could be computed keep the initial
        value of 1000000.0.
    """
    if not maestro:
        return False
    if not maestro.project_table_synchronize():
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False

    amino_acid_names = list(constants.AMINO_ACIDS_3_TO_1)

    ca_by_res_id = {}
    for row in project_table.all_rows:
        st = row.getStructure(props=True, copy=False)
        chain_name = None
        entry_id = None
        for chain in st.chain:
            for atom in chain.atom:
                if atom.pdbres[:3] in amino_acid_names:
                    chain_name = atom.chain
                    entry_id = st.property["s_m_entry_id"]
                    break
            if (sequence.maestro_chain_name == chain_name and
                    sequence.maestro_entry_id == entry_id):
                ca_by_res_id = {
                    str(ca.resnum) + str(ca.inscode): ca
                    for ca in chain.atom
                    if "CA" in ca.pdbname
                }
                break

    residue_ids = [
        str(res.num) + str(res.icode) for res in sequence.residues
    ]
    # Residues that have a C-alpha atom in the structure, with their index.
    located = [(idx, ca_by_res_id[res_id])
               for idx, res_id in enumerate(residue_ids)
               if res_id in ca_by_res_id]

    distances = [1000000.0] * len(sequence.residues)
    for sel_res, sel_id in zip(sequence.residues, residue_ids):
        if not sel_res.selected or sel_id not in ca_by_res_id:
            continue
        sel_atom = ca_by_res_id[sel_id]
        for idx, atom in located:
            dx = atom.x - sel_atom.x
            dy = atom.y - sel_atom.y
            dz = atom.z - sel_atom.z
            dist = dx * dx + dy * dy + dz * dz
            if dist < distances[idx]:
                distances[idx] = dist
    return distances
def synchronizePropertiesWithMaestro(viewer, colors=False, selection=False):
    """
    Synchronize Maestro workspace colors or selection state with the
    corresponding sequences in the sequence viewer.

    Only entries included in the workspace are considered. Each protein
    chain is matched to a Maestro sequence of the viewer by entry ID and
    chain name, and each residue is matched to the structure atom whose
    index equals the residue's `maestro_atom_num`. The viewer is repainted
    at the end.

    :param viewer: Viewer providing the sequence group to update.

    :type colors: bool
    :param colors: Synchronize colors. The atom color is stored as
        `maestro_color` and also becomes the residue color when the
        sequence uses the Maestro color scheme.

    :type selection: bool
    :param selection: Synchronize selection state. A residue becomes
        selected exactly when its matched atom is selected in the
        workspace.

    :rtype: bool
    :return: True on successful synchronization, False otherwise
    """
    sequence_group = viewer.sequence_group

    if not maestro:
        return False
    if not maestro.project_table_synchronize():
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False

    valid_amino_acids = set(constants.AMINO_ACIDS_3_TO_1)

    # A set keeps the per-atom membership test below constant time.
    selected_atoms = set()
    if selection:
        workspace_st = maestro.workspace_get(copy=False)
        for idx in maestro.selected_atoms_get():
            atom = workspace_st.atom[idx]
            selected_atoms.add((atom.entry_id, atom.number_by_entry))

    for row in project_table.included_rows:
        entry_id = row.entry_id
        st = row.getStructure(props=True, copy=False)
        for chain in st.chain:
            # The chain name is taken from the first atom that belongs to
            # a recognised amino acid; other chains are ignored.
            chain_name = None
            for atom in chain.atom:
                if atom.pdbres[:3] in valid_amino_acids:
                    chain_name = atom.chain
                    break
            else:
                continue

            # Try to locate this sequence in the sequence group.
            for seq in sequence_group.sequences:
                if seq.from_maestro and \
                        seq.maestro_chain_name == chain_name and \
                        seq.maestro_entry_id == entry_id:
                    break
            else:
                continue

            atom_dict = {res.maestro_atom_num: res for res in seq.residues}
            for atom in chain.atom:
                if atom.index not in atom_dict:
                    continue
                res = atom_dict[atom.index]
                if colors:
                    res.maestro_color = atom.color.rgb
                    if seq.color_scheme == constants.COLOR_MAESTRO:
                        res.color = res.maestro_color
                if selection:
                    res.selected = (entry_id, atom.index) in selected_atoms

    # Repaint the sequence viewer.
    viewer.updateView()
    return True
def getMaestroStructureName(st, entry_id, use_title=False):
    """
    Generate a display name for a Maestro entry.

    The name is the first available of the structure properties
    "s_pdb_PDB_ID", "s_m_title" and "s_m_entry_name"; with `use_title` set,
    "s_m_title" is tried before the others. If none of them is present the
    name is "Entry " followed by the entry ID.

    :param st: Structure whose `property` mapping is consulted.

    :param entry_id: Entry ID of the structure. The special value "Scratch"
        is returned unchanged. Non-string IDs are accepted and converted
        with `str` for the fallback name.

    :type use_title: bool
    :param use_title: Prefer the entry title over the PDB ID.

    :return: The name of the entry.
    """
    if entry_id == "Scratch":
        return "Scratch"

    property_names = ("s_pdb_PDB_ID", "s_m_title", "s_m_entry_name")
    if use_title:
        property_names = ("s_m_title",) + property_names

    properties = st.property
    for property_name in property_names:
        if property_name in properties:
            return properties[property_name]

    # str() keeps the fallback working for integer entry IDs.
    return "Entry " + str(entry_id)
def getMaestroLigandList(template_list=None,
                         entry_list=None,
                         chain_distance=5.0,
                         list_all=False):
    """
    Return the list of ligands extracted from Maestro.

    :type template_list: list of Sequences
    :param template_list: Optional list of MSV sequences to extract
        ligands from. If the list is not specified, all PT chains
        will be analyzed.

    :note: This function ignores workspace scratch entry.

    :type entry_list: list of strings
    :param entry_list: Optional list of entry IDs to extract ligands
        from. If the list is not specified, all PT chains
        will be analyzed. The list is not modified.

    :note: If both template_list and entry_list are specified,
        entry_list takes precedence and template_list is not being used.

    :type chain_distance: float
    :param chain_distance: Distance from protein chain required
        to classify a molecule as a ligand. All molecules outside
        of this threshold will be ignored.

    :type list_all: boolean
    :param list_all: Disregards ligand selection and chain distance
        criteria and returns a list of all non-protein molecules.

    :rtype: tuple of lists
    :return: A tuple of lists of ligand names and list of ligands.
        The list of ligands consists of 4-tuples: (entry_id, ligand_pdbres,
        ligand_asl, ligand_st). Both lists are empty if Maestro, the ligand
        searcher or the project table is unavailable.
    """
    if not maestro:
        return [], []
    if not AslLigandSearcher:
        return [], []
    if not maestro.project_table_synchronize():
        return [], []
    project_table = maestro.project_table_get()
    if not project_table:
        return [], []

    # Results are collected in their own lists so that the `entry_list`
    # argument is still available as a filter.
    name_list = []
    ligand_list = []

    if entry_list:
        selected_entries = list(entry_list)
    else:
        selected_entries = [
            seq.maestro_entry_id
            for seq in (template_list or [])
            if seq.from_maestro
        ]

    valid_amino_acids = set(constants.AMINO_ACIDS_3_TO_1)
    asl_ligand_searcher = AslLigandSearcher()

    for row in project_table.all_rows:
        entry_id = row.entry_id
        # An empty filter means that every entry is analyzed.
        if selected_entries and entry_id not in selected_entries:
            continue

        st = row.getStructure(props=True, copy=False)
        entry_name = getMaestroStructureName(st, entry_id)

        lig_pdbres = []
        for lig in asl_ligand_searcher.search(st):
            if not list_all:
                ligand_asl = lig.ligand_asl
                protein_within_asl = \
                    "(not %s) and (fillres within %g (%s)) and (protein)" % \
                    (ligand_asl, chain_distance, ligand_asl)
                # Test if the ligand is within chain_distance from the
                # protein chain
                if not evaluate_asl(st, protein_within_asl):
                    continue

            atom = lig.st.atom[1]
            lig_name = atom.pdbres[:3]
            lig_pdbres.append(lig_name)
            name_list.append(entry_name + " " + lig_name + " " + atom.chain +
                             ":" + str(atom.resnum))
            ligand_list.append(
                (entry_id, lig.pdbres, lig.ligand_asl, lig.st))

        if list_all:
            # Add every molecule that is neither an amino acid nor one of
            # the ligands already reported for this entry.
            for mol in st.molecule:
                atom = st.atom[mol.getAtomIndices()[0]]
                pdbres = atom.pdbres[:3]
                if pdbres in valid_amino_acids or pdbres in lig_pdbres:
                    continue
                name_list.append(entry_name + " " + pdbres + " " +
                                 atom.chain + ":" + str(atom.resnum))
                ligand_list.append(
                    (entry_id, atom.pdbres, "mol.num " + str(mol.number),
                     mol.extractStructure()))

    return (name_list, ligand_list)
def getEntryByName(name):
    """
    Look up a Maestro entry by its entry name.

    :param name: Value compared with the "s_m_entry_name" property of each
        project table entry.

    :return: Entry ID of the first entry with that name, or None if there
        is no such entry or Maestro or its project table is unavailable.
    """
    if not maestro:
        return None
    if not maestro.project_table_synchronize():
        return None
    project_table = maestro.project_table_get()
    if not project_table:
        return None

    for row in project_table.all_rows:
        entry_id = row.entry_id
        properties = row.getStructure(props=True, copy=False).property
        if "s_m_entry_name" in properties and \
                name == properties["s_m_entry_name"]:
            return entry_id
    return None
def getEntryByJobName(name):
    """
    Look up a Maestro entry by its job name.

    :param name: Value compared with the "s_m_job_name" property of each
        project table entry.

    :return: Entry ID of the first entry with that job name, or None if
        there is no such entry or Maestro or its project table is
        unavailable.
    """
    if not maestro:
        return None
    if not maestro.project_table_synchronize():
        return None
    project_table = maestro.project_table_get()
    if not project_table:
        return None

    for row in project_table.all_rows:
        entry_id = row.entry_id
        properties = row.getStructure(props=True, copy=False).property
        if "s_m_job_name" in properties and \
                name == properties["s_m_job_name"]:
            return entry_id
    return None
def maestroGetListOfEntryIDs():
    """
    Collect the entry IDs of all rows of the Maestro project table.

    :rtype: list
    :return: Entry IDs in project table order; an empty list if Maestro or
        its project table is unavailable.
    """
    if not maestro:
        return []
    if not maestro.project_table_synchronize():
        return []
    project_table = maestro.project_table_get()
    if not project_table:
        return []
    return [row.entry_id for row in project_table.all_rows]
def maestroColorEntrySurface(viewer, sequence):
    """
    Color the molecular surface of an entry by the sequence colors.

    The sequence colors are first propagated to the Maestro structure, then
    the "Molecular Surface" of the entry is switched to the "Atom Color"
    scheme. Sequences that did not come from Maestro or have no entry ID
    are left alone.

    :param viewer: Viewer passed on to `propagateColorsToMaestro`.

    :type sequence: `Sequence`
    :param sequence: Sequence providing the colors and the entry ID.

    :rtype: bool
    :return: False if the Maestro command fails, True otherwise.
    """
    if not (sequence.from_maestro and sequence.maestro_entry_id):
        return True

    propagateColorsToMaestro(viewer, sequence)
    command = "".join((
        'surfacescheme \"Molecular Surface\" entry=\"',
        str(sequence.maestro_entry_id),
        "\" scheme=\"Atom Color\"",
    ))
    try:
        maestro.command(command)
    except:
        return False
    return True
def maestroIncludeAllEntrySequences(sequence_group, entry_id):
    """
    Flag every sequence of the given entry as included.

    :type sequence_group: `SequenceGroup`
    :param sequence_group: Group whose sequences are updated in place.

    :param entry_id: Entry ID compared with each sequence's
        `maestro_entry_id`.
    """
    entry_sequences = [
        seq for seq in sequence_group.sequences
        if seq.maestro_entry_id == entry_id
    ]
    for seq in entry_sequences:
        seq.maestro_included = True
def maestroExcludeAllEntrySequences(sequence_group, entry_id):
    """
    Flag every sequence of the given entry as not included.

    :type sequence_group: `SequenceGroup`
    :param sequence_group: Group whose sequences are updated in place.

    :param entry_id: Entry ID compared with each sequence's
        `maestro_entry_id`.
    """
    entry_sequences = [
        seq for seq in sequence_group.sequences
        if seq.maestro_entry_id == entry_id
    ]
    for seq in entry_sequences:
        seq.maestro_included = False
def maestroGetProjectPath(old=False):
    """
    Build the path of the administrative directory of the current project.

    :type old: bool
    :param old: If True, the path ends at the ".mmproj-admin" directory;
        otherwise its "additional_data" subdirectory is appended.

    :return: The path, terminated by a path separator, or None if Maestro
        or the `mm` module is unavailable or the path cannot be determined.
    """
    if not (maestro and mm):
        return None
    try:
        maestro.project_table_synchronize()
        project_dir = maestro.project_table_get().fullname
        components = [project_dir, ".mmproj-admin"]
        if not old:
            components.append("additional_data")
        # The empty last component yields the trailing separator.
        components.append("")
        return os.sep.join(components)
    except:
        return None
# Reference SASA per residue type, computed on first use by
# maestroCalculateSASA and reused by later calls.
STD_SASA_DICT = None


def maestroCalculateSASA(
        sequence_group,
        sequences=[],  # noqa: M511
        selected_only=False,
        normalize=True,
        percentage=False):
    """
    Calculate the solvent accessible surface area per residue.

    For every Maestro sequence considered, the per-atom SASA of its project
    table entry is summed over the atoms of each residue that match the ASL
    'sidechain or atom.ptype "CA"', and the result is stored in the `area`
    attribute of the residue with the same residue number and insertion
    code.

    :type sequence_group: SequenceGroup
    :param sequence_group: Target sequence group.

    :type sequences: list of Sequences
    :param sequences: Optional list of sequences. If the list is provided
        it will be used instead of the sequence group. The list is not
        modified.

    :type selected_only: bool
    :param selected_only: Calculate SASA only for selected sequences.

    :type normalize: bool
    :param normalize: Should we normalize the SASA area by area of amino acid
        in default conformation. Residue types missing from the reference
        table are left unnormalized.

    :type percentage: bool
    :param percentage: If True return percentage SASA instead of absolute
        values. Implies `normalize`.

    :rtype: bool
    :return: True if the area of at least one residue was set, False
        otherwise (including when Maestro or its project table is
        unavailable).
    """
    global STD_SASA_DICT

    if not maestro:
        return False

    try:
        maestro.project_table_synchronize()
        project_table = maestro.project_table_get()
    except Exception:
        return False
    if not project_table:
        return False

    # Percentage implies normalization
    if percentage:
        normalize = True

    if STD_SASA_DICT is None:
        STD_SASA_DICT = calculate_sasa_dict(ignore_backbone=True,
                                            include_calpha=True)

    group = sequences if sequences else sequence_group.sequences

    # Loop over all Maestro sequences and build a list of entries.
    maestro_entries = {}
    for seq in group:
        if selected_only and not seq.selected:
            continue
        if seq.from_maestro:
            seq._found = False
            chains = maestro_entries.setdefault(seq.maestro_entry_id, {})
            chains[seq.maestro_chain_name] = seq

    found = False
    for row in project_table.all_rows:
        # Make sure this row's entry ID is within MSV sequences.
        chains = maestro_entries.get(row.entry_id)
        if chains is None:
            continue
        try:
            st = row.getStructure(props=True, copy=False)
        except Exception:
            continue

        full_sasa_list = calculate_sasa_by_atom(st)
        # A set keeps the per-atom membership test below constant time.
        side_chain_atoms = set(
            evaluate_asl(st, "sidechain or atom.ptype \"CA\""))

        for chain in st.chain:
            seq = chains.get(chain.name)
            if seq is None:
                continue

            res_dict = {str(res.num) + res.icode: res for res in seq.residues}

            # Best effort: a failure inside a chain abandons the rest of
            # that chain only.
            try:
                for res in chain.residue:
                    # Atom indices are 1-based, the SASA list is 0-based.
                    sasa = sum((full_sasa_list[anum - 1]
                                for anum in res.getAtomIndices()
                                if anum in side_chain_atoms), 0.0)
                    if normalize and STD_SASA_DICT and \
                            res.pdbres[:3] in STD_SASA_DICT:
                        sasa /= STD_SASA_DICT[res.pdbres[:3]]
                        if percentage:
                            sasa *= 100.0
                    res_id = str(res.resnum) + str(res.inscode)
                    if res_id in res_dict:
                        res_dict[res_id].area = sasa
                        found = True
            except Exception:
                continue

    return found
def maestroFindInteractions(sequence, ligand_list, selected_only=False):
    """
    Generate interaction fingerprint strings between a sequence and a list
    of ligands.

    The project table entry and chain the sequence came from is used as
    the receptor of a `StructuralInteractionFingerprintGenerator`, and a
    fingerprint is generated for every ligand.

    :type sequence: Sequence
    :param sequence: Sequence the interactions are calculated for.

    :type ligand_list: list of tuples
    :param ligand_list: List of (ligand_st, ligand_entry_id) of the
        ligands.

    :param selected_only: Not used by this function.

    :rtype: list of str
    :return: Flat list of the strings returned by
        `StructuralInteractionFingerprintGenerator.getFingerprintString`,
        ordered by residue position and then by ligand; gap residues are
        left out. None if the calculations were not completed successfully
        or no matching entry and chain was found.
    """
    if not maestro or not sequence or not ligand_list:
        return None
    if not StructuralInteractionFingerprintGenerator:
        return None

    generator = StructuralInteractionFingerprintGenerator()

    try:
        maestro.project_table_synchronize()
        project_table = maestro.project_table_get()
    except:
        return None
    if not project_table or not sequence.from_maestro:
        return None

    for row in project_table.all_rows:
        entry_id = row.entry_id
        if entry_id != sequence.maestro_entry_id:
            continue
        try:
            st = row.getStructure(props=True, copy=False)
        except:
            continue

        for chain in st.chain:
            if chain.name != sequence.maestro_chain_name:
                continue

            # Found a Maestro chain, generate interactions
            generator.setReceptorStructure(st, entry_id)
            for ligand_st, ligand_id in ligand_list:
                generator.generateFingerprint(ligand_st, ligand_id, None)

            ligand_count = len(ligand_list)
            return [
                generator.getFingerprintString(lig_idx, res_idx)
                for res_idx, res in enumerate(sequence.residues)
                if not res.is_gap
                for lig_idx in range(ligand_count)
            ]
    return None
def maestroGetStructureAlignment(viewer, sequence_group):
    """
    Creates a sequence alignment based on structure superposition
    of the included entries. Consecutively merges the alignments.

    Every Maestro-backed sequence returned by
    sequence_group.getStructureList() is matched to its project table
    entry and chain. Gaps and structureless residues are then removed from
    the sequence and its children (the sequences are modified in place),
    and the coordinates of the atoms referenced by the residues'
    maestro_atom_num are collected. The first usable sequence is then
    aligned against each of the others in turn, using a distance-derived
    score matrix.

    :type viewer: object
    :param viewer: Viewer owning the sequences; it is flagged as changed
                   and redrawn after the residues have been filtered.

    :type sequence_group: object
    :param sequence_group: Sequence group providing getStructureList() and
                           receiving the merged alignments.

    :return: False if Maestro or its project table is unavailable; None
             otherwise (including when fewer than two usable structures
             are present).
    """
    structure_list = sequence_group.getStructureList()
    if len(structure_list) < 2:
        return
    # Same guard the other Maestro helpers in this module use; without it
    # the calls below raise AttributeError when Maestro is not available.
    if not maestro:
        return False

    # Extract C-alpha positions for all MSV entries
    result = maestro.project_table_synchronize()
    if not result:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False

    ca_positions = {}
    valid_structures = []
    for sequence in structure_list:
        if not sequence.from_maestro:
            continue
        for row in project_table.all_rows:
            entry_id = row.entry_id
            # Skip unrelated entries before paying for getStructure().
            if entry_id != sequence.maestro_entry_id:
                continue
            st = row.getStructure(props=False, copy=False)
            for chain in st.chain:
                if chain.name != sequence.maestro_chain_name:
                    continue
                # Remove gaps and structureless residues
                if not _stripGapsAndStructureless(sequence):
                    continue
                valid_structures.append(sequence)
                # Extract list of C-alpha positions
                wanted_atoms = set(
                    res.maestro_atom_num for res in sequence.residues)
                ca_positions[sequence] = [(atom.x, atom.y, atom.z)
                                          for atom in chain.atom
                                          if atom.index in wanted_atoms]

    viewer.contents_changed = True
    viewer.updateView()

    if len(valid_structures) < 2:
        return

    str1 = valid_structures[0]
    coords1 = ca_positions[str1]
    for str2 in valid_structures[1:]:
        score_matrix = _buildCaScoreMatrix(coords1, ca_positions[str2])
        align(str1,
              str2,
              merge=True,
              sequence_group=sequence_group,
              gap_open_penalty=-2.0,
              gap_extend_penalty=-0.2,
              scoring_matrix=score_matrix,
              direct_scores=True)


def _stripGapsAndStructureless(sequence):
    """
    Drops gap and structureless positions from a sequence and, at the same
    indices, from each of its children.

    The filtered lists are staged in the `_tmp_res` attribute of the
    sequence and of every child, exactly as before this helper existed.

    :return: True if residues remained and the filtered lists were
             installed; False if nothing remained, in which case the
             residue lists are left untouched.
    """
    sequence._tmp_res = []
    for child in sequence.children:
        child._tmp_res = []
    for index, res in enumerate(sequence.residues):
        if res.is_gap or res.structureless:
            continue
        sequence._tmp_res.append(res)
        for child in sequence.children:
            child._tmp_res.append(child.residues[index])
    if not sequence._tmp_res:
        return False
    sequence.residues = sequence._tmp_res
    for child in sequence.children:
        child.residues = child._tmp_res
    return True


def _buildCaScoreMatrix(coords1, coords2):
    """
    Builds the pairwise score matrix for two lists of (x, y, z) positions.

    Entry [i][j] is 20 / (1 + d2 / 25), where d2 is the squared distance
    between coords1[i] and coords2[j]: 20 for coincident points, 10 at a
    distance of 5 coordinate units.

    The matrix has len(coords1) + 1 rows and len(coords2) + 2 columns; the
    extra cells stay 0. This padding is carried over unchanged.
    NOTE(review): presumably align() relies on it - confirm.
    """
    len2 = len(coords2)
    score_matrix = [[0] * (len2 + 2) for _ in range(len(coords1) + 1)]
    for i, (x1, y1, z1) in enumerate(coords1):
        scores = score_matrix[i]
        for j, (x2, y2, z2) in enumerate(coords2):
            dist2 = (x2 - x1) * (x2 - x1) + \
                (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1)
            # Float literals keep this a true division on Python 2 as well.
            scores[j] = 20.0 / (1.0 + dist2 / 25.0)
    return score_matrix
def maestroGetLigandAnnotations(sequence):
    """
    Creates ligand annotations for a given sequence.

    For every ligand found in the sequence's Maestro entry that has protein
    atoms of this sequence within 6 distance units, an annotation Sequence
    is created and appended to sequence.children. A child with the same
    name is removed first, even if the ligand then turns out to have no
    contacts. The annotation holds one blank Residue per gapless residue
    of the sequence, colored and valued by proximity: 3.0 for residues
    within 3, 6.0 for residues within 6, and -1.0 otherwise.

    :type sequence: Sequence
    :param sequence: Sequence to annotate.

    :rtype: list of Sequence, or bool
    :return: List of the newly created annotation sequences. False if the
             sequence has no structure, or Maestro, the structure analysis
             tools or the project table are unavailable.
    """
    if not sequence.has_structure:
        return False
    # evaluate_asl is None when the structutils.analyze import failed; in
    # that case AslLigandSearcher was never bound either.
    if not maestro or not evaluate_asl:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False

    asl_ligand_searcher = AslLigandSearcher()

    # Residues keyed by chain id + residue number + insertion code, the
    # same key that is built from structure atoms below.
    residue_by_key = {}
    for res in sequence.residues:
        key = str(sequence.chain_id) + str(res.num) + str(res.icode)
        residue_by_key[key] = res

    ligand_list = []
    for row in project_table.all_rows:
        if row.entry_id != sequence.maestro_entry_id:
            continue
        st = row.getStructure(props=True, copy=False)
        ligands = asl_ligand_searcher.search(st)
        if not ligands:
            continue
        for lig in ligands:
            ligand_chain = lig.st.atom[1].chain
            name = lig.pdbres + "\n" + "Chain " + ligand_chain + \
                "\n" + str(len(lig.st.atom)) + " atoms"
            # Replace a previously generated annotation for this ligand.
            for child in sequence.children:
                if child.name == name:
                    sequence.children.remove(child)
                    break
            ligand_asl = lig.ligand_asl
            # Skip ligands without contacts to this sequence within 6.
            res6 = _residuesNearLigand(st, ligand_asl, 6.0, residue_by_key)
            if not res6:
                continue
            res3 = _residuesNearLigand(st, ligand_asl, 3.0, residue_by_key)

            ligseq = Sequence()
            ligand_list.append(ligseq)
            ligseq.type = constants.SEQ_ANNOTATION
            ligseq.annotation_type = constants.ANNOTATION_LIGAND
            ligseq.name = name
            ligseq.short_name = name[:3] + ":" + ligand_chain
            ligseq.parent_sequence = sequence
            sequence.children.append(ligseq)
            ligseq.residues = []
            for res in sequence.gaplessResidues():
                ligres = Residue()
                ligres.sequence = ligseq
                ligres.code = ' '
                ligres.value = -1.0
                ligseq.residues.append(ligres)
                if res in res3:
                    ligres.color = (255, 31, 31)
                    ligres.value = 3.0
                elif res in res6:
                    ligres.color = (255, 127, 63)
                    ligres.value = 6.0
                else:
                    ligres.color = (191, 191, 191)

    sequence.propagateGapsToChildren()
    return ligand_list


def _residuesNearLigand(st, ligand_asl, distance, residue_by_key):
    """
    Returns the residues from residue_by_key that own a protein atom in a
    residue within `distance` of the ligand selected by ligand_asl.

    One list item is produced per matching atom, so a residue can appear
    more than once.
    """
    protein_within_asl = \
        "(not %s) and (fillres within %g (%s)) and (protein)" % \
        (ligand_asl, distance, ligand_asl)
    nearby = []
    for atom_idx in evaluate_asl(st, protein_within_asl):
        atom = st.atom[atom_idx]
        key = str(atom.chain) + str(atom.resnum) + str(atom.inscode)
        if key in residue_by_key:
            nearby.append(residue_by_key[key])
    return nearby