Source code for schrodinger.ui.sequencealignment.maestro

"""
This module implements interactions between Maestro and multiple sequence
viewer.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Deacon Sweeney, Piotr Rotkiewicz

import os
from past.utils import old_div

from . import constants
from .align import align
from .residue import Residue
from .sequence import Sequence
from .structure_utils import calculate_sasa_dict

try:
    from schrodinger.maestro import maestro
except ImportError:
    maestro = None

try:
    from schrodinger.structutils.color import Color
except ImportError:
    pass

try:
    from schrodinger.structutils.analyze import AslLigandSearcher
    from schrodinger.structutils.analyze import calculate_sasa_by_atom
    from schrodinger.structutils.analyze import evaluate_asl
except:
    evaluate_asl = None

try:
    from schrodinger.structutils.interactionfp import \
        StructuralInteractionFingerprintGenerator
except:
    StructuralInteractionFingerprintGenerator = None

try:
    from schrodinger.infra import mm
except ImportError:
    mm = None

try:
    from schrodinger import structure
except:
    structure = None


[docs]def hasMaestro(): """ Checks if the program runs from Maestro environment. Additionally, this method sets Maestro callbacks. :rtype: bool :return: True if run from Maestro, False otherwise. """ if maestro: return True return False
[docs]def maestroSuperposition(viewer, show_panel=True): if maestro: sequence_group = viewer.sequence_group sequence_group.selectAlignedBlocks(selected_only=True) maestroSelectResiduesInWorkspace(viewer) if show_panel: maestro.command("showpanel superimpose:ASL") selected_atom_indices = maestro.selected_atoms_get() st = maestro.workspace_get() query = "" for i in selected_atom_indices: atom = st.atom[i] if "CA" in atom.pdbname: if len(query) > 0: query += " | " query += " at.n " + str(i) if len(query) > 0: query = "superimposeset " + query try: maestro.command(query) except: return False return True return False
[docs]def renumberMaestroEntry(viewer, sequence, update_sequence=False): """ This method assigns residue numbers to corresponding structure in Maestro workspace. It can also do reverse operation, i.e. assign atom and residue numbers to MSV sequence if "update_sequence" argument is True. :type update_sequence: boolean :param update_sequence: Changes seqeunce renumbering direction: when False, assigns MSV numbers to Maestro entry. When True, assigns Maestro numbers to MSV sequence. :type sequence: `Sequence` :param: sequence: Sequence to be used for propagating colors to Maestro. """ if not maestro or not sequence.from_maestro: return result = maestro.project_table_synchronize() if not result: return False project_table = maestro.project_table_get() if not project_table: return False valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1) viewer.maestro_busy = True is_included = False for row in project_table.included_rows: entry_id = row.entry_id if entry_id != sequence.maestro_entry_id: continue is_included = True for row in project_table.all_rows: entry_id = row.entry_id if entry_id != sequence.maestro_entry_id: continue st = row.getStructure(props=True, copy=False) if not update_sequence and is_included: maestro.command("entrywsexclude entry %s" % (entry_id)) renumbered = False for chain in st.chain: is_protein = False for atom in chain.atom: residue_name = atom.pdbres[:3] if residue_name in valid_amino_acids: chain_name = atom.chain is_protein = True break if is_protein: if chain_name == sequence.maestro_chain_name: renumbered = True res_dict = {} for res in sequence.residues: res_dict[res.maestro_residue_index] = res # Get unsorted list of residues residue_list = structure.get_residues_unsorted(chain) for res_index, residue in enumerate(residue_list): ca_atom = None for atom in residue.atom: if "CA" in atom.pdbname: ca_atom = atom break if not ca_atom: continue if res_index in res_dict: res = res_dict[res_index] if update_sequence: res.num = atom.resnum res.icode = atom.inscode res.maestro_atom_num = atom.index res.maestro_color = atom.color.rgb else: for atom in residue.atom: atom.resnum = res.num atom.inscode = res.icode # Set the structure so that numbering changes are preserved. if renumbered and not update_sequence: row.setStructure(st) maestro.project_table_update() if is_included: maestro.command("entrywsinclude entry %s" % (entry_id)) viewer.maestro_busy = False return True
[docs]def propagateColorsToMaestro(viewer, sequence, template_colors=False): """ This method propagates colors from the given sequence to corresponding structure in Maestro workspace. :type sequence: `Sequence` :param: sequence: Sequence to be used for propagating colors to Maestro. """ sequence_group = viewer.sequence_group if not maestro: return result = maestro.project_table_synchronize() if not result: return False project_table = maestro.project_table_get() if not project_table: return False valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1) do_redraw = False viewer.maestro_busy = True if template_colors: tr = tg = tb = 255 structure_list = sequence_group.getStructureList(omit_reference=True) if sequence in structure_list: tr, tg, tb = constants.TEMPLATE_COLORS[ structure_list.index(sequence) % constants.N_TEMPLATE_COLORS] if sequence.from_maestro: for row in project_table.included_rows: entry_id = row.entry_id if entry_id != sequence.maestro_entry_id: continue st = row.getStructure(props=True, copy=False) for chain in st.chain: is_protein = False for atom in chain.atom: residue_name = atom.pdbres[:3] if residue_name in valid_amino_acids: chain_name = atom.chain is_protein = True break if is_protein: if chain_name == sequence.maestro_chain_name: maestro.command("entrywsexclude entry %s" % (entry_id)) res_dict = {} for res in sequence.residues: res_dict[res.maestro_atom_num] = res for res_index, residue in enumerate(chain.residue): ca_atom = None for atom in residue.atom: if "CA" in atom.pdbname: ca_atom = atom break if not ca_atom: continue if ca_atom.index in res_dict: res = res_dict[ca_atom.index] if not template_colors: r, g, b = res.color if res.marked_color: r, g, b = res.marked_color elif res.model: r = tr g = tg b = tb else: r = g = b = 255 new_color = Color((int(r), int(g), int(b))) residue.color = new_color # Change color of all atoms for res_atom in residue.atom: res_atom.color = new_color # res_atom.setColorRGB(r, g, b) # Set the structure so that color changes are preserved. row.setStructure(st) maestro.command("entrywsinclude entry %s" % (entry_id)) do_redraw = True if do_redraw: maestro.redraw_request() viewer.maestro_busy = False return True
[docs]def maestroCenterOnResidue(viewer, sequence_area, res): """ This function centers workspace on a specified residue. If the residue doesn't belong to the Maestro entry, or something else goes wrong, the function returns False. Otherwise, if the operation was successful, it will return True. :type sequence_area: `SequenceArea` :param sequence_area: Sequence area that is calling this function. :type res: `Residue` :param res: residue to be centered on :rtype: bool :return: True if operation successfull, otherwise False """ if maestro and res and res.sequence and res.sequence.from_maestro: # Zoom-in onto the residue. command = "fit (entry.id \"%s\" and chain.name \"%s\"" \ "and res.num \"%s\" and res.inscode \"%s\")" \ % (res.sequence.maestro_entry_id, res.sequence.maestro_chain_name, res.num, res.icode) try: maestro.command(command) except: return False # Center on alpha carbon. command = "spotcenter \"%s\"" % (res.maestro_atom_num) try: maestro.command(command) except: return False # Zoom-in onto the residue. command = "enhance3d (entry.id \"%s\" and chain.name \"%s\"" \ " and res.num \"%s\" and res.inscode \"%s\")" \ % (res.sequence.maestro_entry_id, res.sequence.maestro_chain_name, res.num, res.icode) try: maestro.command(command) except: return False return True return False
[docs]def maestroMutateResidue(viewer, res, new_code, mutate_in_workspace=True): """ Mutates a single residue. Optionally modifies a corresponding Maestro entry. :note: This method doesn't update anntotations. The caller should update the annotations according to the introduced mutation. :type res: `Residue` :param res: residue to be mutated :type new_code: str :param new_code: code of the new residue. :rtype: bool :return: True if successfully synchronized with Maestro, False if not. """ # No need to mutate to the same residue. if new_code == res.code: return if res and res.sequence and res.sequence.type == constants.SEQ_AMINO_ACIDS: if new_code in list(constants.AMINO_ACIDS) and res.code != new_code: if not maestro or not res.sequence.from_maestro: # Set new residue code. res.code = new_code res.makeName() # If gap, just replace it by amino acid. res.is_gap = False # Make the residue structureless res.structureless = True res.structure = [] elif not res.is_gap and res.sequence.from_maestro and \ mutate_in_workspace: # Set new residue code. res.code = new_code aa_name = constants.AMINO_ACIDS[new_code][0] # Pick the amino acid fragment. command = "fragment peptide " + aa_name maestro.command(command) # Mutate this residue. command = "mutate entry.id \"%s\" and chain.name \"%s\"" \ " and res.num \"%s\" and res.inscode \"%s\"" \ % (str(res.sequence.maestro_entry_id), str(res.sequence.maestro_chain_name), str(res.num), str(res.icode)) try: maestro.command(command) except: return # Now we need to update the sequence to reflect # atom numbering changes renumberMaestroEntry(viewer, res.sequence, update_sequence=True)
[docs]def maestroGetSelectedResiduesASL(sequence_group, sequence=None): """ Returns an ASL string corresponding to selection in MSV. This function attempts to produce a compact ASL expression, e.g. consecutive selected residues are expressed as a range. """ try: # Build a selection query. asl_query = "" tmp_query = "" for seq in sequence_group.sequences: if sequence and seq != sequence: continue if seq.from_maestro and \ seq.hasSelectedResidues() and \ seq.maestro_included: if seq.hasAllSelectedResidues(): tmp_query = "(entry.id \"%s\" AND chain.name \"%s\")" % \ (seq.maestro_entry_id, seq.maestro_chain_name) else: tmp_query = "(entry.id \"%s\" AND chain.name \"%s\" AND fillres(" % \ (seq.maestro_entry_id, seq.maestro_chain_name) previous = False res_str = "" for res in seq.residues: if res.selected and res.maestro_atom_num is not None: if previous: res_str += "," + str(res.maestro_atom_num) else: res_str += "atom.entrynum " + \ str(res.maestro_atom_num) previous = True if res_str: tmp_query += res_str + "))" else: tmp_query = "" if tmp_query: if asl_query: asl_query += " OR " + tmp_query else: asl_query = tmp_query except: return "" return asl_query
[docs]def maestroSelectResiduesInWorkspace(viewer): """ This method selects residues in Maestro workspace based on current selection. :type sequence_group: `SequenceGroup` :param sequence_group: source sequence group :rtype: bool :return: True if successfully synchronized with Maestro, False if not. """ if not maestro: return False sequence_group = viewer.sequence_group viewer.maestro_busy = True asl_query = maestroGetSelectedResiduesASL(sequence_group) try: if asl_query != "": command = "workspaceselectionreplace " + asl_query maestro.command(command) else: maestro.command("workspaceselectionclear") except: pass viewer.maestro_busy = False
[docs]def maestroSynchronize(sequence_group): if not maestro: return False project_table = None try: project_table = maestro.project_table_get() except: return False if not project_table: return False maestro_entries = {} # Loop over all Maestro sequences and build a list of entries. for seq in sequence_group.sequences: seq._found = True if seq.from_maestro: entry = seq.maestro_entry_id try: row = project_table.getRow(entry_id=entry) except: seq._found = False if row is None: seq._found = False chain = seq.maestro_chain_name if entry not in maestro_entries: maestro_entries[entry] = {} maestro_entries[entry][chain] = seq # Get set of workspace entry ids workspace = maestro.workspace_get() workspace_entry_ids = set() for atom in workspace.atom: workspace_entry_ids.add(atom.entry_id) for seq in sequence_group.sequences: if seq.maestro_entry_id in workspace_entry_ids: seq.maestro_included = True else: seq.maestro_included = False # Change 'from_maestro' flag for non-existing sequences. for entry in maestro_entries.values(): for seq in entry.values(): if not seq._found: seq.from_maestro = False
[docs]def maestroIncorporateEntries( sequence_group, what="included", entry_ids=None, ignore=[], # noqa: M511 include=False, incorporate_scratch_entry=False, align_func=None, ct=None, use_title=False, viewer=None): """ Incorporates Maestro entries into the sequence viewer. """ if not maestro and not ct: return False if maestro: maestro.project_table_synchronize() project_table = maestro.project_table_get() maestro_entries = {} valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1) st_list = [] if ct: st_list.append((None, ct)) else: # Loop over all Maestro sequences and build a list of entries. for seq in sequence_group.sequences: if seq.from_maestro: entry = seq.maestro_entry_id chain = seq.maestro_chain_name if entry not in maestro_entries: maestro_entries[entry] = {} maestro_entries[entry][chain] = seq if what == "included": rows = reversed(list(project_table.included_rows)) elif what == "selected": rows = project_table.selected_rows elif what == "all": rows = project_table.all_rows elif what == "custom": rows = (project_table.getRow(eid) for eid in entry_ids) else: return False st_list = [(row.entry_id, row.getStructure()) for row in rows] workspace = maestro.workspace_get(copy=True) workspace_entry_ids = set() for atom in workspace.atom: workspace_entry_ids.add(atom.entry_id) if not st_list and workspace and incorporate_scratch_entry: st_list.append(("Scratch", workspace)) for entry_id, st in st_list: if entry_id in ignore: # Ignore this entry ID. continue found = False has_seqres = True try: data_handle = mm.mmct_ct_m2io_get_unrequested_handle(st) except: try: data_handle = mm.mmct_ct_get_or_open_additional_data(st, True) except: has_seqres = False chain_seq_dict = {} if has_seqres: num_seqres_blocks = mm.m2io_get_number_blocks( data_handle, "m_PDB_SEQRES") if num_seqres_blocks == 0: has_seqres = False if has_seqres: try: mm.m2io_goto_block(data_handle, "m_PDB_SEQRES", 1) except: has_seqres = False if has_seqres: num_rows = mm.m2io_get_index_dimension(data_handle) for i in range(1, num_rows + 1): chain_strings = mm.m2io_get_string_indexed( data_handle, i, ["s_pdb_chain_id"]) seqres_strings = mm.m2io_get_string_indexed( data_handle, i, ["s_pdb_SEQRES"]) chain = chain_strings[0] seqres = seqres_strings[0] residue_list = seqres.split(" ") seq_elements = [] for res in residue_list: if len(res) == 3: if res in valid_amino_acids: seq_elements.append( constants.AMINO_ACIDS_3_TO_1[res]) else: seq_elements.append("X") sequence_string = "".join(seq_elements) if sequence_string: chain_seq_dict[chain] = sequence_string mm.m2io_leave_block(data_handle) for chain in st.chain: seq = None if entry_id in maestro_entries: chains = maestro_entries[entry_id] if chain.name in chains: seq = chains[chain.name] is_protein = False for residue in chain.residue: if residue.pdbres[:3] in valid_amino_acids: is_protein = True break if not is_protein: continue secondary = None ssb = None if not seq: seq = Sequence() seq.type = constants.SEQ_AMINO_ACIDS seq.chain_id = chain.name[0] seq.color_scheme = constants.COLOR_MAESTRO seq.has_structure = True if entry_id: seq.from_maestro = True seq.maestro_included = entry_id in workspace_entry_ids seq.maestro_entry_id = entry_id seq.maestro_chain_name = chain.name sequence_group.sequences.append(seq) else: for child in seq.children: if child.type == constants.SEQ_SECONDARY: secondary = child break for child in seq.children: if child.type == constants.SEQ_ANNOTATION and \ child.annotation_type == constants.ANNOTATION_SSBOND: ssb = child break if ssb is None: ssb = seq.createSSBondAssignment() ssb.parent_sequence = seq seq.children.append(ssb) # This code comes from mcpro, zmat.py bond_list = [] asl = "a.pt SG and (res. CYS or res. CYX)" if chain.name > ' ': asl += " and (chain.name %s)" % str(chain.name) s_atoms = evaluate_asl(st, asl) found = False for s_atom in s_atoms: for s2_atom in s_atoms: if mm.mmct_is_atom_bonded(st, s_atom, s2_atom) == mm.TRUE: found = True if s_atom <= s2_atom: resi1 = str( st.atom[s_atom].resnum) + \ str(st.atom[s_atom].inscode) resi2 = str( st.atom[s2_atom].resnum) + \ str(st.atom[s2_atom].inscode) bond_list.append((resi1, resi2)) if not found: seq.children.remove(ssb) if secondary is None: secondary = seq.createSecondaryAssignment() secondary.parent_sequence = seq seq.children.append(secondary) if entry_id: name = getMaestroStructureName(st, entry_id, use_title=use_title) else: name = "Structure" name = name[:] short_name = name[:] seq.name = name seq.short_name = short_name residues = [] ss_residues = [] ssb_residues = [] # Get unsorted list of residues (get residues by connectivity) residue_list = structure.get_residues_unsorted(chain) for res_index, residue in enumerate(residue_list): ca_atom = None for atom in residue.atom: if "CA" in atom.pdbname: ca_atom = atom break if not ca_atom: continue if residue.pdbres[:3] in valid_amino_acids: code = constants.AMINO_ACIDS_3_TO_1[residue.pdbres[:3]] else: code = 'X' res = Residue() res.sequence = seq res.code = code res.name = residue.pdbres[:3] res.num = residue.resnum res.icode = residue.inscode res.structureless = False res.bfactor = ca_atom.temperature_factor if entry_id: res.maestro_color = ca_atom.color.rgb res.maestro_atom_num = ca_atom.index res.maestro_residue_index = res_index ss_res = Residue() ss_res.sequence = seq if ssb: ssb_res = Residue() ssb_res.sequence = seq ssb_res.code = ' ' ssb_residues.append(ssb_res) if ca_atom.secondary_structure == 1: ss_res.code = 'H' res.ss_code = 'H' elif ca_atom.secondary_structure == 2: ss_res.code = 'E' res.ss_code = 'E' else: ss_res.code = ' ' res.ss_code = ' ' residues.append(res) ss_residues.append(ss_res) if residues: seq.residues = residues secondary.residues = ss_residues if ssb and ssb in seq.children: ssb.residues = ssb_residues if chain.name in chain_seq_dict and align_func: # Use full SEQRES sequence. full_seq = Sequence() full_seq.appendResidues(chain_seq_dict[chain.name]) align_func(full_seq, seq) if ssb and ssb in seq.children: #ssb.residues = ssb_residues new_bond_list = [] for rid1, rid2 in bond_list: ridx1 = seq.getResidueIndex(rid1) ridx2 = seq.getResidueIndex(rid2) if ridx1 is not None and ridx2 is not None: new_bond_list.append((ridx1, ridx2)) seq.ssb_bond_list = new_bond_list ssb.bond_list = new_bond_list if not new_bond_list: seq.children.remove(ssb) else: ssb.height = int(len(new_bond_list) * 0.5 + 0.5) for res in seq.residues: res.sequence = seq for child in seq.children: child.parent_sequence = seq for res in child.residues: res.sequence = child else: sequence_group.sequences.remove(seq) if not structure and viewer: synchronizePropertiesWithMaestro(viewer, selection=True) return True
[docs]def maestroCalculateMinimumDistance(sequence): """ Calculates minimum distance in agnstroms between selected residues in the specified sequence and remaining residues in the sequence. """ if not maestro: return False result = maestro.project_table_synchronize() if not result: return False project_table = maestro.project_table_get() if not project_table: return False valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1) c_alpha_dict = {} for row in project_table.all_rows: st = row.getStructure(props=True, copy=False) chain_name = None entry_id = None for chain in st.chain: for atom in chain.atom: residue_name = atom.pdbres[:3] if residue_name in valid_amino_acids: chain_name = atom.chain entry_id = st.property["s_m_entry_id"] break if sequence.maestro_chain_name == chain_name and \ sequence.maestro_entry_id == entry_id: c_alpha_dict = {} for atom in chain.atom: if "CA" in atom.pdbname: res_id = str(atom.resnum) + str(atom.inscode) c_alpha_dict[res_id] = atom break distances = [1000000.0] * len(sequence.residues) for sel_res in sequence.residues: if sel_res.selected: res_id = str(sel_res.num) + str(sel_res.icode) if res_id in c_alpha_dict: sel_atom = c_alpha_dict[res_id] for idx, res in enumerate(sequence.residues): res_id = str(res.num) + str(res.icode) if res_id in c_alpha_dict: atom = c_alpha_dict[res_id] dx = atom.x - sel_atom.x dy = atom.y - sel_atom.y dz = atom.z - sel_atom.z dist = dx * dx + dy * dy + dz * dz if dist < distances[idx]: distances[idx] = dist return distances
[docs]def maestroExtractStructureData(sequence): """ Before building a homology model, we need to pass the structural data from Maestro to MSV. This function extracts the structural data from the Maestro entry coupled with the given sequence. The data is preformatted in PDB format, so it can be directly saved as Prime template file. """ if not maestro: return False result = maestro.project_table_synchronize() if not result: return False project_table = maestro.project_table_get() if not project_table: return False if sequence.from_maestro: for row in project_table.all_rows: entry_id = row.entry_id st = row.getStructure(props=True, copy=False) for chain in st.chain: if sequence.maestro_chain_name == chain.name and \ sequence.maestro_entry_id == entry_id: atom_dict = {} for res in sequence.residues: atom_dict[res.maestro_atom_num] = res for atom in chain.atom: if atom.index in atom_dict: res = atom_dict[atom.index] res.structure = [] maestro_res = atom.getResidue() for maestro_atom in maestro_res.atom: temp_factor = 0.0 if maestro_atom.temperature_factor: temp_factor = maestro_atom.temperature_factor pdbline = \ "ATOM %5d %-4s %4s%c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f %-2s\n"\ % (maestro_atom.index, maestro_atom.pdbname, maestro_atom.pdbres, maestro_atom.chain, maestro_atom.resnum, maestro_atom.inscode, maestro_atom.x, maestro_atom.y, maestro_atom.z, 1.00, # occupancy temp_factor, maestro_atom.element) res.structure.append(pdbline) return True return False
[docs]def synchronizePropertiesWithMaestro(viewer, colors=False, selection=False): """ Synchronizes Maestro workspace colors or selection state with corresponding sequences in the sequence viewer. :type colors: bool :param colors: Synchronize colors. :type selection: bool :param selection: Synchronize selection state. :rtype: bool :return: True on successful synchronization, False otherwise """ sequence_group = viewer.sequence_group if not maestro: return False result = maestro.project_table_synchronize() if not result: return False project_table = maestro.project_table_get() if not project_table: return False valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1) if selection: workspace_st = maestro.workspace_get(copy=False) workspace_selection = maestro.selected_atoms_get() selected_atoms = [] for idx in workspace_selection: atom = workspace_st.atom[idx] selected_atoms.append((atom.entry_id, atom.number_by_entry)) for row in project_table.included_rows: entry_id = row.entry_id st = row.getStructure(props=True, copy=False) for chain in st.chain: is_protein = False for atom in chain.atom: residue_name = atom.pdbres[:3] if residue_name in valid_amino_acids: chain_name = atom.chain is_protein = True break if is_protein: # Try to locate this sequence in the sequence group. found = False for seq in sequence_group.sequences: if seq.from_maestro: if seq.maestro_chain_name == chain_name and \ seq.maestro_entry_id == entry_id: found = True break if found: atom_dict = {} for res in seq.residues: atom_dict[res.maestro_atom_num] = res for atom in chain.atom: if atom.index in atom_dict: res = atom_dict[atom.index] if colors: res.maestro_color = atom.color.rgb if seq.color_scheme == constants.COLOR_MAESTRO: res.color = res.maestro_color if selection: res.selected = False if (entry_id, atom.index) in selected_atoms: res.selected = True # Repaint the sequence viewer. viewer.updateView() return True
[docs]def getMaestroStructureName(st, entry_id, use_title=False): """ Generates a name for Maestro entry. """ if entry_id == "Scratch": return "Scratch" if use_title and "s_m_title" in st.property: return st.property["s_m_title"] if "s_pdb_PDB_ID" in st.property: name = st.property["s_pdb_PDB_ID"] elif "s_m_title" in st.property: name = st.property["s_m_title"] elif "s_m_entry_name" in st.property: name = st.property["s_m_entry_name"] else: name = "Entry " + entry_id return name
[docs]def getMaestroLigandList(template_list=None, entry_list=None, chain_distance=5.0, list_all=False): """ Returns list of ligands extracted from Maestro. :type template_list: list of Sequences :param template_list: Optional list of MSV sequences to extract ligands from. If the list is not specified, all PT chains will be analyzed. :note: This function ignores workspace scratch entry. :type entry list: list of strings :param entry_list: Optional list of entry IDs to extract ligands from. If the list is not specified, all PT chains will be analyzed. :note: If both template_list and entry_list are specified, entry_list takes precedence and template_list is not being used. :type chain_distance: float :param chain_distance: Distance from protein chain required to classify a molecule as a ligand. All molecules outside of this threshold will be ignored. :type list_all: boolean :param list_all: Disregards ligand selection and chain distance criteria and returns a list of all non-protein molecules. :rtype: tuple of lists :return: A tuple of lists of ligand names and list of ligands. The list of ligands consists of 4-tuples: (entry_id, ligand_pdbres, ligand_asl, ligand_st). """ if not maestro: return [], [] if not AslLigandSearcher: return [], [] result = maestro.project_table_synchronize() if not result: return [], [] project_table = maestro.project_table_get() if not project_table: return [], [] ligands = [] name_list = [] entry_list = [] if entry_list: # Note: This is not reachable selected_entries = entry_list else: selected_entries = [] if template_list: for seq in template_list: if seq.from_maestro: selected_entries.append(seq.maestro_entry_id) valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1) asl_ligand_searcher = AslLigandSearcher() for row in project_table.all_rows: entry_id = row.entry_id if selected_entries and entry_id not in selected_entries: continue st = row.getStructure(props=True, copy=False) entry_name = getMaestroStructureName(st, entry_id) ligands = asl_ligand_searcher.search(st) lig_pdbres = [] for lig in ligands: if not list_all: ligand_asl = lig.ligand_asl protein_within_asl = \ "(not %s) and (fillres within %g (%s)) and (protein)" % \ (ligand_asl, chain_distance, ligand_asl) # Test if the ligand is within 5A from the protein chain close_atoms = evaluate_asl(st, protein_within_asl) if not close_atoms: continue atom = lig.st.atom[1] lig_name = atom.pdbres[:3] lig_pdbres.append(lig_name) name_list.append(entry_name + " " + lig_name + " " + atom.chain + ":" + str(atom.resnum)) entry_list.append((entry_id, lig.pdbres, lig.ligand_asl, lig.st)) if list_all: for mol in st.molecule: atom = st.atom[mol.getAtomIndices()[0]] pdbres = atom.pdbres[:3] if pdbres not in valid_amino_acids and pdbres not in lig_pdbres: name_list.append(entry_name + " " + pdbres + " " + atom.chain + ":" + str(atom.resnum)) entry_list.append( (entry_id, atom.pdbres, "mol.num " + str(mol.number), mol.extractStructure())) return (name_list, entry_list)
[docs]def getEntryByName(name): """ Returns Maestro entry ID of specified name. """ if not maestro: return None result = maestro.project_table_synchronize() if not result: return None project_table = maestro.project_table_get() if not project_table: return None for row in project_table.all_rows: entry_id = row.entry_id st = row.getStructure(props=True, copy=False) if "s_m_entry_name" in st.property: if name == st.property["s_m_entry_name"]: return entry_id return None
[docs]def getEntryByJobName(name): """ Returns Maestro entry ID of specified job name. """ if not maestro: return None result = maestro.project_table_synchronize() if not result: return None project_table = maestro.project_table_get() if not project_table: return None for row in project_table.all_rows: entry_id = row.entry_id st = row.getStructure(props=True, copy=False) if "s_m_job_name" in st.property: if name == st.property["s_m_job_name"]: return entry_id return None
[docs]def maestroGetListOfEntryIDs(): """ Returns a list of entry IDs in Maestro PT. """ if not maestro: return [] result = maestro.project_table_synchronize() if not result: return [] project_table = maestro.project_table_get() if not project_table: return [] list = [] for row in project_table.all_rows: list.append(row.entry_id) return list
[docs]def maestroColorEntrySurface(viewer, sequence): """ Colors a surface in Maestro according to sequence colors. """ if sequence.from_maestro and sequence.maestro_entry_id: propagateColorsToMaestro(viewer, sequence) cmd = 'surfacescheme \"Molecular Surface\" entry=\"' + \ str(sequence.maestro_entry_id) + "\" scheme=\"Atom Color\"" try: maestro.command(cmd) except: return False return True
[docs]def maestroIncludeAllEntrySequences(sequence_group, entry_id): """ Includes all sequences belonging to the given entry. """ for sequence in sequence_group.sequences: if sequence.maestro_entry_id == entry_id: sequence.maestro_included = True
[docs]def maestroExcludeAllEntrySequences(sequence_group, entry_id): """ Excludes all sequences belonging to the given entry. """ for sequence in sequence_group.sequences: if sequence.maestro_entry_id == entry_id: sequence.maestro_included = False
[docs]def maestroGetProjectPath(old=False): """ Returns a path to a current project. """ if not maestro or not mm: return None try: maestro.project_table_synchronize() pt = maestro.project_table_get() path = pt.fullname + os.sep + ".mmproj-admin" + os.sep if not old: path += "additional_data" + os.sep return path except: return None
STD_SASA_DICT = None
[docs]def maestroCalculateSASA( sequence_group, sequences=[], # noqa: M511 selected_only=False, normalize=True, percentage=False): """ Calculates solvent accessible surface per residue. :type sequence_group: SequenceGroup :param: Target sequence group. :type sequences: list of Sequences :param: Optional list of sequences. If the list is provided it will be used instead of the sequence group. :type selected_only: bool :param selected_only: Calculate SASA only for selected sequences. :type normalize: bool :param normalize: Should we normalize the SASA area by area of amino acid in default conformation. :type percentage: bool :param percentage: If True return percentage SASA instead of absolute values. """ global STD_SASA_DICT if not maestro: return False project_table = None try: maestro.project_table_synchronize() project_table = maestro.project_table_get() except: return False if not project_table: return False # Percentage implies normalization if percentage: normalize = True maestro_entries = {} if STD_SASA_DICT is None: STD_SASA_DICT = calculate_sasa_dict(ignore_backbone=True, include_calpha=True) group = sequence_group.sequences if sequences: group = sequences # Loop over all Maestro sequences and build a list of entries. for seq in group: if selected_only and not seq.selected: continue if seq.from_maestro: entry = seq.maestro_entry_id chain = seq.maestro_chain_name if entry not in maestro_entries: maestro_entries[entry] = {} seq._found = False maestro_entries[entry][chain] = seq found = False for row in project_table.all_rows: # Make sure this row's entry ID and row ID are within MSV sequences. entry_id = row.entry_id if entry_id not in list(maestro_entries): continue try: st = row.getStructure(props=True, copy=False) except: continue chains = maestro_entries[entry_id] full_sasa_list = calculate_sasa_by_atom(st) side_chain_atoms = evaluate_asl(st, "sidechain or atom.ptype \"CA\"") for chain in st.chain: if chain.name not in list(chains): continue res_dict = {} seq = maestro_entries[entry_id][chain.name] for res in seq.residues: res_dict[str(res.num) + res.icode] = res try: for res in chain.residue: sasa = 0.0 res_atom_list = res.getAtomIndices() for anum in res_atom_list: if anum in side_chain_atoms: sasa += full_sasa_list[anum - 1] if normalize and STD_SASA_DICT and \ res.pdbres[:3] in STD_SASA_DICT: sasa /= STD_SASA_DICT[res.pdbres[:3]] if percentage: sasa *= 100.0 res_id = str(res.resnum) + str(res.inscode) if res_id in res_dict: res_dict[res_id].area = sasa found = True except: continue return found
[docs]def maestroFindInteractions(sequence, ligand_list, selected_only=False): """ Generates a list of interactions between a specified sequence and a list of ligands. :type sequence: Sequence :param sequence: Sequence the interactions are calculated for. :type ligand_list: list of tuples :param ligand_list: List of (ligand_st, ligand_entry_id) of the ligands. :rtype: list of interactions per residue :return: List of interactions calculated per residue. Each position includes list of interaction strings in the format of StructuralInteractionFingerprintGenerator.getFingerprintString Returns None if the calculations were not completed successfully. """ if not maestro or not sequence or not ligand_list: return None if not StructuralInteractionFingerprintGenerator: return None generator = StructuralInteractionFingerprintGenerator() project_table = None try: maestro.project_table_synchronize() project_table = maestro.project_table_get() except: return None if not project_table: return None maestro_entries = {} if not sequence.from_maestro: return None for row in project_table.all_rows: entry_id = row.entry_id if entry_id != sequence.maestro_entry_id: continue try: st = row.getStructure(props=True, copy=False) except: continue for chain in st.chain: if chain.name != sequence.maestro_chain_name: continue # Found a Maestro chain, generate interactions generator.setReceptorStructure(st, entry_id) for ligand_st, ligand_id in ligand_list: generator.generateFingerprint(ligand_st, ligand_id, None) fp_list = [] for res_idx, res in enumerate(sequence.residues): if res.is_gap: continue for lig_idx in range(len(ligand_list)): fp_list.append( generator.getFingerprintString(lig_idx, res_idx)) return fp_list return None
[docs]def maestroGetStructureAlignment(viewer, sequence_group): """ Creates a sequence alignment based on structure superposition of the included entries. Consecutively merges the alignments. """ structure_list = sequence_group.getStructureList() if len(structure_list) < 2: return # Extract C-alpha positions for all MSV entries result = maestro.project_table_synchronize() if not result: return False project_table = maestro.project_table_get() if not project_table: return False ca_positions = {} valid_structures = [] for sequence in structure_list: if sequence.from_maestro: for row in project_table.all_rows: entry_id = row.entry_id st = row.getStructure(props=False, copy=False) for chain in st.chain: if sequence.maestro_chain_name == chain.name and \ sequence.maestro_entry_id == entry_id: # Remove gaps and structureless residues sequence._tmp_res = [] for child in sequence.children: child._tmp_res = [] for index, res in enumerate(sequence.residues): if res.is_gap or res.structureless: continue sequence._tmp_res.append(sequence.residues[index]) for child in sequence.children: child._tmp_res.append(child.residues[index]) if not sequence._tmp_res: continue sequence.residues = sequence._tmp_res for child in sequence.children: child.residues = child._tmp_res # Extract list of C-alpha positions ca_positions[sequence] = [] atom_dict = {} valid_structures.append(sequence) for res in sequence.residues: atom_dict[res.maestro_atom_num] = res for atom in chain.atom: if atom.index in atom_dict: res = atom_dict[atom.index] ca_positions[sequence].append( (atom.x, atom.y, atom.z)) viewer.contents_changed = True viewer.updateView() if len(valid_structures) < 2: return str1 = valid_structures[0] len1 = len(ca_positions[str1]) for idx in range(1, len(valid_structures)): str2 = valid_structures[idx] len2 = len(ca_positions[str2]) score_matrix = [] for y in range(0, len1 + 1): score_matrix.append([0] * (len2 + 2)) # Build score matrix for y in range(len2): for x in range(len1): try: x1, y1, z1 = ca_positions[str1][x] x2, y2, z2 = ca_positions[str2][y] except: continue dist2 = (x2 - x1) * (x2 - x1) + \ (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1) score = old_div(20.0, (1.0 + old_div(dist2, 25.0))) score_matrix[x][y] = score align(str1, str2, merge=True, sequence_group=sequence_group, gap_open_penalty=-2.0, gap_extend_penalty=-0.2, scoring_matrix=score_matrix, direct_scores=True)
[docs]def maestroGetLigandAnnotations(sequence): """ Creates ligand annotations for a given sequence. """ if not sequence.has_structure: return False if not maestro: return False project_table = maestro.project_table_get() if not project_table: return False asl_ligand_searcher = AslLigandSearcher() res_dict = {} for res in sequence.residues: id = str(sequence.chain_id) + str(res.num) + str(res.icode) res_dict[id] = res ligand_list = [] for row in project_table.all_rows: entry_id = row.entry_id if entry_id != sequence.maestro_entry_id: continue st = row.getStructure(props=True, copy=False) ligands = asl_ligand_searcher.search(st) if not ligands: continue for lig in ligands: name = lig.pdbres + "\n" + "Chain " + lig.st.atom[1].chain + \ "\n" + str(len(lig.st.atom)) + " atoms" for child in sequence.children: if child.name == name: sequence.children.remove(child) break ligand_asl = lig.ligand_asl protein_within_asl = \ "(not %s) and (fillres within %g (%s)) and (protein)" % \ (ligand_asl, 6.0, ligand_asl) # Test if the ligand is within 6A from the protein chain close_atoms_6 = evaluate_asl(st, protein_within_asl) # Skip ligand w/o contacts if not close_atoms_6: continue protein_within_asl = \ "(not %s) and (fillres within %g (%s)) and (protein)" % \ (ligand_asl, 3.0, ligand_asl) # Test if the ligand is within 3A from the protein chain close_atoms_3 = evaluate_asl(st, protein_within_asl) res6 = [] for atom_idx in close_atoms_6: atom = st.atom[atom_idx] id = str(atom.chain) + str(atom.resnum) + str(atom.inscode) if id in res_dict: res6.append(res_dict[id]) if not res6: continue res3 = [] for atom_idx in close_atoms_3: atom = st.atom[atom_idx] id = str(atom.chain) + str(atom.resnum) + str(atom.inscode) if id in res_dict: res3.append(res_dict[id]) ligseq = Sequence() ligand_list.append(ligseq) ligseq.type = constants.SEQ_ANNOTATION ligseq.annotation_type = constants.ANNOTATION_LIGAND ligseq.name = name ligseq.short_name = name[:3] + ":" + lig.st.atom[1].chain ligseq.parent_sequence = sequence sequence.children.append(ligseq) ligseq.residues = [] for res in sequence.gaplessResidues(): ligres = Residue() ligres.sequence = ligseq ligres.code = ' ' ligres.value = -1.0 ligseq.residues.append(ligres) if res in res3: ligres.color = (255, 31, 31) ligres.value = 3.0 elif res in res6: ligres.color = (255, 127, 63) ligres.value = 6.0 else: ligres.color = (191, 191, 191) sequence.propagateGapsToChildren() return ligand_list