Source code for schrodinger.ui.sequencealignment.maestro

"""
This module implements interactions between Maestro and multiple sequence
viewer.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Deacon Sweeney, Piotr Rotkiewicz

import os
from past.utils import old_div

from . import constants
from .align import align
from .residue import Residue
from .sequence import Sequence
from .structure_utils import calculate_sasa_dict

try:
    from schrodinger.maestro import maestro
except ImportError:
    maestro = None

try:
    from schrodinger.structutils.color import Color
except ImportError:
    pass

try:
    from schrodinger.structutils.analyze import AslLigandSearcher
    from schrodinger.structutils.analyze import calculate_sasa_by_atom
    from schrodinger.structutils.analyze import evaluate_asl
except:
    evaluate_asl = None

try:
    from schrodinger.structutils.interactionfp import \
        StructuralInteractionFingerprintGenerator
except:
    StructuralInteractionFingerprintGenerator = None

try:
    from schrodinger.infra import mm
except ImportError:
    mm = None

try:
    from schrodinger import structure
except:
    structure = None


[docs]def hasMaestro():
    """
    Checks if the program runs from Maestro environment. Additionally, this
    method sets Maestro callbacks.

    :rtype: bool
    :return: True if run from Maestro, False otherwise.
    """
    if maestro:
        return True

    return False


[docs]def maestroSuperposition(viewer, show_panel=True):
    if maestro:
        sequence_group = viewer.sequence_group
        sequence_group.selectAlignedBlocks(selected_only=True)
        maestroSelectResiduesInWorkspace(viewer)
        if show_panel:
            maestro.command("showpanel superimpose:ASL")
        selected_atom_indices = maestro.selected_atoms_get()
        st = maestro.workspace_get()
        query = ""
        for i in selected_atom_indices:
            atom = st.atom[i]
            if "CA" in atom.pdbname:
                if len(query) > 0:
                    query += " | "
                query += " at.n " + str(i)
        if len(query) > 0:
            query = "superimposeset " + query
            try:
                maestro.command(query)
            except:
                return False
            return True
    return False


[docs]def renumberMaestroEntry(viewer, sequence, update_sequence=False):
    """
    This method assigns residue numbers to corresponding structure
    in Maestro workspace.

    It can also do reverse operation, i.e. assign atom and residue
    numbers to MSV sequence if "update_sequence" argument is True.

    :type update_sequence: boolean
    :param update_sequence: Changes seqeunce renumbering direction:
        when False, assigns MSV numbers to Maestro entry. When True,
        assigns Maestro numbers to MSV sequence.

    :type sequence: `Sequence`
    :param: sequence: Sequence to be used for propagating colors to Maestro.
    """
    if not maestro or not sequence.from_maestro:
        return
    result = maestro.project_table_synchronize()
    if not result:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False
    valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1)
    viewer.maestro_busy = True
    is_included = False
    for row in project_table.included_rows:
        entry_id = row.entry_id
        if entry_id != sequence.maestro_entry_id:
            continue
        is_included = True
    for row in project_table.all_rows:
        entry_id = row.entry_id
        if entry_id != sequence.maestro_entry_id:
            continue
        st = row.getStructure(props=True, copy=False)
        if not update_sequence and is_included:
            maestro.command("entrywsexclude entry %s" % (entry_id))
        renumbered = False
        for chain in st.chain:
            is_protein = False
            for atom in chain.atom:
                residue_name = atom.pdbres[:3]
                if residue_name in valid_amino_acids:
                    chain_name = atom.chain
                    is_protein = True
                    break
            if is_protein:
                if chain_name == sequence.maestro_chain_name:
                    renumbered = True
                    res_dict = {}
                    for res in sequence.residues:
                        res_dict[res.maestro_residue_index] = res
                    # Get unsorted list of residues
                    residue_list = structure.get_residues_unsorted(chain)
                    for res_index, residue in enumerate(residue_list):
                        ca_atom = None
                        for atom in residue.atom:
                            if "CA" in atom.pdbname:
                                ca_atom = atom
                                break
                        if not ca_atom:
                            continue
                        if res_index in res_dict:
                            res = res_dict[res_index]
                            if update_sequence:
                                res.num = atom.resnum
                                res.icode = atom.inscode
                                res.maestro_atom_num = atom.index
                                res.maestro_color = atom.color.rgb
                            else:
                                for atom in residue.atom:
                                    atom.resnum = res.num
                                    atom.inscode = res.icode
        # Set the structure so that numbering changes are preserved.
        if renumbered and not update_sequence:
            row.setStructure(st)
            maestro.project_table_update()
            if is_included:
                maestro.command("entrywsinclude entry %s" % (entry_id))
    viewer.maestro_busy = False
    return True


[docs]def propagateColorsToMaestro(viewer, sequence, template_colors=False):
    """
    This method propagates colors from the given sequence to
    corresponding structure in Maestro workspace.

    :type sequence: `Sequence`
    :param: sequence: Sequence to be used for propagating colors to Maestro.
    """
    sequence_group = viewer.sequence_group
    if not maestro:
        return
    result = maestro.project_table_synchronize()
    if not result:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False
    valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1)
    do_redraw = False
    viewer.maestro_busy = True
    if template_colors:
        tr = tg = tb = 255
        structure_list = sequence_group.getStructureList(omit_reference=True)
        if sequence in structure_list:
            tr, tg, tb = constants.TEMPLATE_COLORS[
                structure_list.index(sequence) % constants.N_TEMPLATE_COLORS]
    if sequence.from_maestro:
        for row in project_table.included_rows:
            entry_id = row.entry_id
            if entry_id != sequence.maestro_entry_id:
                continue
            st = row.getStructure(props=True, copy=False)
            for chain in st.chain:
                is_protein = False
                for atom in chain.atom:
                    residue_name = atom.pdbres[:3]
                    if residue_name in valid_amino_acids:
                        chain_name = atom.chain
                        is_protein = True
                        break
                if is_protein:
                    if chain_name == sequence.maestro_chain_name:
                        maestro.command("entrywsexclude entry %s" % (entry_id))
                        res_dict = {}
                        for res in sequence.residues:
                            res_dict[res.maestro_atom_num] = res
                        for res_index, residue in enumerate(chain.residue):
                            ca_atom = None
                            for atom in residue.atom:
                                if "CA" in atom.pdbname:
                                    ca_atom = atom
                                    break
                            if not ca_atom:
                                continue
                            if ca_atom.index in res_dict:
                                res = res_dict[ca_atom.index]
                                if not template_colors:
                                    r, g, b = res.color
                                    if res.marked_color:
                                        r, g, b = res.marked_color
                                elif res.model:
                                    r = tr
                                    g = tg
                                    b = tb
                                else:
                                    r = g = b = 255
                                new_color = Color((int(r), int(g), int(b)))
                                residue.color = new_color
                                # Change color of all atoms
                                for res_atom in residue.atom:
                                    res_atom.color = new_color
                                    # res_atom.setColorRGB(r, g, b)
                                # Set the structure so that color changes are preserved.
            row.setStructure(st)
            maestro.command("entrywsinclude entry %s" % (entry_id))
            do_redraw = True
    if do_redraw:
        maestro.redraw_request()
    viewer.maestro_busy = False
    return True


[docs]def maestroCenterOnResidue(viewer, sequence_area, res):
    """
    This function centers workspace on a specified residue. If the residue
    doesn't belong to the Maestro entry, or something else goes wrong,
    the function returns False. Otherwise, if the operation was successful,
    it will return True.

    :type sequence_area: `SequenceArea`
    :param sequence_area: Sequence area that is calling this function.

    :type res: `Residue`
    :param res: residue to be centered on

    :rtype: bool
    :return: True if operation successfull, otherwise False
    """
    if maestro and res and res.sequence and res.sequence.from_maestro:
        # Zoom-in onto the residue.
        command = "fit (entry.id \"%s\" and chain.name \"%s\"" \
            "and res.num \"%s\" and res.inscode \"%s\")" \
            % (res.sequence.maestro_entry_id, res.sequence.maestro_chain_name,
               res.num, res.icode)
        try:
            maestro.command(command)
        except:
            return False
        # Center on alpha carbon.
        command = "spotcenter \"%s\"" % (res.maestro_atom_num)
        try:
            maestro.command(command)
        except:
            return False
        # Zoom-in onto the residue.
        command = "enhance3d (entry.id \"%s\" and chain.name \"%s\"" \
            " and res.num \"%s\" and res.inscode \"%s\")" \
            % (res.sequence.maestro_entry_id, res.sequence.maestro_chain_name,
               res.num, res.icode)
        try:
            maestro.command(command)
        except:
            return False
        return True
    return False


[docs]def maestroMutateResidue(viewer, res, new_code, mutate_in_workspace=True):
    """
    Mutates a single residue. Optionally modifies a corresponding Maestro
    entry.

    :note: This method doesn't update anntotations. The caller should update
        the annotations according to the introduced mutation.

    :type res: `Residue`
    :param res: residue to be mutated

    :type new_code: str
    :param new_code: code of the new residue.

    :rtype: bool
    :return: True if successfully synchronized with Maestro, False if not.
    """
    # No need to mutate to the same residue.
    if new_code == res.code:
        return
    if res and res.sequence and res.sequence.type == constants.SEQ_AMINO_ACIDS:
        if new_code in list(constants.AMINO_ACIDS) and res.code != new_code:
            if not maestro or not res.sequence.from_maestro:
                # Set new residue code.
                res.code = new_code
                res.makeName()
                # If gap, just replace it by amino acid.
                res.is_gap = False
                # Make the residue structureless
                res.structureless = True
                res.structure = []
            elif not res.is_gap and res.sequence.from_maestro and \
                    mutate_in_workspace:
                # Set new residue code.
                res.code = new_code
                aa_name = constants.AMINO_ACIDS[new_code][0]
                # Pick the amino acid fragment.
                command = "fragment peptide " + aa_name
                maestro.command(command)
                # Mutate this residue.
                command = "mutate entry.id \"%s\" and chain.name \"%s\"" \
                    " and res.num \"%s\" and res.inscode \"%s\"" \
                    % (str(res.sequence.maestro_entry_id),
                       str(res.sequence.maestro_chain_name), str(res.num),
                       str(res.icode))
                try:
                    maestro.command(command)
                except:
                    return
                # Now we need to update the sequence to reflect
                # atom numbering changes
                renumberMaestroEntry(viewer, res.sequence, update_sequence=True)


[docs]def maestroGetSelectedResiduesASL(sequence_group, sequence=None):
    """
    Returns an ASL string corresponding to selection in MSV.
    This function attempts to produce a compact ASL expression,
    e.g. consecutive selected residues are expressed as a range.
    """
    try:
        # Build a selection query.
        asl_query = ""
        tmp_query = ""
        for seq in sequence_group.sequences:
            if sequence and seq != sequence:
                continue
            if seq.from_maestro and \
               seq.hasSelectedResidues() and \
               seq.maestro_included:
                if seq.hasAllSelectedResidues():
                    tmp_query = "(entry.id \"%s\" AND chain.name \"%s\")" % \
                        (seq.maestro_entry_id, seq.maestro_chain_name)
                else:
                    tmp_query = "(entry.id \"%s\" AND chain.name \"%s\" AND fillres(" % \
                        (seq.maestro_entry_id, seq.maestro_chain_name)
                    previous = False
                    res_str = ""
                    for res in seq.residues:
                        if res.selected and res.maestro_atom_num is not None:
                            if previous:
                                res_str += "," + str(res.maestro_atom_num)
                            else:
                                res_str += "atom.entrynum " + \
                                str(res.maestro_atom_num)
                            previous = True
                    if res_str:
                        tmp_query += res_str + "))"
                    else:
                        tmp_query = ""
                if tmp_query:
                    if asl_query:
                        asl_query += " OR " + tmp_query
                    else:
                        asl_query = tmp_query
    except:
        return ""
    return asl_query


[docs]def maestroSelectResiduesInWorkspace(viewer):
    """
    This method selects residues in Maestro workspace based on current
    selection.

    :type sequence_group: `SequenceGroup`
    :param sequence_group: source sequence group

    :rtype: bool
    :return: True if successfully synchronized with Maestro, False if not.
    """
    if not maestro:
        return False
    sequence_group = viewer.sequence_group
    viewer.maestro_busy = True
    asl_query = maestroGetSelectedResiduesASL(sequence_group)
    try:
        if asl_query != "":
            command = "workspaceselectionreplace " + asl_query
            maestro.command(command)
        else:
            maestro.command("workspaceselectionclear")
    except:
        pass
    viewer.maestro_busy = False


[docs]def maestroSynchronize(sequence_group):
    if not maestro:
        return False
    project_table = None
    try:
        project_table = maestro.project_table_get()
    except:
        return False
    if not project_table:
        return False
    maestro_entries = {}
    # Loop over all Maestro sequences and build a list of entries.
    for seq in sequence_group.sequences:
        seq._found = True
        if seq.from_maestro:
            entry = seq.maestro_entry_id
            try:
                row = project_table.getRow(entry_id=entry)
            except:
                seq._found = False
            if row is None:
                seq._found = False
            chain = seq.maestro_chain_name
            if entry not in maestro_entries:
                maestro_entries[entry] = {}
            maestro_entries[entry][chain] = seq

    # Get set of workspace entry ids
    workspace = maestro.workspace_get()
    workspace_entry_ids = set()
    for atom in workspace.atom:
        workspace_entry_ids.add(atom.entry_id)
    for seq in sequence_group.sequences:
        if seq.maestro_entry_id in workspace_entry_ids:
            seq.maestro_included = True
        else:
            seq.maestro_included = False
    # Change 'from_maestro' flag for non-existing sequences.
    for entry in maestro_entries.values():
        for seq in entry.values():
            if not seq._found:
                seq.from_maestro = False


[docs]def maestroIncorporateEntries(
        sequence_group,
        what="included",
        entry_ids=None,
        ignore=[],  # noqa: M511
        include=False,
        incorporate_scratch_entry=False,
        align_func=None,
        ct=None,
        use_title=False,
        viewer=None):
    """
    Incorporates Maestro entries into the sequence viewer.

    """
    if not maestro and not ct:
        return False
    if maestro:
        maestro.project_table_synchronize()
        project_table = maestro.project_table_get()
    maestro_entries = {}
    valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1)
    st_list = []
    if ct:
        st_list.append((None, ct))
    else:
        # Loop over all Maestro sequences and build a list of entries.
        for seq in sequence_group.sequences:
            if seq.from_maestro:
                entry = seq.maestro_entry_id
                chain = seq.maestro_chain_name
                if entry not in maestro_entries:
                    maestro_entries[entry] = {}
                maestro_entries[entry][chain] = seq
        if what == "included":
            rows = reversed(list(project_table.included_rows))
        elif what == "selected":
            rows = project_table.selected_rows
        elif what == "all":
            rows = project_table.all_rows
        elif what == "custom":
            rows = (project_table.getRow(eid) for eid in entry_ids)
        else:
            return False

        st_list = [(row.entry_id, row.getStructure()) for row in rows]

        workspace = maestro.workspace_get(copy=True)
        workspace_entry_ids = set()
        for atom in workspace.atom:
            workspace_entry_ids.add(atom.entry_id)
        if not st_list and workspace and incorporate_scratch_entry:
            st_list.append(("Scratch", workspace))
    for entry_id, st in st_list:
        if entry_id in ignore:
            # Ignore this entry ID.
            continue
        found = False
        has_seqres = True
        try:
            data_handle = mm.mmct_ct_m2io_get_unrequested_handle(st)
        except:
            try:
                data_handle = mm.mmct_ct_get_or_open_additional_data(st, True)
            except:
                has_seqres = False
        chain_seq_dict = {}
        if has_seqres:
            num_seqres_blocks = mm.m2io_get_number_blocks(
                data_handle, "m_PDB_SEQRES")
            if num_seqres_blocks == 0:
                has_seqres = False
        if has_seqres:
            try:
                mm.m2io_goto_block(data_handle, "m_PDB_SEQRES", 1)
            except:
                has_seqres = False
        if has_seqres:
            num_rows = mm.m2io_get_index_dimension(data_handle)
            for i in range(1, num_rows + 1):
                chain_strings = mm.m2io_get_string_indexed(
                    data_handle, i, ["s_pdb_chain_id"])
                seqres_strings = mm.m2io_get_string_indexed(
                    data_handle, i, ["s_pdb_SEQRES"])
                chain = chain_strings[0]
                seqres = seqres_strings[0]
                residue_list = seqres.split(" ")

                seq_elements = []
                for res in residue_list:
                    if len(res) == 3:
                        if res in valid_amino_acids:
                            seq_elements.append(
                                constants.AMINO_ACIDS_3_TO_1[res])
                        else:
                            seq_elements.append("X")

                sequence_string = "".join(seq_elements)
                if sequence_string:
                    chain_seq_dict[chain] = sequence_string

            mm.m2io_leave_block(data_handle)
        for chain in st.chain:
            seq = None
            if entry_id in maestro_entries:
                chains = maestro_entries[entry_id]
                if chain.name in chains:
                    seq = chains[chain.name]
            is_protein = False
            for residue in chain.residue:
                if residue.pdbres[:3] in valid_amino_acids:
                    is_protein = True
                    break
            if not is_protein:
                continue
            secondary = None
            ssb = None
            if not seq:
                seq = Sequence()
                seq.type = constants.SEQ_AMINO_ACIDS
                seq.chain_id = chain.name[0]
                seq.color_scheme = constants.COLOR_MAESTRO
                seq.has_structure = True
                if entry_id:
                    seq.from_maestro = True
                    seq.maestro_included = entry_id in workspace_entry_ids
                    seq.maestro_entry_id = entry_id
                    seq.maestro_chain_name = chain.name
                sequence_group.sequences.append(seq)
            else:
                for child in seq.children:
                    if child.type == constants.SEQ_SECONDARY:
                        secondary = child
                        break
                for child in seq.children:
                    if child.type == constants.SEQ_ANNOTATION and \
                            child.annotation_type == constants.ANNOTATION_SSBOND:
                        ssb = child
                        break
            if ssb is None:
                ssb = seq.createSSBondAssignment()
                ssb.parent_sequence = seq
                seq.children.append(ssb)
            # This code comes from mcpro, zmat.py
            bond_list = []
            asl = "a.pt SG and (res. CYS or res. CYX)"
            if chain.name > ' ':
                asl += " and (chain.name %s)" % str(chain.name)
            s_atoms = evaluate_asl(st, asl)
            found = False
            for s_atom in s_atoms:
                for s2_atom in s_atoms:
                    if mm.mmct_is_atom_bonded(st, s_atom, s2_atom) == mm.TRUE:
                        found = True
                        if s_atom <= s2_atom:
                            resi1 = str(
                                st.atom[s_atom].resnum) + \
                                str(st.atom[s_atom].inscode)
                            resi2 = str(
                                st.atom[s2_atom].resnum) + \
                                str(st.atom[s2_atom].inscode)
                            bond_list.append((resi1, resi2))
            if not found:
                seq.children.remove(ssb)
            if secondary is None:
                secondary = seq.createSecondaryAssignment()
                secondary.parent_sequence = seq
                seq.children.append(secondary)
            if entry_id:
                name = getMaestroStructureName(st,
                                               entry_id,
                                               use_title=use_title)
            else:
                name = "Structure"
            name = name[:]
            short_name = name[:]
            seq.name = name
            seq.short_name = short_name
            residues = []
            ss_residues = []
            ssb_residues = []

            # Get unsorted list of residues (get residues by connectivity)
            residue_list = structure.get_residues_unsorted(chain)

            for res_index, residue in enumerate(residue_list):
                ca_atom = None
                for atom in residue.atom:
                    if "CA" in atom.pdbname:
                        ca_atom = atom
                        break
                if not ca_atom:
                    continue
                if residue.pdbres[:3] in valid_amino_acids:
                    code = constants.AMINO_ACIDS_3_TO_1[residue.pdbres[:3]]
                else:
                    code = 'X'
                res = Residue()
                res.sequence = seq
                res.code = code
                res.name = residue.pdbres[:3]
                res.num = residue.resnum
                res.icode = residue.inscode
                res.structureless = False
                res.bfactor = ca_atom.temperature_factor
                if entry_id:
                    res.maestro_color = ca_atom.color.rgb
                    res.maestro_atom_num = ca_atom.index
                    res.maestro_residue_index = res_index
                ss_res = Residue()
                ss_res.sequence = seq
                if ssb:
                    ssb_res = Residue()
                    ssb_res.sequence = seq
                    ssb_res.code = ' '
                    ssb_residues.append(ssb_res)
                if ca_atom.secondary_structure == 1:
                    ss_res.code = 'H'
                    res.ss_code = 'H'
                elif ca_atom.secondary_structure == 2:
                    ss_res.code = 'E'
                    res.ss_code = 'E'
                else:
                    ss_res.code = ' '
                    res.ss_code = ' '
                residues.append(res)
                ss_residues.append(ss_res)
            if residues:
                seq.residues = residues
                secondary.residues = ss_residues
                if ssb and ssb in seq.children:
                    ssb.residues = ssb_residues
                if chain.name in chain_seq_dict and align_func:
                    # Use full SEQRES sequence.
                    full_seq = Sequence()
                    full_seq.appendResidues(chain_seq_dict[chain.name])
                    align_func(full_seq, seq)
                if ssb and ssb in seq.children:
                    #ssb.residues = ssb_residues
                    new_bond_list = []
                    for rid1, rid2 in bond_list:
                        ridx1 = seq.getResidueIndex(rid1)
                        ridx2 = seq.getResidueIndex(rid2)
                        if ridx1 is not None and ridx2 is not None:
                            new_bond_list.append((ridx1, ridx2))
                    seq.ssb_bond_list = new_bond_list
                    ssb.bond_list = new_bond_list
                    if not new_bond_list:
                        seq.children.remove(ssb)
                    else:
                        ssb.height = int(len(new_bond_list) * 0.5 + 0.5)
                for res in seq.residues:
                    res.sequence = seq

                for child in seq.children:
                    child.parent_sequence = seq
                    for res in child.residues:
                        res.sequence = child
            else:
                sequence_group.sequences.remove(seq)
    if not structure and viewer:
        synchronizePropertiesWithMaestro(viewer, selection=True)
    return True


[docs]def maestroCalculateMinimumDistance(sequence):
    """
    Calculates minimum distance in agnstroms between selected residues
    in the specified sequence and remaining residues in the sequence.
    """
    if not maestro:
        return False
    result = maestro.project_table_synchronize()
    if not result:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False
    valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1)
    c_alpha_dict = {}
    for row in project_table.all_rows:
        st = row.getStructure(props=True, copy=False)
        chain_name = None
        entry_id = None
        for chain in st.chain:
            for atom in chain.atom:
                residue_name = atom.pdbres[:3]
                if residue_name in valid_amino_acids:
                    chain_name = atom.chain
                    entry_id = st.property["s_m_entry_id"]
                    break
            if sequence.maestro_chain_name == chain_name and \
                    sequence.maestro_entry_id == entry_id:
                c_alpha_dict = {}
                for atom in chain.atom:
                    if "CA" in atom.pdbname:
                        res_id = str(atom.resnum) + str(atom.inscode)
                        c_alpha_dict[res_id] = atom
                break
    distances = [1000000.0] * len(sequence.residues)
    for sel_res in sequence.residues:
        if sel_res.selected:
            res_id = str(sel_res.num) + str(sel_res.icode)
            if res_id in c_alpha_dict:
                sel_atom = c_alpha_dict[res_id]
                for idx, res in enumerate(sequence.residues):
                    res_id = str(res.num) + str(res.icode)
                    if res_id in c_alpha_dict:
                        atom = c_alpha_dict[res_id]
                        dx = atom.x - sel_atom.x
                        dy = atom.y - sel_atom.y
                        dz = atom.z - sel_atom.z
                        dist = dx * dx + dy * dy + dz * dz
                        if dist < distances[idx]:
                            distances[idx] = dist
    return distances


[docs]def maestroExtractStructureData(sequence):
    """
    Before building a homology model, we need to pass the structural data
    from Maestro to MSV. This function extracts the structural data
    from the Maestro entry coupled with the given sequence. The data
    is preformatted in PDB format, so it can be directly saved as Prime
    template file.
    """
    if not maestro:
        return False
    result = maestro.project_table_synchronize()
    if not result:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False
    if sequence.from_maestro:
        for row in project_table.all_rows:
            entry_id = row.entry_id
            st = row.getStructure(props=True, copy=False)
            for chain in st.chain:
                if sequence.maestro_chain_name == chain.name and \
                   sequence.maestro_entry_id == entry_id:
                    atom_dict = {}
                    for res in sequence.residues:
                        atom_dict[res.maestro_atom_num] = res
                    for atom in chain.atom:
                        if atom.index in atom_dict:
                            res = atom_dict[atom.index]
                            res.structure = []
                            maestro_res = atom.getResidue()
                            for maestro_atom in maestro_res.atom:
                                temp_factor = 0.0
                                if maestro_atom.temperature_factor:
                                    temp_factor = maestro_atom.temperature_factor
                                pdbline = \
                                "ATOM  %5d %-4s %4s%c%4d%c   %8.3f%8.3f%8.3f%6.2f%6.2f           %-2s\n"\
                                 % (maestro_atom.index, maestro_atom.pdbname,
                                    maestro_atom.pdbres, maestro_atom.chain,
                                    maestro_atom.resnum, maestro_atom.inscode,
                                    maestro_atom.x, maestro_atom.y,
                                    maestro_atom.z,
                                    1.00, # occupancy
                                    temp_factor,
                                    maestro_atom.element)
                                res.structure.append(pdbline)
                    return True
    return False


[docs]def synchronizePropertiesWithMaestro(viewer, colors=False, selection=False):
    """
    Synchronizes Maestro workspace colors or selection state with
    corresponding sequences in the sequence viewer.

    :type colors: bool
    :param colors: Synchronize colors.

    :type selection: bool
    :param selection: Synchronize selection state.

    :rtype: bool
    :return: True on successful synchronization, False otherwise
    """
    sequence_group = viewer.sequence_group
    if not maestro:
        return False
    result = maestro.project_table_synchronize()
    if not result:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False
    valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1)
    if selection:
        workspace_st = maestro.workspace_get(copy=False)
        workspace_selection = maestro.selected_atoms_get()
        selected_atoms = []
        for idx in workspace_selection:
            atom = workspace_st.atom[idx]
            selected_atoms.append((atom.entry_id, atom.number_by_entry))
    for row in project_table.included_rows:
        entry_id = row.entry_id
        st = row.getStructure(props=True, copy=False)
        for chain in st.chain:
            is_protein = False
            for atom in chain.atom:
                residue_name = atom.pdbres[:3]
                if residue_name in valid_amino_acids:
                    chain_name = atom.chain
                    is_protein = True
                    break
            if is_protein:
                # Try to locate this sequence in the sequence group.
                found = False
                for seq in sequence_group.sequences:
                    if seq.from_maestro:
                        if seq.maestro_chain_name == chain_name and \
                           seq.maestro_entry_id == entry_id:
                            found = True
                            break
                if found:
                    atom_dict = {}
                    for res in seq.residues:
                        atom_dict[res.maestro_atom_num] = res
                    for atom in chain.atom:
                        if atom.index in atom_dict:
                            res = atom_dict[atom.index]
                            if colors:
                                res.maestro_color = atom.color.rgb
                                if seq.color_scheme == constants.COLOR_MAESTRO:
                                    res.color = res.maestro_color
                            if selection:
                                res.selected = False
                                if (entry_id, atom.index) in selected_atoms:
                                    res.selected = True
    # Repaint the sequence viewer.
    viewer.updateView()
    return True


[docs]def getMaestroStructureName(st, entry_id, use_title=False):
    """
    Generates a name for Maestro entry.
    """
    if entry_id == "Scratch":
        return "Scratch"
    if use_title and "s_m_title" in st.property:
        return st.property["s_m_title"]
    if "s_pdb_PDB_ID" in st.property:
        name = st.property["s_pdb_PDB_ID"]
    elif "s_m_title" in st.property:
        name = st.property["s_m_title"]
    elif "s_m_entry_name" in st.property:
        name = st.property["s_m_entry_name"]
    else:
        name = "Entry " + entry_id
    return name


[docs]def getMaestroLigandList(template_list=None,
                         entry_list=None,
                         chain_distance=5.0,
                         list_all=False):
    """
    Returns list of ligands extracted from Maestro.

    :type template_list: list of Sequences
    :param template_list: Optional list of MSV sequences to extract
        ligands from. If the list is not specified, all PT chains
        will be analyzed.

    :note: This function ignores workspace scratch entry.

    :type entry list: list of strings
    :param entry_list: Optional list of entry IDs to extract ligands
        from. If the list is not specified, all PT chains
        will be analyzed.

    :note: If both template_list and entry_list are specified,
        entry_list takes precedence and template_list is not being used.

    :type chain_distance: float
    :param chain_distance: Distance from protein chain required
        to classify a molecule as a ligand. All molecules outside
        of this threshold will be ignored.

    :type list_all: boolean
    :param list_all: Disregards ligand selection and chain distance
        criteria and returns a list of all non-protein molecules.

    :rtype: tuple of lists
    :return: A tuple of lists of ligand names and list of ligands.
        The list of ligands consists of 4-tuples: (entry_id, ligand_pdbres,
        ligand_asl, ligand_st).
    """
    if not maestro:
        return [], []
    if not AslLigandSearcher:
        return [], []
    result = maestro.project_table_synchronize()
    if not result:
        return [], []
    project_table = maestro.project_table_get()
    if not project_table:
        return [], []
    ligands = []
    name_list = []
    entry_list = []
    if entry_list:
        # Note: This is not reachable
        selected_entries = entry_list
    else:
        selected_entries = []
        if template_list:
            for seq in template_list:
                if seq.from_maestro:
                    selected_entries.append(seq.maestro_entry_id)
    valid_amino_acids = list(constants.AMINO_ACIDS_3_TO_1)
    asl_ligand_searcher = AslLigandSearcher()
    for row in project_table.all_rows:
        entry_id = row.entry_id
        if selected_entries and entry_id not in selected_entries:
            continue
        st = row.getStructure(props=True, copy=False)
        entry_name = getMaestroStructureName(st, entry_id)
        ligands = asl_ligand_searcher.search(st)
        lig_pdbres = []
        for lig in ligands:
            if not list_all:
                ligand_asl = lig.ligand_asl
                protein_within_asl = \
                    "(not %s) and (fillres within %g (%s)) and (protein)" % \
                    (ligand_asl, chain_distance, ligand_asl)
                # Test if the ligand is within 5A from the protein chain
                close_atoms = evaluate_asl(st, protein_within_asl)
                if not close_atoms:
                    continue
            atom = lig.st.atom[1]
            lig_name = atom.pdbres[:3]
            lig_pdbres.append(lig_name)
            name_list.append(entry_name + " " + lig_name + " " + atom.chain +
                             ":" + str(atom.resnum))
            entry_list.append((entry_id, lig.pdbres, lig.ligand_asl, lig.st))
        if list_all:
            for mol in st.molecule:
                atom = st.atom[mol.getAtomIndices()[0]]
                pdbres = atom.pdbres[:3]
                if pdbres not in valid_amino_acids and pdbres not in lig_pdbres:
                    name_list.append(entry_name + " " + pdbres + " " +
                                     atom.chain + ":" + str(atom.resnum))
                    entry_list.append(
                        (entry_id, atom.pdbres, "mol.num " + str(mol.number),
                         mol.extractStructure()))
    return (name_list, entry_list)


[docs]def getEntryByName(name):
    """
    Returns Maestro entry ID of specified name.
    """
    if not maestro:
        return None
    result = maestro.project_table_synchronize()
    if not result:
        return None
    project_table = maestro.project_table_get()
    if not project_table:
        return None
    for row in project_table.all_rows:
        entry_id = row.entry_id
        st = row.getStructure(props=True, copy=False)
        if "s_m_entry_name" in st.property:
            if name == st.property["s_m_entry_name"]:
                return entry_id
    return None


[docs]def getEntryByJobName(name):
    """
    Returns Maestro entry ID of specified job name.
    """
    if not maestro:
        return None
    result = maestro.project_table_synchronize()
    if not result:
        return None
    project_table = maestro.project_table_get()
    if not project_table:
        return None
    for row in project_table.all_rows:
        entry_id = row.entry_id
        st = row.getStructure(props=True, copy=False)
        if "s_m_job_name" in st.property:
            if name == st.property["s_m_job_name"]:
                return entry_id
    return None


[docs]def maestroGetListOfEntryIDs():
    """
    Returns a list of entry IDs in Maestro PT.
    """
    if not maestro:
        return []
    result = maestro.project_table_synchronize()
    if not result:
        return []
    project_table = maestro.project_table_get()
    if not project_table:
        return []
    list = []
    for row in project_table.all_rows:
        list.append(row.entry_id)
    return list


[docs]def maestroColorEntrySurface(viewer, sequence):
    """
    Colors a surface in Maestro according to sequence colors.
    """
    if sequence.from_maestro and sequence.maestro_entry_id:
        propagateColorsToMaestro(viewer, sequence)
        cmd = 'surfacescheme \"Molecular Surface\" entry=\"' + \
            str(sequence.maestro_entry_id) + "\" scheme=\"Atom Color\""
        try:
            maestro.command(cmd)
        except:
            return False
    return True


[docs]def maestroIncludeAllEntrySequences(sequence_group, entry_id):
    """
    Includes all sequences belonging to the given entry.
    """
    for sequence in sequence_group.sequences:
        if sequence.maestro_entry_id == entry_id:
            sequence.maestro_included = True


[docs]def maestroExcludeAllEntrySequences(sequence_group, entry_id):
    """
    Excludes all sequences belonging to the given entry.
    """
    for sequence in sequence_group.sequences:
        if sequence.maestro_entry_id == entry_id:
            sequence.maestro_included = False


[docs]def maestroGetProjectPath(old=False):
    """
    Returns a path to a current project.
    """
    if not maestro or not mm:
        return None
    try:
        maestro.project_table_synchronize()
        pt = maestro.project_table_get()
        path = pt.fullname + os.sep + ".mmproj-admin" + os.sep
        if not old:
            path += "additional_data" + os.sep
        return path
    except:
        return None


STD_SASA_DICT = None


[docs]def maestroCalculateSASA(
        sequence_group,
        sequences=[],  # noqa: M511
        selected_only=False,
        normalize=True,
        percentage=False):
    """
    Calculates solvent accessible surface per residue.

    :type sequence_group: SequenceGroup
    :param: Target sequence group.

    :type sequences: list of Sequences
    :param: Optional list of sequences. If the list is provided it will be
        used instead of the sequence group.

    :type selected_only: bool
    :param selected_only: Calculate SASA only for selected sequences.

    :type normalize: bool
    :param normalize: Should we normalize the SASA area by area of amino acid
        in default conformation.

    :type percentage: bool
    :param percentage: If True return percentage SASA instead of absolute
        values.
    """
    global STD_SASA_DICT

    if not maestro:
        return False
    project_table = None
    try:
        maestro.project_table_synchronize()
        project_table = maestro.project_table_get()
    except:
        return False
    if not project_table:
        return False
    # Percentage implies normalization
    if percentage:
        normalize = True
    maestro_entries = {}
    if STD_SASA_DICT is None:
        STD_SASA_DICT = calculate_sasa_dict(ignore_backbone=True,
                                            include_calpha=True)
    group = sequence_group.sequences
    if sequences:
        group = sequences
    # Loop over all Maestro sequences and build a list of entries.
    for seq in group:
        if selected_only and not seq.selected:
            continue
        if seq.from_maestro:
            entry = seq.maestro_entry_id
            chain = seq.maestro_chain_name
            if entry not in maestro_entries:
                maestro_entries[entry] = {}
            seq._found = False
            maestro_entries[entry][chain] = seq
    found = False
    for row in project_table.all_rows:
        # Make sure this row's entry ID and row ID are within MSV sequences.
        entry_id = row.entry_id
        if entry_id not in list(maestro_entries):
            continue
        try:
            st = row.getStructure(props=True, copy=False)
        except:
            continue
        chains = maestro_entries[entry_id]
        full_sasa_list = calculate_sasa_by_atom(st)
        side_chain_atoms = evaluate_asl(st, "sidechain or atom.ptype \"CA\"")
        for chain in st.chain:
            if chain.name not in list(chains):
                continue
            res_dict = {}
            seq = maestro_entries[entry_id][chain.name]
            for res in seq.residues:
                res_dict[str(res.num) + res.icode] = res
            try:
                for res in chain.residue:
                    sasa = 0.0
                    res_atom_list = res.getAtomIndices()
                    for anum in res_atom_list:
                        if anum in side_chain_atoms:
                            sasa += full_sasa_list[anum - 1]
                    if normalize and STD_SASA_DICT and \
                            res.pdbres[:3] in STD_SASA_DICT:
                        sasa /= STD_SASA_DICT[res.pdbres[:3]]
                    if percentage:
                        sasa *= 100.0
                    res_id = str(res.resnum) + str(res.inscode)
                    if res_id in res_dict:
                        res_dict[res_id].area = sasa
                found = True
            except:
                continue
    return found


[docs]def maestroFindInteractions(sequence, ligand_list, selected_only=False):
    """
    Generates a list of interactions between a specified sequence and
    a list of ligands.

    :type sequence: Sequence
    :param sequence: Sequence the interactions are calculated for.

    :type ligand_list: list of tuples
    :param ligand_list: List of (ligand_st, ligand_entry_id) of the
        ligands.

    :rtype: list of interactions per residue
    :return: List of interactions calculated per residue. Each position
        includes list of interaction strings in the format of
        StructuralInteractionFingerprintGenerator.getFingerprintString
        Returns None if the calculations were not completed successfully.
    """
    if not maestro or not sequence or not ligand_list:
        return None
    if not StructuralInteractionFingerprintGenerator:
        return None
    generator = StructuralInteractionFingerprintGenerator()
    project_table = None
    try:
        maestro.project_table_synchronize()
        project_table = maestro.project_table_get()
    except:
        return None
    if not project_table:
        return None
    maestro_entries = {}
    if not sequence.from_maestro:
        return None
    for row in project_table.all_rows:
        entry_id = row.entry_id
        if entry_id != sequence.maestro_entry_id:
            continue
        try:
            st = row.getStructure(props=True, copy=False)
        except:
            continue
        for chain in st.chain:
            if chain.name != sequence.maestro_chain_name:
                continue
            # Found a Maestro chain, generate interactions
            generator.setReceptorStructure(st, entry_id)
            for ligand_st, ligand_id in ligand_list:
                generator.generateFingerprint(ligand_st, ligand_id, None)
            fp_list = []
            for res_idx, res in enumerate(sequence.residues):
                if res.is_gap:
                    continue
                for lig_idx in range(len(ligand_list)):
                    fp_list.append(
                        generator.getFingerprintString(lig_idx, res_idx))
            return fp_list
    return None


[docs]def maestroGetStructureAlignment(viewer, sequence_group):
    """
    Creates a sequence alignment based on structure superposition
    of the included entries. Consecutively merges the alignments.
    """
    structure_list = sequence_group.getStructureList()
    if len(structure_list) < 2:
        return
    # Extract C-alpha positions for all MSV entries
    result = maestro.project_table_synchronize()
    if not result:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False
    ca_positions = {}
    valid_structures = []
    for sequence in structure_list:
        if sequence.from_maestro:
            for row in project_table.all_rows:
                entry_id = row.entry_id
                st = row.getStructure(props=False, copy=False)
                for chain in st.chain:
                    if sequence.maestro_chain_name == chain.name and \
                       sequence.maestro_entry_id == entry_id:
                        # Remove gaps and structureless residues
                        sequence._tmp_res = []
                        for child in sequence.children:
                            child._tmp_res = []
                        for index, res in enumerate(sequence.residues):
                            if res.is_gap or res.structureless:
                                continue
                            sequence._tmp_res.append(sequence.residues[index])
                            for child in sequence.children:
                                child._tmp_res.append(child.residues[index])
                        if not sequence._tmp_res:
                            continue
                        sequence.residues = sequence._tmp_res
                        for child in sequence.children:
                            child.residues = child._tmp_res
                        # Extract list of C-alpha positions
                        ca_positions[sequence] = []
                        atom_dict = {}
                        valid_structures.append(sequence)
                        for res in sequence.residues:
                            atom_dict[res.maestro_atom_num] = res
                        for atom in chain.atom:
                            if atom.index in atom_dict:
                                res = atom_dict[atom.index]
                                ca_positions[sequence].append(
                                    (atom.x, atom.y, atom.z))
    viewer.contents_changed = True
    viewer.updateView()
    if len(valid_structures) < 2:
        return
    str1 = valid_structures[0]
    len1 = len(ca_positions[str1])
    for idx in range(1, len(valid_structures)):
        str2 = valid_structures[idx]
        len2 = len(ca_positions[str2])
        score_matrix = []
        for y in range(0, len1 + 1):
            score_matrix.append([0] * (len2 + 2))
        # Build score matrix
        for y in range(len2):
            for x in range(len1):
                try:
                    x1, y1, z1 = ca_positions[str1][x]
                    x2, y2, z2 = ca_positions[str2][y]
                except:
                    continue
                dist2 = (x2 - x1) * (x2 - x1) + \
                    (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1)
                score = old_div(20.0, (1.0 + old_div(dist2, 25.0)))
                score_matrix[x][y] = score
        align(str1,
              str2,
              merge=True,
              sequence_group=sequence_group,
              gap_open_penalty=-2.0,
              gap_extend_penalty=-0.2,
              scoring_matrix=score_matrix,
              direct_scores=True)


[docs]def maestroGetLigandAnnotations(sequence):
    """
    Creates ligand annotations for a given sequence.
    """
    if not sequence.has_structure:
        return False
    if not maestro:
        return False
    project_table = maestro.project_table_get()
    if not project_table:
        return False
    asl_ligand_searcher = AslLigandSearcher()
    res_dict = {}
    for res in sequence.residues:
        id = str(sequence.chain_id) + str(res.num) + str(res.icode)
        res_dict[id] = res
    ligand_list = []
    for row in project_table.all_rows:
        entry_id = row.entry_id
        if entry_id != sequence.maestro_entry_id:
            continue
        st = row.getStructure(props=True, copy=False)
        ligands = asl_ligand_searcher.search(st)
        if not ligands:
            continue
        for lig in ligands:
            name = lig.pdbres + "\n" + "Chain " + lig.st.atom[1].chain + \
                "\n" + str(len(lig.st.atom)) + " atoms"
            for child in sequence.children:
                if child.name == name:
                    sequence.children.remove(child)
                    break
            ligand_asl = lig.ligand_asl
            protein_within_asl = \
                    "(not %s) and (fillres within %g (%s)) and (protein)" % \
                    (ligand_asl, 6.0, ligand_asl)
            # Test if the ligand is within 6A from the protein chain
            close_atoms_6 = evaluate_asl(st, protein_within_asl)
            # Skip ligand w/o contacts
            if not close_atoms_6:
                continue
            protein_within_asl = \
                "(not %s) and (fillres within %g (%s)) and (protein)" % \
                (ligand_asl, 3.0, ligand_asl)
            # Test if the ligand is within 3A from the protein chain
            close_atoms_3 = evaluate_asl(st, protein_within_asl)
            res6 = []
            for atom_idx in close_atoms_6:
                atom = st.atom[atom_idx]
                id = str(atom.chain) + str(atom.resnum) + str(atom.inscode)
                if id in res_dict:
                    res6.append(res_dict[id])
            if not res6:
                continue
            res3 = []
            for atom_idx in close_atoms_3:
                atom = st.atom[atom_idx]
                id = str(atom.chain) + str(atom.resnum) + str(atom.inscode)
                if id in res_dict:
                    res3.append(res_dict[id])
            ligseq = Sequence()
            ligand_list.append(ligseq)
            ligseq.type = constants.SEQ_ANNOTATION
            ligseq.annotation_type = constants.ANNOTATION_LIGAND
            ligseq.name = name
            ligseq.short_name = name[:3] + ":" + lig.st.atom[1].chain
            ligseq.parent_sequence = sequence
            sequence.children.append(ligseq)
            ligseq.residues = []
            for res in sequence.gaplessResidues():
                ligres = Residue()
                ligres.sequence = ligseq
                ligres.code = ' '
                ligres.value = -1.0
                ligseq.residues.append(ligres)
                if res in res3:
                    ligres.color = (255, 31, 31)
                    ligres.value = 3.0
                elif res in res6:
                    ligres.color = (255, 127, 63)
                    ligres.value = 6.0
                else:
                    ligres.color = (191, 191, 191)
        sequence.propagateGapsToChildren()
    return ligand_list