Source code for schrodinger.ui.sequencealignment.sequence_group

"""
Implementation of SequenceGroup class.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Piotr Rotkiewicz

import copy
import math
import string
from past.utils import old_div

import schrodinger
from schrodinger.ui.sequencealignment.sequence import delete_from_str

from . import constants
from . import utils
from .residue import Residue
from .sequence import Sequence

try:
    from schrodinger.protein.sequence import find_generalized_pattern
except ImportError:
    find_generalized_pattern = None

maestro = schrodinger.get_maestro()


[docs]class SequenceGroup(object):
    """
    This class is a container for the sequences displayed in the sequence
    viewer. The class performs various operations on sets of sequences
    or on the entire group. After the group is modified, `SequenceViewer`
    update() method should be called.
    """

[docs]    def __init__(self):

        self.sequences = []
        self.profile = None  #Global consensus profile sequence.
        self.reference = None  #Reference sequence
        self.tree = None
        self.consider_gaps = True  # when calculating sequence identity.
        self.name = ""  #Group name
        self.color_mode = constants.COLOR_SIDECHAIN_CHEMISTRY  #current color scheme.
        self.build_mode = 0  #: Homology modeling mode.
        self.user_annotations = []
        self.custom_color = (255, 255, 255)
        self.background_color = (255, 255, 255)
        self.inv_background_color = (0, 0, 0)
        #: Calculate sequence identity in selected columns only
        self.identity_in_columns = False

[docs]    def copyForUndo(self, attributes_only=True):
        group_copy = copy.copy(self)
        if attributes_only:
            group_copy.sequences = []
            for seq in self.sequences:
                group_copy.sequences.append(seq.copyForUndo(deep_copy=False))
            group_copy.profile = copy.copy(self.profile)
            group_copy.tree = copy.copy(self.tree)
        else:
            group_copy.sequences = copy.deepcopy(self.sequences)
            group_copy.profile = copy.deepcopy(self.profile)
            group_copy.tree = copy.deepcopy(self.tree)
        return group_copy

[docs]    def clear(self):
        """
        Deletes the entire contents of self.
        """
        self.sequences = []
        self.profile = Sequence()
        self.tree = None
        self.reference = None

[docs]    def addSequence(self, sequence):
        """
        Adds a new sequence to self.
        """
        if sequence and sequence not in self.sequences:
            self.sequences.append(sequence)

[docs]    def hasSelectedResidues(self):
        """
        Check if the group has any residues selected.

        :rtype: bool
        :return: True if there are any residues selected within the group,
            False otherwise
        """
        for seq in self.sequences:
            for res in seq.residues:
                if res.selected:
                    return True
        return False

    #Used in tests only, but should get deleted eventually.
[docs]    def countSelectedResidues(self):
        """
        Counts selected residues.

        :rtype: int
        :return: Number of selected residues.
        """
        count = 0
        for seq in self.sequences:
            if seq.isValidProtein():
                for res in seq.residues:
                    if res.selected:
                        count += 1
        return count

[docs]    def hasSelectedSequences(self,
                             exclude_reference=False,
                             check_children=False):
        """
        Check if there are any selected sequences within the group.

        :type check_children: bool (default=False)
        :param check_children: optional parameter, if True, the function will
            also check child sequences.

        :rtype: bool
        :return: True if there are any sequences selected in the group,
            False otherwise
        """
        for seq in self.sequences:
            if seq.selected and (not exclude_reference or
                                 seq != self.reference):
                return True
            if check_children:
                for child in seq.children:
                    if child.selected:
                        return True
        return False

[docs]    def selectColumns(self, start, end, select=True):
        """
        Select all columns within alignment positon range from start to end.

        :type start: int
        :param start: first column to be selected

        :type end: int
        :param end: last column to be selected
        """
        if start is None or end is None:
            return
        if start > end:
            tmp = end
            end = start
            start = tmp
        if start < 0:
            start = 0
        for seq in self.sequences:
            for column in range(start, end + 1):
                if seq.length() > column:
                    seq.residues[column].selected = select

[docs]    def unselectAll(self, make_active=False):
        """
        Unselect all residues within the entire group.
        """
        for seq in self.sequences:
            if make_active:
                seq.makeActive()
            seq.unselectResidues()
            for child in seq.children:
                child.unselectResidues()

[docs]    def anchorSelection(self):
        for seq in self.sequences:
            if seq.isValidProtein():
                for res in seq.residues:
                    if res.selected:
                        res.selected = False
                        res.active = True
                    else:
                        res.active = False

[docs]    def clearAnchors(self):
        for seq in self.sequences:
            if seq.isValidProtein():
                for res in seq.residues:
                    res.active = True

[docs]    def hideSelected(self):
        """
        Hide all selected sequences (including children).
        """
        for seq in self.sequences:
            if seq.selected:
                seq.visible = False
                seq.selected = False
                for child in seq.children:
                    child.selected = False

[docs]    def hideSequences(self, sequences):
        """
        Hide all sequences passed in list.

        :param sequences: sequences to hide
        :type  sequences: list of `sequences<Sequence>`
        """
        for seq in sequences:
            seq.visible = False
            seq.selected = False
            for child in seq.children:
                child.visible = False
                child.selected = False

[docs]    def showSequences(self, sequences):
        """
        Show all sequences passed in list.

        :param sequences: sequences to show
        :type  sequences: list of `sequences<Sequence>`
        """
        for seq in sequences:
            seq.visible = True
            for child in seq.children:
                child.visible = True

[docs]    def showAll(self):
        """
        Makes all sequences within the group visible.
        """
        for seq in self.sequences:
            seq.visible = True
            for child in seq.children:
                child.visible = True

[docs]    def deleteSelected(self):
        """
        Deletes all selected sequences.
        :note: tree will be removed.
        """
        filtered_sequences = [seq for seq in self.sequences if not seq.selected]
        self.sequences = filtered_sequences
        for seq in self.sequences:
            seq.children = [
                child for child in seq.children if not child.selected
            ]
        if len(self.sequences) == 0:
            self.profile = None
            self.reference = None
        self.tree = []
        self.updateReference()
        return self.removeLonelyRuler()

[docs]    def updateReference(self, ignore_maestro=False):
        """
        This function updates the reference sequence.
        """
        if not self.reference or self.reference not in self.sequences:
            # Reset just in case the reference is not valid anymore.
            self.reference = None
            for seq in self.sequences:
                if seq and seq.isValidProtein():
                    if ignore_maestro and seq.from_maestro:
                        continue
                    self.reference = seq
                    break
        # Move refrence to the top position.
        if self.reference and self.reference in self.sequences:
            # self.sequences.remove(self.reference)
            index = 0
            for index, seq in enumerate(self.sequences):
                if seq and not \
                  (seq.type == constants.SEQ_RULER or seq.global_sequence):
                    break
            self.sequences.remove(self.reference)
            self.sequences.insert(index, self.reference)
        self.addRuler(update_only=True)

[docs]    def fillGaps(self):
        """
        Replaces all selected residues in visible sequences with gaps.
        """
        none_selected = not self.hasSelectedSequences()
        for seq in self.sequences:
            if seq.visible and (none_selected or seq.selected):
                for res in seq.residues:
                    if res.selected:
                        res.code = constants.UNLOCKED_GAP_SYMBOL
                        res.is_gap = True
                        res.inverted = False
                        res.color = (0, 0, 0)

[docs]    def removeAllGaps(self):
        """
        Removes all gaps from the visible sequences.
        """
        none_selected = not self.hasSelectedSequences()
        for seq in self.sequences:
            if seq.visible and (none_selected or seq.selected):
                seq.removeAllGaps(selected_only=True)
                seq.propagateGapsToChildren()

[docs]    def deleteSelectedResidues(self):
        """
        Deletes selected residues from the visible sequences.
        """
        for seq in self.sequences:
            if seq.visible:
                for child in seq.children:
                    for index, res in enumerate(child.residues):
                        if index < seq.length():
                            res.selected = seq.residues[index].selected
                        else:
                            # Truncate extra residues. This shoud not
                            # normally be necessary unless the sequence
                            # comes from a corrupted project.
                            res.selected = True
                seq.deleteSelectedResidues()
                for child in seq.children:
                    child.deleteSelectedResidues()

[docs]    def isColumnEmpty(self, position):
        """
        Checks if a column at given position is empty.
        """
        for seq in self.sequences:
            length = seq.length()
            if seq.isValidProtein() and position < length:
                if not seq.residues[position].is_gap:
                    return False
        return True

[docs]    def minimizeAlignment(self, query_only=False):
        """
        Minimizes the alignment, i.e. removes all gaps from the gap-only
        columns.
        """
        max_length = self.findMaxLength()
        for pos in range(max_length):
            gaps_only = True
            if query_only:
                if self.reference and pos < self.reference.length() and \
                        self.reference.residues[pos].is_gap != True:
                    gaps_only = False
            else:
                for seq in self.sequences:
                    length = seq.length()
                    if seq.isValidProtein() and pos < length:
                        res = seq.residues[pos]
                        if not res.is_gap:
                            gaps_only = False
                            break
            if gaps_only:
                for seq in self.sequences:
                    length = seq.length()
                    if seq.isValidProtein() and pos < length:
                        seq.residues[pos].num = None
                        for child in seq.children:
                            child.residues[pos].num = None
        for seq in self.sequences:
            if seq.isValidProtein():
                seq.residues = [
                    res for res in seq.residues if res.num is not None
                ]
                for child in seq.children:
                    child.residues = [
                        res for res in child.residues if res.num is not None
                    ]

[docs]    def lockGaps(self):
        """
        Locks gaps in visible sequences.
        """
        no_selected = not self.hasSelectedResidues()
        for seq in self.sequences:
            if seq.visible:
                for pos in range(len(seq.residues)):
                    parent_res = seq.residues[pos]
                    if parent_res.is_gap and \
                        (no_selected or parent_res.selected):
                        parent_res.code = constants.LOCKED_GAP_SYMBOL
                    for child in seq.children:
                        if pos not in list(range(len(child.residues))):
                            continue
                        res = child.residues[pos]
                        if res.is_gap and (no_selected or parent_res.selected):
                            res.code = constants.LOCKED_GAP_SYMBOL

[docs]    def unlockGaps(self):
        """
        Unlocks gaps in visible sequences.
        """
        no_selected = not self.hasSelectedResidues()
        for seq in self.sequences:
            if seq.visible:
                for pos in range(len(seq.residues)):
                    parent_res = seq.residues[pos]
                    if parent_res.is_gap and \
                        (no_selected or parent_res.selected):
                        parent_res.code = constants.UNLOCKED_GAP_SYMBOL
                    for child in seq.children:
                        if pos not in list(range(len(child.residues))):
                            continue
                        res = child.residues[pos]
                        if res.is_gap and (no_selected or parent_res.selected):
                            res.code = constants.UNLOCKED_GAP_SYMBOL

[docs]    def selectAlignedBlocks(self, selected_only=False):
        """
        Selects aligned blocks (regions without gaps).
        """
        self.unselectAll()
        any_selected = self.hasSelectedSequences()
        if not any_selected:
            selected_only = False
        max_length = self.findMaxLength()
        for pos in range(max_length):
            n_res = 0
            n_id = 0
            for seq in self.sequences:
                if seq.isValidProtein() and (not selected_only or seq.selected):
                    n_res += 1
                    if pos < seq.length():
                        if not seq.residues[pos].is_gap:
                            n_id += 1
            if n_res == n_id:
                for seq in self.sequences:
                    if seq.isValidProtein() and \
                    (not selected_only or seq.selected):
                        if pos < seq.length() and \
                           not seq.residues[pos].is_gap:
                            seq.residues[pos].selected = True

[docs]    def selectStructureBlocks(self, selected_only=False):
        """
        Selects aligned blocks (regions without gaps).
        """
        self.unselectAll()
        max_length = self.findMaxLength()
        for pos in range(max_length):
            n_id = 0
            for seq in self.sequences:
                if seq.isValidProtein() and (not selected_only or seq.selected):
                    if pos < seq.length():
                        if not seq.residues[pos].structureless:
                            n_id += 1
            if n_id > 0:
                for seq in self.sequences:
                    if seq.isValidProtein() and \
                      (not selected_only or seq.selected):
                        if pos < seq.length() and \
                           not seq.residues[pos].is_gap:
                            seq.residues[pos].selected = True

[docs]    def selectIdentities(self):
        """
        Selects identical residues in the alignment, ignores gaps.
        """
        self.unselectAll()
        max_length = self.findMaxLength()
        for pos in range(max_length):
            code = None
            n_res = 0
            n_id = 0
            for seq in self.sequences:
                if seq.isValidProtein():
                    if pos < seq.length():
                        if not seq.residues[pos].is_gap:
                            n_res = n_res + 1
                            if code is None:
                                code = seq.residues[pos].code
                                n_id += 1
                            elif seq.residues[pos].code == code:
                                n_id += 1
            if n_res > 0 and n_res == n_id:
                for seq in self.sequences:
                    if pos < seq.length() and \
                       not seq.residues[pos].is_gap:
                        seq.residues[pos].selected = True

[docs]    def selectResidues(self, start_row, start_pos, end_row, end_pos, select):
        """
        Selects residues within a specified region.

        :type start_row: (Sequence, int, int, int)
        :param start_row: initial row

        :type start_pos: int
        :param start_pos: initial position

        :type end_row: (Sequence, int, int, int)
        :param end_row: final row

        :type end_pos: int
        :param end_pos: final position
        """
        if start_pos is None or end_pos is None:
            return
        start_seq, start, dummy, height = start_row
        end_seq, dummy, end, height = end_row
        if start_pos > end_pos:
            tmp = end_pos
            end_pos = start_pos
            start_pos = tmp
        if start_seq.parent_sequence:
            start_seq = start_seq.parent_sequence
        if end_seq.parent_sequence:
            end_seq = end_seq.parent_sequence
        # Block selection.
        if (start_seq not in self.sequences) or (end_seq not in self.sequences):
            return
        start_seq_idx = self.sequences.index(start_seq)
        end_seq_idx = self.sequences.index(end_seq)
        if start_seq_idx > end_seq_idx:
            tmp = end_seq_idx
            end_seq_idx = start_seq_idx
            start_seq_idx = tmp
        for seq_idx in range(start_seq_idx, end_seq_idx + 1):
            seq = self.sequences[seq_idx]
            for pos in range(start_pos, end_pos + 1):
                res = seq.getResidue(pos)
                if res:
                    res.selected = select
                for child in seq.children:
                    res = child.getResidue(pos)
                    if res:
                        res.selected = select

[docs]    def grabAndDrag(self,
                    start_row,
                    start_pos,
                    end_row,
                    end_pos,
                    lock_down,
                    gap_insert_mode=False,
                    slide_sequence=False,
                    block_length=0):
        """
        This method performs a grab-and-drag operation on a specified region.

        :type start_row: (Sequence, int, int, int)
        :param start_row: initial row

        :type start_pos: int
        :param start_pos: initial position

        :type end_row: (Sequence, int, int, int)
        :param end_row: final row

        :type end_pos: int
        :param end_pos: final position

        :type lock_down: bool
        :param lock_down: indicates if the sequence downstream should be locked
        """
        if start_row is None or end_row is None:
            return
        start_seq, start, dummy, height = start_row
        end_seq, dummy, end, height = end_row
        if start_seq.parent_sequence:
            start_seq = start_seq.parent_sequence
        if end_seq.parent_sequence:
            end_seq = end_seq.parent_sequence
        if start_seq is not None and start_pos is not None and \
           end_pos is not None and not start_seq.global_sequence:
            parent_sequence = seq = start_seq
            if slide_sequence:
                if end_pos > start_pos:
                    n_inserted = seq.insertGaps(0, end_pos - start_pos)
                    for pos in range(n_inserted):
                        if self.isColumnEmpty(0):
                            for other_seq in self.sequences:
                                other_seq.removeGaps(0, 1)
                elif end_pos < start_pos:
                    diff = start_pos - end_pos
                    n_removed = seq.removeGapsBackwards(0, start_pos - end_pos)
                    if diff > n_removed:
                        for other_seq in self.sequences:
                            if other_seq.isValidProtein() and other_seq != seq:
                                other_seq.insertGaps(0, diff - n_removed)
                return
            if start_pos >= 0 and start_pos < parent_sequence.length() and \
               not parent_sequence.residues[start_pos].active:
                return
            if end_pos > start_pos:
                max_insert = parent_sequence.countActiveGaps(end_pos)
                n_gaps = end_pos - start_pos
                if max_insert >= 0:
                    n_gaps = min(max_insert, n_gaps)
                n_inserted = seq.insertGaps(start_pos, n_gaps)
                if not gap_insert_mode and \
                   (not lock_down or parent_sequence.haveAnchors(end_pos + 1)):
                    seq.removeGaps(end_pos, n_inserted)
            elif end_pos < start_pos:
                if end_pos < 0:
                    end_pos = 0
                n_removed = seq.removeGapsBackwards(end_pos,
                                                    start_pos - end_pos)
                if not lock_down and start_pos < seq.length():
                    seq.insertGaps(start_pos + block_length - n_removed + 1,
                                   n_removed)
                else:
                    inactive_pos = parent_sequence.inactivePosition(start_pos)
                    if inactive_pos >= 0:
                        seq.insertGaps(inactive_pos, n_removed)
            seq.propagateGapsToChildren()

[docs]    def tmpLockGaps(self, row, start_pos, end_pos):
        """
        Temporarily locks gaps in a specified region.

        :type row: (Sequence, int, int, int)
        :param row: sequence viewer row

        :type start_pos: int
        :param start_pos: initial position

        :type end_pos: int
        :param end_pos: final position
        """
        seq, start, end, height = row
        for res in seq.residues:
            res.tmp_code = None
            if res.selected:
                if res.is_gap:
                    res.tmp_code = res.code
                    res.code = constants.LOCKED_GAP_SYMBOL

[docs]    def tmpUnlockGaps(self, row, start_pos, end_pos):
        """
        Unlocks temporarily locked gaps.

        :type row: (Sequence, int, int, int)
        :param row: sequence viewer row

        :type start_pos: int
        :param start_pos: initial position

        :type end_pos: int
        :param end_pos: final position
        """
        seq, start, end, height = row
        for res in seq.residues:
            if res.selected:
                if res.is_gap and res.tmp_code:
                    res.code = res.tmp_code

[docs]    def grabAndDragBlock(self, rows, start_row, start_pos, end_row, end_pos,
                         block_length, lock_down):
        """
        This method performs grab-and-drag operation on selected blocks
        in "Select And Slide" mode.

        :type rows: list of (Sequence, int, int, int)
        :param rows: sequence viewer rows

        :type start_row: (Sequence, int, int, int)
        :param start_row: initial row

        :type start_pos: int
        :param start_pos: initial position

        :type end_row: (Sequence, int, int, int)
        :param end_row: final row

        :type end_pos: int
        :param end_pos: final position

        :type lock_down: bool
        :param lock_down: indicates if the sequence downstream should be locked
        """
        start_seq, start, dummy, height = start_row
        end_seq, dummy, end, height = end_row
        found = False
        for row in rows:
            if row == start_row:
                found = True
            if found:
                seq, dum, dum, dum = row
                if not seq.parent_sequence:
                    self.tmpLockGaps(row, start_pos, end_pos)
                    self.grabAndDrag(row,
                                     start_pos,
                                     row,
                                     end_pos,
                                     lock_down,
                                     block_length=block_length)
                    self.tmpUnlockGaps(row, start_pos, end_pos)
            if row == end_row:
                break

[docs]    def insertGap(self, start_row, start_pos):
        """
        Inserts a single gap at a specified position.

        :type start_row: (Sequence, int, int, int)
        :param start_row: sequence viewer row

        :type start_pos: int
        :param start_pos: position where the gap will be inserted
        """
        start_seq, start, dummy, height = start_row
        if start_seq is not None:
            parent_sequence = seq = start_seq
            n_children = 0
            if seq.parent_sequence:
                parent_sequence = seq = seq.parent_sequence
            n_children = len(parent_sequence.children)
            child_index = 0
            while seq:
                seq.insertGaps(start_pos, 1)
                # Iterate over children.
                if child_index < n_children:
                    seq = parent_sequence.children[child_index]
                    child_index += 1
                else:
                    seq = None

[docs]    def removeGap(self, start_row, start_pos):
        """
        Removes a single gap at a specified position.

        :type start_row: (Sequence, int, int, int)
        :param start_row: sequence viewer row

        :type start_pos: int
        :param start_pos: position where the gap will be removed
        """
        start_seq, start, dummy, height = start_row
        if start_seq is not None:
            parent_sequence = seq = start_seq
            n_children = 0
            if seq.parent_sequence:
                parent_sequence = seq = seq.parent_sequence
            n_children = len(parent_sequence.children)
            child_index = 0
            while seq:
                seq.removeGaps(start_pos, 1)
                # Iterate over children.
                if child_index < n_children:
                    seq = parent_sequence.children[child_index]
                    child_index += 1
                else:
                    seq = None

[docs]    def colorBySequenceSimilarity(self):
        """
        This method colors the sequences by sequence similarity.
        """
        self.colorByResidueType()
        for seq in self.sequences:
            if seq.isValidProtein(global_annotation=True):
                for pos in range(seq.length()):
                    res = seq.residues[pos]
                    color = (0, 0, 0)
                    if res.is_gap:
                        res.inverted = False
                    else:
                        res.inverted = True
                        color = self.background_color
                        if self.reference and pos < self.reference.length():
                            profile_res = self.reference.residues[pos]
                            if res.code == profile_res.code:
                                color = (255, 64, 64)
                            else:
                                score = utils.matrixValue(
                                    constants.SIMILARITY_MATRIX, res.code,
                                    profile_res.code)
                                if score > 0.0:
                                    color = (255, 128, 0)
                    res.color = color

[docs]    def colorByIdentity(self):
        """
        This method colors the sequences by identity.
        """
        if not self.reference:
            return
        for pos in range(self.reference.length()):
            ref_code = self.reference.residues[pos].code
            if ref_code != '.':
                for seq in self.sequences:
                    if seq.isValidProtein():
                        if pos < seq.length():
                            res = seq.residues[pos]
                            if not res.is_gap:
                                if res.code != ref_code:
                                    res.color = self.background_color

[docs]    def colorByDifference(self):
        """
        This method colors the sequences by difference.
        """
        if not self.reference:
            return
        for pos in range(self.reference.length()):
            ref_code = self.reference.residues[pos].code
            if ref_code != '.':
                for seq in self.sequences:
                    if seq.isValidProtein():
                        if pos < seq.length():
                            res = seq.residues[pos]
                            if not res.is_gap:
                                if res.code == ref_code:
                                    res.color = self.background_color

[docs]    def calculateProfile(self, ignore_query=False):
        """
        Calculates global sequence profile and associated information.

        :type ignore_query: bool
        :param ignore_query: Tells if query sequence should be included in
            the calculation or it should be ignored.
        """
        max_length = self.findMaxLength(exclude_consensus=True)
        if not self.profile:
            self.profile = Sequence()
            self.profile.short_name = "Sequence Profile"
            for pos in range(max_length):
                res = Residue()
                res.sequence = self.profile
                self.profile.residues.append(res)
        self.updateReference()
        if self.profile.length() < max_length:
            self.profile.residues = []
            d_residues = max_length - self.profile.length()
            for pos in range(d_residues):
                res = Residue()
                res.sequence = self.profile
                self.profile.residues.append(res)
        else:
            self.profile.residues = self.profile.residues[:max_length]
        amino_acid_symbols = list(constants.AMINO_ACIDS)
        n_seq = 0
        for seq in self.sequences:
            if seq == self.reference and ignore_query:
                continue
            if seq.isValidProtein():
                n_seq += 1
        if n_seq == 0:
            return
        i_n_seq = old_div(1.0, n_seq)
        if ignore_query or n_seq == 1:
            i_n_seq_1 = i_n_seq
        else:
            i_n_seq_1 = old_div(1.0, (n_seq - 1))
        for pos in range(max_length):
            frequency = {}
            for aa in string.ascii_uppercase:
                frequency[aa] = 0.0
            code = None
            n_res = 0
            n_id = 0
            n_ungapped = 0
            column = ""
            for seq in self.sequences:
                if seq == self.reference and ignore_query:
                    continue
                if seq.isValidProtein():
                    if pos < seq.length():
                        res = seq.residues[pos]
                        if not res.is_gap:
                            n_ungapped += 1
                        if not res.is_gap or self.consider_gaps:
                            if res.code in amino_acid_symbols:
                                frequency[res.code] += 1
                                column += res.code
                            if code is None:
                                code = res.code
                                n_id += 1
                            elif res.code == code:
                                n_id += 1
                            n_res = n_res + 1
            if not self.consider_gaps and n_res > 0:
                factor = old_div(1.0, n_res)
            else:
                factor = i_n_seq
            res = self.profile.residues[pos]
            res.composition = set(column)
            res.factor = n_ungapped * factor
            max = 0.0
            max_code = aa
            for aa in amino_acid_symbols:
                if aa == 'X':
                    break
                f = float(frequency[aa])
                if f > max:
                    max = f
                    max_code = aa

            # Shannon entropy
            entropy = 0.0
            for aa in amino_acid_symbols:
                fract = float(frequency[aa]) * i_n_seq
                if fract > 0.0:
                    entropy += -fract * math.log(fract, 2.0)
            res.bits = math.log(20.0, 2.0) - entropy
            res.entropy = old_div(entropy, -math.log(old_div(1.0, 20.0), 2.0))
            n_max = 0
            profile_codes = []
            if max > 0.0:
                for aa in amino_acid_symbols:
                    if max == float(frequency[aa]):
                        profile_codes.append(aa)
                        n_max += 1

            # Query residue fraction
            res.query_fraction = 0.0
            if pos < len(self.reference.residues) and \
                    not self.reference.residues[pos].is_gap:
                query_aa = self.reference.residues[pos].code
                if query_aa == '+':
                    query_aa = self.reference.residues[pos].profile_codes[pos]
                if not ignore_query:
                    res.query_fraction = (frequency[query_aa] - 1) * i_n_seq_1
                else:
                    res.query_fraction = frequency[query_aa] * i_n_seq_1
            if n_res == n_id:
                res.id = True
            else:
                res.id = False
            max *= factor
            res.value = max
            res.profile_codes = profile_codes
            if code:
                res.is_gap = False
                if n_max == 1:
                    res.code = max_code
                else:
                    res.code = '+'
            else:
                res.code = '.'
            for aa in amino_acid_symbols:
                frequency[aa] *= i_n_seq
            res.frequencies = sorted(list(frequency.items()),
                                     key=lambda x: x[1],
                                     reverse=True)

        # Now calculate sequence identity for every sequence in the group.
        for seq in self.sequences:
            if seq.isValidProtein():
                seq.identity = seq.calcIdentity(self.reference,
                                                self.consider_gaps,
                                                self.identity_in_columns)
                seq.similarity = seq.calcSimilarity(self.reference,
                                                    self.consider_gaps,
                                                    self.identity_in_columns)
                seq.score = seq.calcScore(self.reference, self.consider_gaps,
                                          self.identity_in_columns)
                seq.homology = seq.calcHomology(self.reference,
                                                self.consider_gaps,
                                                self.identity_in_columns)

[docs]    def getConsensus(self):
        """
        Returns a consensus sequence.
        """
        for seq in self.sequences:
            if seq.type == constants.SEQ_CONSENSUS:
                return seq
        return None

[docs]    def updateConsensus(self, consensus):
        """
        Updates the consensus sequence using a pre-calculated profile.

        :type consensus: `Sequence`
        :param consensus: consensus sequence to be updated
        """
        consensus.residues = []
        consensus.global_sequence = True
        aa_codes = list(constants.AMINO_ACIDS)
        for pos in range(self.profile.length()):
            res = Residue()
            res.code = self.profile.residues[pos].code
            if res.code not in aa_codes:
                res.color = self.inv_background_color
                res.inverted = True
            res.value = self.profile.residues[pos].value
            res.profile_codes = self.profile.residues[pos].profile_codes
            res.sequence = consensus
            consensus.residues.append(res)
        if len(consensus.children) > 0:
            # Update the plot only if there is one.
            plot = consensus.children[0]
            plot.residues = []
            plot.global_sequence = True
            for pos in range(self.profile.length()):
                res = Residue()
                res.code = self.profile.residues[pos].code
                res.value = self.profile.residues[pos].value
                res.sequence = plot
                res.color = (32, 32, 32)
                plot.residues.append(res)
            plot.calculatePlotValues(0, min_value=0.0, max_value=1.0)
        consensus.identity = consensus.calcIdentity(self.reference,
                                                    self.consider_gaps, False)
        consensus.similarity = consensus.calcSimilarity(self.reference,
                                                        self.consider_gaps,
                                                        False)
        consensus.homology = consensus.calcHomology(self.reference,
                                                    self.consider_gaps, False)
        consensus.score = consensus.calcScore(self.reference,
                                              self.consider_gaps, False)

[docs]    def updateSymbols(self, symbols):
        """
        Updates the consensus symbols string using a pre-calculated profile.

        :type symbols: `Sequence`
        :param symbols: consensus symbols string to be updated
        """
        strong_sets = [
            set("STA"),
            set("NEQK"),
            set("NHQK"),
            set("NDEQ"),
            set("QHRK"),
            set("MILV"),
            set("MILF"),
            set("HY"),
            set("FYW")
        ]

        weak_sets = [
            set("CSA"),
            set("ATV"),
            set("SAG"),
            set("STNK"),
            set("STPA"),
            set("SGND"),
            set("SNDEQK"),
            set("NDEQHK"),
            set("NEQHRK"),
            set("FVLIM"),
            set("HFY")
        ]

        symbols.residues = []
        symbols.global_sequence = True
        for pos in range(self.profile.length()):
            res = Residue()
            profile_res = self.profile.residues[pos]
            res.value = profile_res.value
            res.profile_codes = profile_res.profile_codes
            res.sequence = symbols
            res.code = ' '
            res.color = self.background_color
            if profile_res.factor == 1.0 and profile_res.composition:
                if profile_res.id and len(profile_res.composition) == 1:
                    res.code = '*'
                else:
                    for strong in strong_sets:
                        if profile_res.composition.issubset(strong):
                            res.code = ':'
                            break
                    if res.code == ' ':
                        for weak in weak_sets:
                            if profile_res.composition.issubset(weak):
                                res.code = '.'
                                break
            symbols.residues.append(res)

[docs]    def updateMeanHydrophobicity(self, seq):
        """
        Updates mean hydrophobicity global annotation sequence.

        :type seq: `Sequence`
        :param seq: mean hydrophobicity sequence
        """
        seq.residues = []
        seq.plot_style = constants.PLOT_HISTOGRAM
        seq.plot_color = (192, 128, 160)
        seq.global_sequence = True
        for pos in range(self.profile.length()):
            res = Residue()
            res.code = self.profile.residues[pos].code
            n_aa = 0
            total = 0.0
            res.profile_codes = self.profile.residues[pos].profile_codes
            for aa in res.profile_codes:
                if aa != 'X':
                    total += constants.HYDROPHOBICITY_SCALES[aa][0]
                n_aa += 1
            if n_aa > 0:
                total /= n_aa
            res.value = total
            res.sequence = seq
            seq.residues.append(res)
        seq.calculatePlotValues(2)

[docs]    def updateMeanPI(self, seq):
        """
        Updates mean isoelectric point global annotation sequence.

        :type seq: `Sequence`
        :param seq: mean hydrophobicity sequence
        """
        seq.residues = []
        seq.plot_style = constants.PLOT_HISTOGRAM
        seq.plot_color = (128, 192, 160)
        seq.global_sequence = True
        for pos in range(self.profile.length()):
            res = Residue()
            res.code = self.profile.residues[pos].code
            n_aa = 0
            total = 0.0
            res.profile_codes = self.profile.residues[pos].profile_codes
            for aa in res.profile_codes:
                if aa != 'X':
                    total += constants.PI_SCALE[aa]
                n_aa += 1
            if n_aa > 0:
                total /= n_aa
            res.value = total
            res.sequence = seq
            seq.residues.append(res)
        seq.calculatePlotValues(2)

[docs]    def colorByTaylorScheme(self):
        """
        Colors all amino acid sequences using Taylor scheme.
        """
        for seq in self.sequences:
            if seq.isValidProtein(global_annotation=True):
                for res in seq.residues:
                    res.color = res.colorTaylor()
                    if not res.is_gap:
                        res.inverted = True
                    else:
                        res.inverted = False

[docs]    def colorAverageColumnColors(self):
        """
        Averages colors in columns.

        :note: This feature works particularly well with the Taylor scheme.
        """
        max_length = self.findMaxLength()

        for pos in range(max_length):
            r = g = b = 0
            n_res = 0
            for seq in self.sequences:
                if seq.isValidProtein():
                    if pos < seq.length():
                        res = seq.residues[pos]
                        if not res.is_gap:
                            res_r, res_g, res_b = res.color
                            r += res_r
                            g += res_g
                            b += res_b
                            n_res += 1
            if n_res:
                i_n_res = old_div(1.0, n_res)
                r *= i_n_res
                g *= i_n_res
                b *= i_n_res
                for seq in self.sequences:
                    if seq.isValidProtein():
                        if pos < seq.length():
                            res = seq.residues[pos]
                            if not res.is_gap:
                                seq.residues[pos].color = \
                                    (int(r), int(g), int(b))

[docs]    def colorWeightByAlignmentStrength(self, min_weight_identity,
                                       max_weight_identity):
        """
        Weights the sequences by alignment stregth.
        """
        inv_range = old_div(1.0, (max_weight_identity - min_weight_identity))
        profile_length = 0
        if self.profile:
            profile_length = self.profile.length()
        br, bg, bb = self.background_color
        for seq in self.sequences:
            if seq.isValidProtein():
                for pos in range(seq.length()):
                    res = seq.residues[pos]
                    if not res.is_gap and pos < profile_length:
                        res_r, res_g, res_b = res.color
                        value = 100.0 * self.profile.residues[pos].value
                        value = value - min_weight_identity
                        if value < 0:
                            value = 0
                        value *= inv_range
                        if value > 1.0:
                            value = 1.0
                        res_r = br * (1.0 - value) + res_r * value
                        res_g = bg * (1.0 - value) + res_g * value
                        res_b = bb * (1.0 - value) + res_b * value
                        res.color = (res_r, res_g, res_b)

[docs]    def findMaxLength(self, exclude_consensus=False):
        """
        Finds a length of the longest sequence.

        :type exclude_consensus: bool
        :param exclude_consensus: if True, the consensus sequence will not count

        :rtype: int
        :return: maximum sequence length in the entire group
        """
        max_length = 0
        for seq in self.sequences:
            if seq and ((not exclude_consensus) or seq.type != \
                        constants.SEQ_CONSENSUS) and not seq.global_sequence:
                if seq.length() > max_length:
                    max_length = seq.length()
        return max_length

[docs]    def unselectAllSequences(self):
        """
        Unselects all sequences in the group.
        """
        for seq in self.sequences:
            seq.selected = False
            for child in seq.children:
                child.selected = False

[docs]    def selectAllSequences(self):
        """
        Selects all sequences in the group. Child selection remains unchanged.
        """
        for seq in self.sequences:
            if seq.type != constants.SEQ_SEPARATOR and \
               seq.type != constants.SEQ_RULER:
                seq.selected = True
            # for child in seq.children:
            #    child.selected = False

            #residues should be in name!
[docs]    def selectAll(self):
        """
        Selects all residues.
        """
        for seq in self.sequences:
            seq.selectAllResidues()

[docs]    def deselectAll(self):
        """
        Deselects all residues.
        """
        for seq in self.sequences:
            seq.unselectResidues()

[docs]    def invertSequenceSelection(self):
        """
        Inverts sequence selection range.
        """
        for seq in self.sequences:
            if len(seq.residues) > 0:
                if seq.selected:
                    seq.selected = False
                    for child in seq.children:
                        child.selected = False
                else:
                    seq.selected = True
                    for child in seq.children:
                        child.selected = False

[docs]    def invertSelection(self):
        """
        Inverts residue selection range.
        """
        for seq in self.sequences:
            seq.invertSelection()


#ANNOTATIONS WILL NOT BE PART OF SEQUENCE:

[docs]    def deleteAnnotations(self):
        """
        Removes all annotations.
        """
        self.sequences = [
            seq for seq in self.sequences
            if seq.type != constants.SEQ_ANNOTATION and
            seq.type != constants.SEQ_CONSENSUS and
            seq.type != constants.SEQ_SYMBOLS and seq.type != constants.SEQ_LOGO
            and seq.type != constants.SEQ_SECONDARY
        ]
        not_selected = True
        if self.hasSelectedSequences():
            not_selected = False
        for seq in self.sequences:
            if not_selected or seq.selected:
                seq.children = [
                    child for child in seq.children
                    if child.annotation_type == constants.ANNOTATION_SSP or
                    child.annotation_type == constants.ANNOTATION_ACC or
                    child.annotation_type == constants.ANNOTATION_DIS or
                    child.annotation_type == constants.ANNOTATION_DOM or
                    child.annotation_type == constants.ANNOTATION_CCB or
                    child.annotation_type == constants.ANNOTATION_PFAM
                ]
        self.updateReference()

[docs]    def deleteGlobalAnnotations(self):
        """
        Removes all global annotations.
        """
        self.sequences = [
            seq for seq in self.sequences
            if seq.type != constants.SEQ_CONSENSUS and
            seq.type != constants.SEQ_SYMBOLS and seq.type != constants.SEQ_LOGO
            and not (seq.type == constants.SEQ_ANNOTATION and
                     (seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO or
                      seq.annotation_type == constants.ANNOTATION_MEAN_PI))
        ]
        self.updateReference()

[docs]    def deletePredictions(self):
        """
        Removes all predictions.
        """
        not_selected = True
        if self.hasSelectedSequences():
            not_selected = False
        for seq in self.sequences:
            if not_selected or seq.selected:
                seq.children = [
                    child for child in seq.children
                    if child.annotation_type != constants.ANNOTATION_SSP and
                    child.annotation_type != constants.ANNOTATION_ACC and
                    child.annotation_type != constants.ANNOTATION_DIS and
                    child.annotation_type != constants.ANNOTATION_DOM and
                    child.annotation_type != constants.ANNOTATION_CCB and
                    child.annotation_type != constants.ANNOTATION_PFAM
                ]
        self.updateReference()

[docs]    def addSeparator(self):
        """
        Adds a separator sequence.
        """
        seq = Sequence()
        seq.type = constants.SEQ_SEPARATOR
        self.sequences.append(seq)

[docs]    def addRuler(self, update_only=False):
        """
        Adds a ruler sequence.

        :rtype: `Sequence`
        :return: A ruler sequence.
        """
        if len(self.sequences) == 0:
            return None
        for seq in self.sequences:
            if seq.type == constants.SEQ_RULER:
                self.sequences.remove(seq)
                self.sequences.insert(0, seq)
                seq.visible = True
                return seq
        if update_only:
            return
        seq = Sequence()
        seq.type = constants.SEQ_RULER
        self.sequences.insert(0, seq)
        return seq

[docs]    def removeRuler(self):
        """
        Removes a ruler sequence.
        """
        for seq in self.sequences:
            if seq.type == constants.SEQ_RULER:
                self.sequences.remove(seq)

[docs]    def addConsensusSequence(self, toggle=False):
        """
        Creates a consensus sequence and adds it to the group.
        If the annotation already exists, just expands the sequence and
        makes it visible.

        :rtype: `Sequence`
        :return: consensus sequence
        """
        seq = None
        self.calculateProfile()
        self.updateVariableSequences()
        # Check if we don't have a consensus sequence already.
        for seq in self.sequences:
            if seq.isValidProtein():
                break
            if seq.type == constants.SEQ_CONSENSUS:
                if toggle:
                    self.sequences.remove(seq)
                    return
                seq.visible = True
                seq.showChildren()
                return seq
        if not seq:
            return None
        if not seq.isValidProtein():
            return None

        # Create a consensus sequence
        consensus = Sequence()
        consensus.type = constants.SEQ_CONSENSUS
        consensus.name = "Consensus Sequence"
        consensus.short_name = "Consensus Sequence"
        consensus.global_sequence = True
        index = self.sequences.index(seq)
        self.sequences.insert(index, consensus)
        seq = Sequence()
        seq.type = constants.SEQ_ANNOTATION
        seq.parent_sequence = consensus
        seq.height = 3
        seq.name = "Consensus Plot"
        seq.short_name = "Consensus Plot"
        seq.plot_color = (96, 128, 96)
        consensus.children.append(seq)
        self.updateReference()
        return consensus

[docs]    def addConsensusSymbols(self, toggle=False):
        """
        Creates a consensus symbols string and adds it to the group.
        If the annotation already exists, just expands the sequence and
        makes it visible.

        :rtype: `Sequence`
        :return: consensus sequence
        """
        self.calculateProfile()
        self.updateVariableSequences()
        # Check if we don't have a consensus sequence already.
        seq = None
        for seq in self.sequences:
            if seq.isValidProtein():
                break
            if seq.type == constants.SEQ_SYMBOLS:
                if toggle:
                    self.sequences.remove(seq)
                    return
                seq.visible = True
                seq.showChildren()
                return seq
        if not seq or not seq.isValidProtein():
            return None
        # Create a consensus sequence
        symbols = Sequence()
        symbols.type = constants.SEQ_SYMBOLS
        symbols.name = "Consensus Symbols"
        symbols.short_name = "Consensus Symbols"
        symbols.global_sequence = True
        index = self.sequences.index(seq)
        self.sequences.insert(index, symbols)
        return symbols

[docs]    def addMeanHydrophobicity(self, toggle=False):
        """
        Creates a mean hydrophobicity annotation and adds it to the group.
        If the annotation already exists, just expands the sequence and
        makes it visible.

        :rtype: `Sequence`
        :return: mean hydrophbicity annotation
        """
        self.calculateProfile()
        self.updateVariableSequences()
        if len(self.sequences) == 0:
            return None
        # Find a first amino acid sequence.
        for seq in self.sequences:
            if seq.isValidProtein():
                break
            if seq.type == constants.SEQ_ANNOTATION and \
               seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO:
                if toggle:
                    self.sequences.remove(seq)
                    return
                seq.visible = True
                seq.showChildren()
                return seq
        if not seq.isValidProtein():
            return None
        index = self.sequences.index(seq)

        # Create a plot sequence.
        plot = Sequence()
        plot.type = constants.SEQ_ANNOTATION
        plot.annotation_type = constants.ANNOTATION_MEAN_HYDRO
        plot.name = "Mean Hydrophobicity"
        plot.short_name = "Mean Hydrophobicity"
        plot.height = 3
        plot.global_sequence = True
        self.sequences.insert(index, plot)
        return plot

[docs]    def addMeanPI(self, toggle=False):
        """
        Creates a mean isoelectric point annotation and adds it to the group.
        If the annotation already exists, just expands the sequence and
        makes it visible.

        :rtype: `Sequence`
        :return: mean hydrophbicity annotation
        """
        self.calculateProfile()
        self.updateVariableSequences()
        if len(self.sequences) == 0:
            return None
        # Find a first amino acid sequence.
        for seq in self.sequences:
            if seq.isValidProtein():
                break
            if seq.type == constants.SEQ_ANNOTATION and \
               seq.annotation_type == constants.ANNOTATION_MEAN_PI:
                if toggle:
                    self.sequences.remove(seq)
                    return
                seq.visible = True
                seq.showChildren()
                return seq
        if not seq.isValidProtein():
            return None
        index = self.sequences.index(seq)
        # Create a plot sequence.
        plot = Sequence()
        plot.type = constants.SEQ_ANNOTATION
        plot.annotation_type = constants.ANNOTATION_MEAN_PI
        plot.name = "Mean Isoelectric Point"
        plot.short_name = "Mean Isoelectric Point"
        plot.height = 3
        plot.global_sequence = True
        self.sequences.insert(index, plot)
        return plot

[docs]    def addSequenceLogo(self, toggle=False):
        """
        Creates a sequence logo annotation and adds it to the group.
        If the annotation already exists, just expands the sequence and
        makes it visible.

        :rtype: `Sequence`
        :return: sequence logo annotation object
        """
        if len(self.sequences) == 0:
            return None
        # Find a first amino acid sequence.
        for seq in self.sequences:
            if seq.isValidProtein():
                break
            if seq.type == constants.SEQ_LOGO:
                if toggle:
                    self.sequences.remove(seq)
                    return
                seq.visible = True
                return seq
        if not seq.isValidProtein():
            return None
        index = self.sequences.index(seq)

        # Create a plot sequence.
        logo = Sequence()
        logo.type = constants.SEQ_LOGO
        logo.name = "Sequence Logo"
        logo.short_name = "Sequence Logo"
        logo.height = 3
        logo.global_sequence = True
        self.sequences.insert(index, logo)
        return logo

[docs]    def colorByResidueType(self):
        for seq in self.sequences:
            if seq.visible and \
                    seq.isValidProtein(global_annotation=True):
                for res in seq.residues:
                    if not res.is_gap:
                        res.inverted = True
                    else:
                        res.inverted = False
                    res.color = res.colorType()

[docs]    def colorByMaestro(self):
        """
        Colors the sequences by Maestro colors.
        """
        for seq in self.sequences:
            if seq.visible and \
               seq.from_maestro:
                seq.color_mode = constants.COLOR_MAESTRO
                for res in seq.residues:
                    if res.is_gap:
                        res.color = (0, 0, 0)
                    else:
                        res.color = res.maestro_color
                    if not res.is_gap:
                        res.inverted = True
                    else:
                        res.inverted = False

[docs]    def colorByKyteDoolittle(self):
        """
        Colors the sequences by Kyte-Doolittle hydrophobicity scale.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                for res in seq.residues:
                    res.color = res.colorKD()
                    if not res.is_gap:
                        res.inverted = True
                    else:
                        res.inverted = False

[docs]    def colorByHoppWoods(self):
        """
        Colors the sequences by Hopp-Woods hydrophilicity scale.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                for res in seq.residues:
                    res.color = res.colorHW()
                    if not res.is_gap:
                        res.inverted = True
                    else:
                        res.inverted = False

[docs]    def colorByGray(self):
        """
        Colors the sequences using plain gray color. This scheme is useful
        in combination with "color by alignment strength."
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                seq.color_scheme = constants.COLOR_BLACK
                for res in seq.residues:
                    if not res.is_gap:
                        res.inverted = True
                        res.color = (63, 63, 63)
                    else:
                        res.inverted = False
                        res.color = (0, 0, 0)

[docs]    def colorByWhite(self):
        """
        Colors the sequences using white color.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                seq.color_scheme = constants.COLOR_WHITE
                for res in seq.residues:
                    if not res.is_gap:
                        res.inverted = True
                        res.color = (255, 255, 255)
                    else:
                        res.inverted = False
                        res.color = (0, 0, 0)

[docs]    def colorByCustomColor(self, color):
        """
        Colors the sequences using custom color.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                seq.color_scheme = constants.COLOR_CUSTOM
                seq.custom_color = color
                for res in seq.residues:
                    if not res.is_gap:
                        res.inverted = True
                        res.color = color
                    else:
                        res.inverted = False
                        res.color = (0, 0, 0)

[docs]    def colorByCustomAnnotations(self):
        """
        Colors the sequences using custom annotation color.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                for child in seq.children:
                    if child.annotation_type == constants.ANNOTATION_CUSTOM:
                        for index, res in enumerate(seq.residues):
                            if child.residues[index].color != (255, 255, 255):
                                res.color = child.residues[index].color

[docs]    def colorByBfactor(self):
        """
        Colors the sequences using Bfactor values.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein():
                seq.color_scheme = constants.COLOR_BFACTOR
                minbf = seq.residues[0].bfactor
                maxbf = seq.residues[-1].bfactor
                for res in seq.residues:
                    if res.bfactor < minbf:
                        minbf = res.bfactor
                    if res.bfactor > maxbf:
                        maxbf = res.bfactor
                dbf = maxbf - minbf
                if dbf > 0.0:
                    dbf = old_div(1.0, dbf)
                else:
                    dbf = 1.0
                for res in seq.residues:
                    if not res.is_gap:
                        res.color = utils.colorGradientGreenRed(
                            1.5 * dbf * (res.bfactor - minbf) - 0.75)
                    else:
                        res.inverted = False
                        res.color = self.inv_background_color

[docs]    def colorByPosition(self):
        """
        Colors the sequences using residue position.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein():
                seq.color_scheme = constants.COLOR_POSITION
                residues = seq.gaplessResidues()
                maxpos = float(len(residues))
                for index, res in enumerate(residues):
                    res.color = utils.colorGradientGreenRed(
                        1.5 * (old_div(index, maxpos)) - 0.75)

[docs]    def colorBySecondary(self):
        """
        Colors the sequences using secondary structure annotation. If SSA
        is available, if will be used for coloring, otherwise a consensus
        of available SSP annotations will be used here.
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                seq.color_scheme = constants.COLOR_SECONDARY
                ssa = None
                for child in seq.children:
                    if child.type == constants.SEQ_SECONDARY:
                        ssa = child
                        break
                ssp_list = []
                if ssa is None:
                    for child in seq.children:
                        if child.annotation_type == constants.ANNOTATION_SSP:
                            ssp_list.append(child)
                if ssa is None and len(ssp_list) == 0:
                    continue
                for index, res in enumerate(seq.residues):
                    if not res.is_gap:
                        res.inverted = True
                        if ssa:
                            if ssa.residues[index].code == 'H':
                                res.color = constants.SS_COLOR_HELIX
                            elif ssa.residues[index].code == 'E':
                                res.color = constants.SS_COLOR_EXTENDED
                            else:
                                res.color = (192, 192, 192)
                        else:
                            avg_color = [0, 0, 0]
                            i_n_ssp = old_div(1.0, float(len(ssp_list)))
                            for ssp in ssp_list:
                                if ssp.residues[index].code == 'H':
                                    avg_color[0] += constants.SS_COLOR_HELIX[0]
                                    avg_color[1] += constants.SS_COLOR_HELIX[1]
                                    avg_color[2] += constants.SS_COLOR_HELIX[2]
                                elif ssp.residues[index].code == 'E':
                                    avg_color[0] += constants.SS_COLOR_EXTENDED[
                                        0]
                                    avg_color[1] += constants.SS_COLOR_EXTENDED[
                                        1]
                                    avg_color[2] += constants.SS_COLOR_EXTENDED[
                                        2]
                                else:
                                    avg_color[0] += 192
                                    avg_color[1] += 192
                                    avg_color[2] += 192
                            avg_color[0] *= i_n_ssp
                            avg_color[1] *= i_n_ssp
                            avg_color[2] *= i_n_ssp
                            res.color = (int(avg_color[0]), int(avg_color[1]),
                                         int(avg_color[2]))
                    else:
                        res.inverted = False
                        res.color = (0, 0, 0)

[docs]    def colorByColorBlocks(self, scheme):
        """
        Colors the sequences using plain gray color. This scheme is useful
        in combination with "color by alignment strength."
        """
        for seq in self.sequences:
            if seq.visible and seq.isValidProtein(global_annotation=True):
                seq.color_scheme = scheme
                for res in seq.residues:
                    # if not res.is_gap and not res.code in ['.', '+']:
                    res.inverted = True
                    # else:
                    #    res.inverted = False
                    if scheme == constants.COLOR_HELIX_PROPENSITY:
                        res.color = res.colorHelixPropensity()
                    elif scheme == constants.COLOR_STRAND_PROPENSITY:
                        res.color = res.colorStrandPropensity()
                    elif scheme == constants.COLOR_TURN_PROPENSITY:
                        res.color = res.colorTurnPropensity()
                    elif scheme == constants.COLOR_SIDECHAIN_CHEMISTRY:
                        res.color = res.colorSidechainChemistry()
                    elif scheme == constants.COLOR_STERIC_GROUP:
                        res.color = res.colorStericGroup()
                    elif scheme == constants.COLOR_EXPOSURE_TENDENCY:
                        res.color = res.colorExposureTendency()

[docs]    def colorSequences(self, mode=None, color=None):
        """
        Colors the sequences using a specified mode.

        :type mode: int
        :param mode: coloring mode
        """
        if mode is None:
            mode = self.color_mode
        if mode == constants.COLOR_BLACK:
            self.colorByGray()
        elif mode == constants.COLOR_WHITE:
            self.colorByWhite()
        elif mode == constants.COLOR_RESIDUE_TYPE:
            self.colorByResidueType()
        elif mode == constants.COLOR_SIMILARITY:
            self.colorBySequenceSimilarity()
        elif mode == constants.COLOR_KYTE_DOOLITTLE:
            self.colorByKyteDoolittle()
        elif mode == constants.COLOR_HOPP_WOODS:
            self.colorByHoppWoods()
        elif mode == constants.COLOR_TAYLOR:
            self.colorByTaylorScheme()
        elif mode == constants.COLOR_MAESTRO:
            self.colorByMaestro()
        elif mode == constants.COLOR_SECONDARY:
            self.colorBySecondary()
        elif mode == constants.COLOR_BFACTOR:
            self.colorByBfactor()
        elif mode == constants.COLOR_POSITION:
            self.colorByPosition()
        elif mode == constants.COLOR_HELIX_PROPENSITY:
            self.colorByColorBlocks(constants.COLOR_HELIX_PROPENSITY)
        elif mode == constants.COLOR_STRAND_PROPENSITY:
            self.colorByColorBlocks(constants.COLOR_STRAND_PROPENSITY)
        elif mode == constants.COLOR_TURN_PROPENSITY:
            self.colorByColorBlocks(constants.COLOR_TURN_PROPENSITY)
        elif mode == constants.COLOR_SIDECHAIN_CHEMISTRY:
            self.colorByColorBlocks(constants.COLOR_SIDECHAIN_CHEMISTRY)
        elif mode == constants.COLOR_EXPOSURE_TENDENCY:
            self.colorByColorBlocks(constants.COLOR_EXPOSURE_TENDENCY)
        elif mode == constants.COLOR_STERIC_GROUP:
            self.colorByColorBlocks(constants.COLOR_STERIC_GROUP)
        elif mode == constants.COLOR_CUSTOM:
            if color is not None:
                self.custom_color = color
            self.colorByCustomColor(self.custom_color)
        self.colorByCustomAnnotations()

[docs]    def hideAllChildren(self):
        """
        Hides all children, effectively collapsing the sequences.
        """
        for seq in self.sequences:
            seq.hideChildren()

[docs]    def showAllChildren(self):
        """
        Shows all children, effectively expanding the sequences.
        """
        for seq in self.sequences:
            seq.showChildren()

[docs]    def updateVariableSequences(self):
        """
        Updates global variable sequences, i.e. consensus plot and "mean"
        annotations.
        """
        self.updateReference()
        for seq in self.sequences:
            if seq.type == constants.SEQ_CONSENSUS:
                self.updateConsensus(seq)
            elif seq.type == constants.SEQ_SYMBOLS:
                self.updateSymbols(seq)
            elif seq.type == constants.SEQ_ANNOTATION:
                if seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO:
                    self.updateMeanHydrophobicity(seq)
                elif seq.annotation_type == constants.ANNOTATION_MEAN_PI:
                    self.updateMeanPI(seq)

[docs]    def padAlignment(self):
        """
        Pads the alignment with additional gaps, so all sequences have
        identical length equal to the length of the longest sequence.
        """
        max_length = self.findMaxLength(exclude_consensus=True)
        for seq in self.sequences:
            if seq.isValidProtein():
                if seq.length() and seq.length() < max_length:
                    active = seq.residues[seq.length() - 1].active
                    seq.insertGaps(seq.length(),
                                   max_length - seq.length(),
                                   active=active)
                    for child in seq.children:
                        if child.length() < max_length:
                            child.insertGaps(child.length(),
                                             max_length - child.length())

[docs]    def removeTerminalGaps(self):
        """
        Removes terminal gaps from the alignment.
        """
        for seq in self.sequences:
            if seq.isValidProtein():
                while len(seq.residues) and seq.residues[-1].is_gap:
                    seq.residues.pop()
                if not len(seq.residues):
                    self.sequences.remove(seq)
                else:
                    for child in seq.children:
                        while len(child.residues) and child.residues[-1].is_gap:
                            child.residues.pop()

[docs]    def encode(self, seq):
        """
        Encodes the sequence to include annotation data required by
        pattern search. Return an ungapped version of the pattern.
        """
        ssa = None
        for child in seq.children:
            if child.type == constants.SEQ_SECONDARY:
                ssa = child
                break
        acc = None
        for child in seq.children:
            if child.annotation_type == constants.ANNOTATION_ACC:
                acc = child
        flex = False
        avg_flexibility = 0.0
        n_residues = 0
        for aa in seq.residues:
            if aa.is_gap:
                continue
            if aa.bfactor:
                avg_flexibility += aa.bfactor
                n_residues += 1
                if aa.bfactor > 0.0:
                    flex = True
        if n_residues > 0:
            avg_flexibility /= n_residues
        seq_string = ""
        ssa_string = ""
        acc_string = ""
        flx_string = ""
        res_ids = []
        for index, aa in enumerate(seq.residues):
            if aa.is_gap:
                continue
            seq_string += aa.code
            # Store ID (includes residue number and insertion code)
            res_ids.append(aa.id().strip())
            if ssa:
                ssa_string += ssa.residues[index].code.lower()
            if acc:
                acc_string += acc.residues[index].code
            if flex:
                if aa.bfactor < avg_flexibility:
                    flx_string += 'r'
                else:
                    flx_string += 'f'
        result = {}
        result['amino_acids'] = seq_string
        result['res_ids'] = res_ids
        if ssa:
            result['secondary_structure'] = ssa_string
        if acc:
            result['solvent_accessibility'] = acc_string
        if flex:
            result['flexibility'] = flx_string
        return result

[docs]    def findPattern(self, pattern):
        """
        Finds a specified PROSITE pattern in all sequences.
        """
        self.unselectAll()
        if pattern:
            pattern = pattern.strip().rstrip()
        if not pattern:
            for seq in self.sequences:
                seq.makeActive()
        if not find_generalized_pattern:
            return
        if not pattern:
            return
        seq_string_list = []
        sequence_list = []
        if not delete_from_str(pattern, string.ascii_uppercase):
            new_pattern = pattern[0]
            for c in pattern[1:]:
                new_pattern += '-' + c
            pattern = new_pattern
        count = 0
        for seq in self.sequences:
            if seq.isValidProtein():
                seq.makeInactive()
                seq_string_list.append(self.encode(seq))
                sequence_list.append(seq)
        results_list = find_generalized_pattern(seq_string_list, pattern)
        if results_list:
            for index, match_list in enumerate(results_list):
                seq = sequence_list[index]
                residues = seq.gaplessResidues()
                for start, end in match_list:
                    for ridx in range(start, end):
                        residues[ridx].selected = True
                    count += 1
        return results_list

    ###########################################################################
    # Sequence group sorting methods.
    ###########################################################################

[docs]    def sortKeyName(self, sequence):
        """
        Returns sequence name sort key.
        :rtype: string
        :return: sequence name sort key
        """
        return sequence.name

[docs]    def sortKeyChain(self, sequence):
        """
        Returns sequence chain ID sort key.
        :rtype: string
        :return: sequence chain ID sort key
        """
        return sequence.chain_id

[docs]    def sortKeyLength(self, sequence):
        """
        Returns sequence length sort key.
        :rtype: int
        :return: sequence length sort key
        """
        return sequence.gaplessLength()

[docs]    def sortKeyGaps(self, sequence):
        """
        Returns sequence number of gaps sort key.
        :rtype: int
        :return: sequence number of gaps sort key
        """
        return sequence.numberOfGaps()

[docs]    def sortKeyIdentity(self, sequence):
        """
        Returns sequence identity with consesus sequence sort key.
        :rtype: float
        :return: sequence identity with consesus sequence sort key
        """
        return sequence.identity

[docs]    def sortKeyHomology(self, sequence):
        """
        Returns sequence homology with reference sequence sort key.
        :rtype: float
        :return: sequence homology with reference sequence sort key
        """
        return sequence.homology

[docs]    def sortKeySimilarity(self, sequence):
        """
        Returns sequence similarity to the consesus sequence as a sort key.
        :rtype: float
        :return: sequence similarity to the consesus sequence sort key
        """
        return sequence.similarity

[docs]    def sortKeyScore(self, sequence):
        """
        Returns sequence score sort key.
        :rtype: float
        :return: sequence score sort key
        """
        return sequence.score

[docs]    def getSortableSequences(self, ignore_reference=True):
        """
        Returns a list of "sortable" sequences, i.e. all parent sequences
        that are not auxiliary objects and not global annotations.

        :type: ignore_reference: boolean
        :param: Normally, the reference is not considered a sortable sequence,
                unless ignore_reference is set to False. In such case the
                reference will be included in the sortable group.
        :rtype: list of `Sequence`
        :return: list of sortable sequences
        """
        # Extract non-global sequences.
        my_reference = None
        if ignore_reference:
            my_reference = self.reference
        return [
            seq for seq in self.sequences
            if seq.isSortable(reference=my_reference)
        ]

[docs]    def replaceSortableSequences(self, sequence_list, ignore_reference=True):
        """
        This method replaces all "sortable" sequences with a list of
        sorted sequences. The length of the given list is supposed to have
        a number of items equal to the number of sortable sequences.
        This method is used to replace sequences in the group after
        sorting operation.

        :type sequence_list: list of `Sequence`
        :param sequence_list: replacement list of sequences

        :type: ignore_reference: boolean
        :param: Normally, the reference is not considered a sortable sequence,
                unless ignore_reference is set to False. In such case the
                reference will be included in the sortable group.
        """
        # Extract non-global sequences.
        my_reference = None
        if ignore_reference:
            my_reference = self.reference
        list_index = 0
        list_size = len(sequence_list)
        for pos in range(len(self.sequences)):
            seq = self.sequences[pos]
            if seq.isSortable(reference=my_reference):
                if list_index < list_size:
                    self.sequences[pos] = sequence_list[list_index]
                    list_index += 1

[docs]    def sort(self, order, reverse_order=False):
        """
        This method sorts the sequences according to a specified order.
        Optionally, the sequences can be sorted in a reverse order.

        :type order: int
        :param order: sort order

        :type reverse_order: bool
        :param reverse_order: if True, the sequences will be reverse sorted
            (default=False, i.e. sorting from smallest to largest key)
        """
        # Calculate profile, just to be up-to-date.
        self.calculateProfile()
        sortable = self.getSortableSequences()
        # Sort the sequences according to specified sort order.
        if order == constants.SORT_NAME:
            sortable.sort(key=self.sortKeyName)
        elif order == constants.SORT_CHAIN:
            sortable.sort(key=self.sortKeyChain)
        elif order == constants.SORT_LENGTH:
            sortable.sort(key=self.sortKeyLength)
        elif order == constants.SORT_GAPS:
            sortable.sort(key=self.sortKeyGaps)
        elif order == constants.SORT_IDENTITY:
            sortable.sort(key=self.sortKeyIdentity)
        elif order == constants.SORT_HOMOLOGY:
            sortable.sort(key=self.sortKeyHomology)
        elif order == constants.SORT_SIMILARITY:
            sortable.sort(key=self.sortKeySimilarity)
        elif order == constants.SORT_SCORE:
            sortable.sort(key=self.sortKeyScore)
        if reverse_order:
            sortable.reverse()
        # Replace non-global sequences with the sorted ones.
        sorted_pos = 0
        for pos in range(len(self.sequences)):
            seq = self.sequences[pos]
            if seq.isSortable(reference=self.reference):
                self.sequences[pos] = sortable[sorted_pos]
                sorted_pos += 1
        self.updateReference()

[docs]    def moveUp(self):
        """
        Moves selected sequences to the top of the group.
        """
        sortable = self.getSortableSequences()
        if sortable and self.hasSelectedSequences():
            for seq in sortable:
                if seq.selected:
                    pos = sortable.index(seq)
                    if pos > 0:
                        sortable.remove(seq)
                        sortable.insert(pos - 1, seq)
            self.replaceSortableSequences(sortable)
        else:
            for seq in self.sequences:
                n_children = len(seq.children)
                for index in range(n_children):
                    child = seq.children[index]
                    if child.selected and index:
                        seq.children.remove(child)
                        seq.children.insert(index - 1, child)

[docs]    def moveDown(self):
        """
        Moves selected sequences to the bottom of the group.
        """
        sortable = self.getSortableSequences()
        if sortable and self.hasSelectedSequences():
            for seq in reversed(sortable):
                if seq.selected:
                    pos = sortable.index(seq)
                    sortable.remove(seq)
                    sortable.insert(pos + 1, seq)
            self.replaceSortableSequences(sortable)
        else:
            for seq in self.sequences:
                n_children = len(seq.children)
                for index in range(n_children - 1, -1, -1):
                    child = seq.children[index]
                    if child.selected and index < n_children - 1:
                        seq.children.remove(child)
                        seq.children.insert(index + 1, child)

[docs]    def moveTop(self, target=None):
        """
        Moves selected sequences to the top of the group.

        If target is specified, move only the target sequence to top.
        """
        if target == self.reference:
            # When target is reference, include the reference in the
            # sortable group.
            sortable = self.getSortableSequences(ignore_reference=False)
        else:
            sortable = self.getSortableSequences()
        if target and target in sortable:
            sortable.remove(target)
            sortable.insert(0, target)
            sorted_list = sortable
        else:
            sorted_list = []
            for seq in sortable:
                if seq.selected:
                    sorted_list.append(seq)
            for seq in sortable:
                if not seq.selected:
                    sorted_list.append(seq)
        if target == self.reference:
            self.replaceSortableSequences(sorted_list, ignore_reference=False)
        else:
            self.replaceSortableSequences(sorted_list)

[docs]    def moveBottom(self):
        """
        Moves selected sequences to the bottom of the group.
        """
        sortable = self.getSortableSequences()
        sorted_list = []
        for seq in sortable:
            if not seq.selected:
                sorted_list.append(seq)
        for seq in sortable:
            if seq.selected:
                sorted_list.append(seq)
        self.replaceSortableSequences(sorted_list)

[docs]    def sortByTreeOrder(self):
        """
        Sorts the sequences by the tree order. If there is no tree,
        returns False.

        :return: True on success, False if valid tree doesn't exist.
        :rtype: boolean
        """
        if not self.tree:
            return False
        tree_list = []
        tree = self.tree.getVisibleTree()
        tree_list = tree.getSequenceList(tree_list)
        if len(tree_list) == 0:
            return False
        # Replace all non-global sequences with sorted list.
        sorted_pos = 0
        for pos in range(min(len(self.sequences), len(tree_list))):
            seq = self.sequences[pos]
            if seq.isValidProtein():
                self.sequences[pos] = tree_list[sorted_pos]
                sorted_pos += 1
        return True

[docs]    def duplicateSelectedSequences(self):
        """
        Duplicates selected sequences.
        """
        copy_list = []
        for index, seq in enumerate(self.sequences):
            if seq.selected:
                copy_list.append((index + len(copy_list), seq.copyForUndo()))
        for index, seq in copy_list:
            self.sequences.insert(index, seq)

[docs]    def getGlobalAnnotation(self, annotation_type):
        return next((seq for seq in self.sequences
                     if seq.annotation_type == annotation_type), None)

[docs]    def addAnnotation(self, annotation_type, remove=False):
        """
        Adds an annotation sequence to selected sequences or to all sequences
        if no sequence is selected.

        :type annotation_type: int
        :param annotation_type: type of the annotation sequence
        """
        has_selected = self.hasSelectedSequences()
        seq = None
        if remove:
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   seq.hasAnnotationType(annotation_type):
                    for child in seq.children:
                        if child.annotation_type == annotation_type:
                            seq.children.remove(child)
            return
        if annotation_type == constants.ANNOTATION_HELIX_PROPENSITY:
            # Add helix propensity annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Helix Propensity"
                    plot.short_name = "Helix Propensity"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_COLOR_BLOCKS
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.color = parent_res.colorHelixPropensity()
        elif annotation_type == constants.ANNOTATION_STRAND_PROPENSITY:
            # Add strand propensity annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Strand Propensity"
                    plot.short_name = "Strand Propensity"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_COLOR_BLOCKS
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.color = parent_res.colorStrandPropensity()
        elif annotation_type == constants.ANNOTATION_TURN_PROPENSITY:
            # Add turn propensity annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Turn Propensity"
                    plot.short_name = "Turn Propensity"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_COLOR_BLOCKS
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.color = parent_res.colorTurnPropensity()
        elif annotation_type == constants.ANNOTATION_STERIC_GROUP:
            # Add strand propensity annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Steric Group"
                    plot.short_name = "Steric Group"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_COLOR_BLOCKS
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.color = parent_res.colorStericGroup()
        elif annotation_type == constants.ANNOTATION_HELIX_TERMINATORS:
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Helix Breaking"
                    plot.short_name = "Helix Breaking"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_COLOR_BLOCKS
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.color = parent_res.colorHelixTerminators()
        elif annotation_type == constants.ANNOTATION_SIDECHAIN_CHEMISTRY:
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Sidechain Chemistry"
                    plot.short_name = "Sidechain Chemistry"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_COLOR_BLOCKS
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.color = parent_res.colorSidechainChemistry()
        elif annotation_type == constants.ANNOTATION_EXPOSURE_TENDENCY:
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Exposure Tendency"
                    plot.short_name = "Exposure Tendency"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_COLOR_BLOCKS
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.color = parent_res.colorExposureTendency()
        elif annotation_type == constants.ANNOTATION_BFACTOR:
            # Add strand propensity annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and seq.visible and \
                   seq.has_structure and seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "B factor"
                    plot.short_name = "B factor"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_HISTOGRAM
                    plot.height = 3
                    plot.plot_color = (128, 160, 240)
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.value = parent_res.bfactor
                    plot.calculatePlotValues(0)
        elif annotation_type == constants.ANNOTATION_HYDROPHOBICITY:
            # Add strand propensity annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Hydrophobicity"
                    plot.short_name = "Hydrophobicity"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_HISTOGRAM
                    plot.height = 3
                    plot.plot_color = (240, 160, 240)
                    amino_acids = list(constants.HYDROPHOBICITY_SCALES)
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        if parent_res.code in amino_acids:
                            res.value = constants.HYDROPHOBICITY_SCALES[
                                parent_res.code][constants.KD_SCALE]
                    plot.calculatePlotValues(2)
        elif annotation_type == constants.ANNOTATION_PI:
            # Add isoelectric point annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Isoelectric Point"
                    plot.short_name = "Isoelectric Point"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_HISTOGRAM
                    plot.height = 3
                    plot.plot_color = (160, 240, 192)
                    amino_acids = list(constants.PI_SCALE)
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        if parent_res.code in amino_acids:
                            res.value = constants.PI_SCALE[parent_res.code]
                    plot.calculatePlotValues(2)
        elif annotation_type == constants.ANNOTATION_IDENTITY:
            # Add strand propensity annotation.
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Identity"
                    plot.short_name = "Identity"
                    plot.annotation_type = annotation_type
                    plot.plot_style = constants.PLOT_LINE
                    plot.height = 3
                    amino_acids = list(constants.HYDROPHOBICITY_SCALES)
                    for pos in range(len(seq.residues)):
                        parent_res = seq.residues[pos]
                        res = plot.residues[pos]
                        res.is_gap = parent_res.is_gap
                        if parent_res.code in amino_acids:
                            res.value = \
                                constants.HYDROPHOBICITY_SCALES[
                                    parent_res.code][
                                    constants.KD_SCALE]
                    plot.calculatePlotValues(2)
        elif annotation_type == constants.ANNOTATION_RESNUM:
            # Add residue number annotation
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    plot = seq.createAnnotationSequence()
                    plot.name = "Residue Numbers"
                    plot.short_name = ""
                    plot.annotation_type = annotation_type
        elif annotation_type == constants.ANNOTATION_SSBOND:
            # Add SSBond annotation
            for seq in self.sequences:
                if (not has_selected or seq.selected) and \
                   seq.isValidProtein() and \
                   not seq.hasAnnotationType(annotation_type):
                    if not seq.ssb_bond_list:
                        continue
                    plot = seq.createSSBondAssignment()
                    seq.children.insert(0, plot)
                    plot.annotation_type = annotation_type
                    plot.height = int(len(seq.ssb_bond_list) * 0.5 + 0.5)
                    plot.parent_sequence = seq
        if seq:
            seq.propagateGapsToChildren()

[docs]    def addCustomAnnotation(self,
                            sequence=None,
                            title="Custom Annotation",
                            name="",
                            region_list=[]):  # noqa: M511
        """
        This function adds or updates a custom annotation in the specified
        sequence.

        :param region_list: List of residue regions. Each item should include:
                (first_res_id, last_res_id, label, color)
        """
        for plot in sequence.children:
            if plot.annotation_type == constants.ANNOTATION_CUSTOM:
                sequence.children.remove(plot)
                break
        for seq in self.sequences:
            if seq == sequence and seq.isValidProtein() and \
                        not seq.hasAnnotationType(constants.ANNOTATION_CUSTOM):
                plot = seq.createAnnotationSequence()
                plot.name = title
                plot.short_name = name
                plot.annotation_type = constants.ANNOTATION_CUSTOM
                plot_residues = plot.gaplessResidues()
                for first, last, label, color in region_list:
                    lab_idx = 0
                    for index in range(first, last + 1):
                        if index < len(plot_residues):
                            plot_residues[index].color = color
                            plot_residues[index].code = ' '
                            if lab_idx < len(label):
                                plot_residues[index].code = label[lab_idx]
                                lab_idx += 1
                        # else:
                        #    plot_residues[index].color = res.color
                        # Assign gaps
                for pos in range(len(seq.residues)):
                    parent_res = seq.residues[pos]
                    res = plot.residues[pos]
                    res.inverted = True
                    res.is_gap = parent_res.is_gap

[docs]    def selectRegions(self, sequence, region_list):
        """
        Select residue subsets based on a region list.
        """
        for first, last, label, color in region_list:
            for index in range(first, last + 1):
                sequence.residues[index].selected = True

[docs]    def selectRedundantSequences(self, value, columns=False, reference=None):
        """
        Selects sequences below a specified identity threshold value.
        """
        aa_sequences = [seq for seq in self.sequences if seq.isValidProtein()]
        n_aa_sequences = len(aa_sequences)
        if n_aa_sequences == 0:
            return
        for seq in aa_sequences:
            seq.selected = False
        if reference:
            for n in range(n_aa_sequences):
                id = aa_sequences[n].calcIdentity(reference, self.consider_gaps,
                                                  columns)
                if id * 100 >= value:
                    aa_sequences[n].selected = True
        else:
            for n in range(n_aa_sequences):
                for m in range(n):
                    id = aa_sequences[n].calcIdentity(aa_sequences[m],
                                                      self.consider_gaps,
                                                      columns)
                    if id * 100 >= value:
                        len_n = aa_sequences[n].gaplessLength()
                        len_m = aa_sequences[m].gaplessLength()
                        if len_n < len_m:
                            aa_sequences[n].selected = True
                        else:
                            aa_sequences[m].selected = True

[docs]    def clearConstraints(self):
        for seq in self.sequences:
            if seq.type == constants.SEQ_CONSTRAINTS:
                seq.constraint_list = []
                break

[docs]    def removeConstraints(self):
        for seq in self.sequences:
            if seq.type == constants.SEQ_CONSTRAINTS:
                self.sequences.remove(seq)
                break

[docs]    def hasConstraints(self):
        for seq in self.sequences:
            if seq.type == constants.SEQ_CONSTRAINTS:
                if len(seq.constraint_list) > 0:
                    return True
        return False

[docs]    def addConstraint(self, seq1, pos1, seq2, pos2, for_prime=False):
        """
        Adds a pairwise alignment constraint. At least one of the specified
        sequences has to be a reference sequence.

        :rtype: boolean
        :return: True if the constrait was successfully added, False otherwise
        """
        if self.reference not in self.sequences:
            return False
        index = self.sequences.index(self.reference)
        next_seq = None
        for seq in self.sequences[index + 1:]:
            if seq.isValidProtein():
                next_seq = seq
                break
        if not next_seq:
            return False
        cons_seq = None
        for seq in self.sequences:
            if seq.type == constants.SEQ_CONSTRAINTS:
                cons_seq = seq
                break
        if not cons_seq:
            cons_seq = Sequence()
            cons_seq.constraint_list = []
            cons_seq.type = constants.SEQ_CONSTRAINTS
            cons_seq.height = 2
            cons_seq.name = cons_seq.short_name = "Constraints"
            self.sequences.insert(index + 1, cons_seq)
        if seq1 is None and seq2 is None:
            return True
        if seq1 != self.reference and seq2 != self.reference:
            return False
        if seq2 == self.reference:
            # Reorder if seq1 is not a reference.
            tmp = seq1
            seq1 = seq2
            seq2 = tmp
            tmp = pos1
            pos1 = pos2
            pos2 = tmp
        if for_prime:
            seq1 = self.reference
            seq2 = self.reference
            next_seq = self.reference
            if pos2 > pos1:
                tmp = pos2
                pos2 = pos1
                pos1 = tmp
        ref_pos = pos1
        pos = pos2
        if ref_pos is None or pos is None:
            return False
        real_ref_pos = self.reference.getUngappedIndex(ref_pos)
        real_pos = next_seq.getUngappedIndex(pos)
        for constraint in cons_seq.constraint_list:
            start, end, seq, prime = constraint
            if start == real_ref_pos and end == real_pos:
                cons_seq.constraint_list.remove(constraint)
                return
        cons_seq.constraint_list.append(
            (real_ref_pos, real_pos, next_seq, for_prime))
        return True

[docs]    def statistics(self):
        total, selected, hidden, maestro = (0, 0, 0, 0)
        for seq in self.sequences:
            if not seq.type == constants.SEQ_RULER and \
               not seq.type == constants.SEQ_SEPARATOR:
                if not seq.visible:
                    hidden += 1
                if seq.selected:
                    selected += 1
                if seq.from_maestro:
                    maestro += 1
                total += 1

        return total, selected, maestro, hidden

[docs]    def setReference(self, ref=None):
        """
        Sets a reference sequence.
        """
        # To avoid any confusion, just remove the constraints.
        self.removeConstraints()
        if ref and ref in self.sequences and (ref.isValidProtein() or ref.type
                                              == constants.SEQ_CONSENSUS):
            self.reference = ref
            self.moveTop(target=ref)
            return True
        elif self.hasSelectedSequences():
            for seq in self.sequences:
                if seq.selected:
                    if seq.type == constants.SEQ_CONSENSUS or \
                       seq.isValidProtein():
                        self.reference = seq
                        self.moveTop(target=seq)
                        return True
        return False

[docs]    def removeMaestroSequences(self):
        """
        Removes all Maestro sequences.
        """
        self.sequences = [seq for seq in self.sequences if not seq.from_maestro]
        self.removeLonelyRuler()

[docs]    def removeLonelyRuler(self):
        """
        Remove ruler but only if there is nothing else left.

        :rtype: bool
        :return: True if the ruler was removed
        """
        seq = None
        for seq in self.sequences:
            if seq.type != constants.SEQ_RULER:
                return False
        if seq:
            self.sequences = []
        return True

[docs]    def toggleHistory(self):
        """
        Adds or removes a sequence modification history string.
        """
        for seq in self.sequences:
            if seq.type == constants.SEQ_HISTORY:
                self.sequences.remove(seq)
                return

        history = Sequence()
        history.type = constants.SEQ_HISTORY
        history.name = "History of Changes"
        history.short_name = history.name[:]
        self.sequences.append(history)

[docs]    def resetHistory(self):
        """
        This function resets change tracking string.
        """
        history = None
        for seq in self.sequences:
            if seq.type == constants.SEQ_HISTORY:
                history = seq
                break
        if history:
            for res in history.residues:
                res.value = 0.0
                res.color = (255, 255, 255)
            # if self.viewer:
            #    self.viewer.updateView()

[docs]    def updateHistory(self, start, end):
        """
        This function updates an internal history string.

        :rtype: bool
        :return: True if the history was updated, False otherwise.
        """
        history = None
        for seq in self.sequences:
            if seq.type == constants.SEQ_HISTORY:
                history = seq
                break
        if history:
            if end < start:
                tmp = start
                start = end
                end = tmp
            if end >= history.length():
                new_length = end - history.length() + 1
                for pos in range(new_length):
                    res = Residue()
                    res.sequence = history
                    res.color = (255, 255, 255)
                    res.value = 0.0
                    res.code = ' '
                    res.inverted = True
                    history.residues.append(res)
            for res in history.residues:
                res.value -= 0.1
                if res.value < 0.0:
                    res.value = 0.0
                elif res.value < 0.2:
                    res.value = 0.2
                color = int(255 * (1.0 - res.value))
                res.color = (color, color, color)
            for pos in range(start, end):
                res = history.residues[pos]
                res.color = (0, 0, 0)
                res.value = 1.0
            # Update the viewer.
            # if self.viewer:
            #    self.viewer.updateView()
            return True

        return False

[docs]    def setConsiderGaps(self, value):
        """
        Sets value of consider gaps flag. If set to True, gaps will be
        included in calculation of local sequence similarity measures.

        :type value: bool
        :param value: Should we consider gaps for sequence identity
            calculations.
        """
        self.consider_gaps = value
        self.calculateProfile()
        self.updateVariableSequences()
        # if self.viewer:
        #    self.viewer.updateView()

[docs]    def updateSSA(self, remove=False):
        """
        Updates secondary structure assignments for all structure-coupled
        sequences.
        """
        any_selected = self.hasSelectedSequences()
        for seq in self.sequences:
            if not any_selected or seq.selected:
                if seq.has_structure:
                    for child in seq.children:
                        if child.type == constants.SEQ_SECONDARY:
                            seq.children.remove(child)
                            break
                    if remove:
                        continue
                    ssa = seq.createSecondaryAssignment()
                    for pos in range(len(ssa.residues)):
                        ss_res = ssa.residues[pos]
                        ss_res.code = seq.residues[pos].ss_code
                        if not seq.residues[pos].is_gap:
                            ss_res.inverted = True
                            if ss_res.code == 'H':
                                ss_res.color = constants.SS_COLOR_HELIX
                            elif ss_res.code == 'E':
                                ss_res.color = constants.SS_COLOR_EXTENDED
                            else:
                                ss_res.color = constants.SS_COLOR_COIL
                    seq.children.insert(0, ssa)
                    ssa.parent_sequence = seq
                    seq.propagateGapsToChildren()

        # if self.viewer:
        #    self.viewer.updateView()

[docs]    def expandSelection(self):
        """
        Expands selection to include entire columns.
        """
        no_selected = not self.hasSelectedSequences()
        for pos in range(self.findMaxLength()):
            for seq in self.sequences:
                if pos < seq.length():
                    if seq.selected or no_selected:
                        if seq.residues[pos].selected:
                            self.selectColumns(pos, pos)
                            break

[docs]    def expandSelectionRef(self):
        """
        Expands selection from the reference sequence to include entire columns.
        """
        if not self.reference:
            return
        for pos in range(self.reference.length()):
            if self.reference.residues[pos].selected:
                self.selectColumns(pos, pos)

[docs]    def hideColumns(self, unselected=False):
        """
        Hides selected or un-selected columns.
        """
        max = self.findMaxLength()
        # Just to be sure we are not going to loose any residues here.
        self.showAllResidues()
        for pos in range(max):
            if self.isColumnSelected(pos):
                if not unselected:
                    self.hideColumn(pos)
            elif unselected:
                self.hideColumn(pos)
        for seq in self.sequences:
            if seq.isValidProtein():
                seq.tmp_residues = copy.copy(seq.residues)
                seq.residues = [res for res in seq.residues if res.visible]
                seq.tmp_children = []
                for child in seq.children:
                    seq.tmp_children.append(copy.copy(child.residues))
                    child.residues = [
                        res for res in child.residues if res.visible
                    ]

[docs]    def isColumnSelected(self, pos, weak=False):
        """
        Returns True if the specified column is selected, False otherwise.

        :type weak: bool
        :param weak: If weak is False (default) treat the column as selected
                     only if all residues in the column are selected.
        """
        for seq in self.sequences:
            if seq.isValidProtein():
                if pos < seq.length():
                    if not weak:
                        if not seq.residues[pos].selected:
                            return False
                    else:
                        if seq.residues[pos].selected:
                            return True
        return not weak

[docs]    def hideColumn(self, pos):
        """
        Hides the specified column.
        """
        for seq in self.sequences:
            if seq.isValidProtein():
                if pos < seq.length():
                    seq.residues[pos].visible = False
                    for child in seq.children:
                        child.residues[pos].visible = False

[docs]    def showAllResidues(self):
        """
        Makes all residues in all sequences visible.
        """
        for seq in self.sequences:
            if seq.isValidProtein():
                if seq.tmp_residues:
                    seq.residues = seq.tmp_residues
                    for res in seq.residues:
                        res.visible = True
                    seq.tmp_residues = None
                if seq.tmp_children:
                    for index, child in enumerate(seq.children):
                        child.residues = seq.tmp_children[index]
                    seq.tmp_children = None
                for child in seq.children:
                    for res in child.residues:
                        res.visible = True

[docs]    def markResidues(self, rgb):
        for seq in self.sequences:
            if seq.isValidProtein():
                for res in seq.residues:
                    if res.selected:
                        res.marked_color = rgb

[docs]    def clearMarkedResidues(self):
        no_selected = not self.hasSelectedResidues()
        for seq in self.sequences:
            if seq.isValidProtein():
                for res in seq.residues:
                    if no_selected or res.selected:
                        res.marked_color = None

[docs]    def cropSelectedResidues(self):
        if self.hasSelectedResidues():
            self.invertSelection()
            self.deleteSelectedResidues()

[docs]    def markTemplateRegion(self):
        for seq in self.sequences:
            if seq.has_structure and \
               seq != self.reference:
                for index, res in enumerate(seq.residues):
                    if res.selected:
                        res.model = True
                        res.selected = False
                        for seq2 in self.sequences:
                            if seq != seq2:
                                if index < seq2.length():
                                    seq2.residues[index].model = False
                # msv_maestro.propagateColorsToMaestro(
                #    self.viewer, seq, template_colors=True)
        self.unselectAll()

[docs]    def premarkTemplateRegion(self):
        if not self.hasValidTemplates():
            return False
        for seq in self.sequences:
            if seq.isValidTemplate(reference=self.reference):
                for res in seq.residues:
                    if res.model:
                        return False
        for seq in self.sequences:
            if seq.isValidTemplate(reference=self.reference):
                for index, res in enumerate(seq.residues):
                    res.model = True
                break
        self.unselectAll()
        # if self.viewer:
        #    self.viewer.repaint()
        return True

[docs]    def unmarkTemplateRegions(self):
        for seq in self.sequences:
            if seq.has_structure and seq != self.reference:
                for res in seq.residues:
                    res.model = False
                # msv_maestro.propagateColorsToMaestro(self.viewer,
                #    seq, template_colors=True)

        self.unselectAll()

[docs]    def getStructureList(self, omit_reference=False):
        """
        Returns a list of visible sequences associated with structures.
        """
        return [
            seq for seq in self.sequences
            if seq.has_structure and seq.visible and
            (not omit_reference or (seq != self.reference))
        ]

[docs]    def getMaestroSequencesForEntryId(self, entry_id):
        seq_list = []
        for seq in self.sequences:
            if seq.from_maestro and seq.maestro_entry_id == entry_id:
                seq_list.append(seq)

        return seq_list

[docs]    def colorSequenceNames(self, color):
        no_selected = not self.hasSelectedSequences()
        for seq in self.sequences:
            if no_selected or seq.selected:
                seq.color = color
                children_selected = seq.hasSelectedChildren()
                for child in seq.children:
                    if not children_selected or child.selected:
                        child.color = color

[docs]    def selectFirstTemplate(self, n_templates=1):
        """
        Selects first available valid template from a template list.
        If a template is alread selected, do nothing. Optionally, select
        n_templates valid templates instead of just the first one.
        """
        for seq in self.sequences:
            if seq.selected and seq.isValidTemplate(reference=self.reference):
                return
        n = 0
        for seq in self.sequences:
            if seq.isValidTemplate(reference=self.reference):
                seq.selected = True
                n += 1
                if n == n_templates:
                    return

[docs]    def hasValidTemplates(self):
        for seq in self.sequences:
            if seq.isValidTemplate(reference=self.reference):
                return True
        return False

[docs]    def getTemplates(self, selected_only=False):
        """
        Return a list of
        `sequences<schrodinger.ui.sequencealignment.sequence.Sequence`
        that are valid templates.
        """
        templates = []
        for seq in self.sequences:
            if selected_only and not seq.selected:
                continue
            if seq.isValidTemplate(reference=self.reference):
                templates.append(seq)
        return templates

[docs]    def copySequences(self, group):
        not_selected = not group.hasSelectedSequences()
        for seq in group.sequences:
            if (seq.selected or not_selected) and \
            seq.type != constants.SEQ_RULER:
                seq_copy = seq.copyForUndo()
                self.sequences.append(seq_copy)

[docs]    def calculateMatrix(self):
        """
        Calculates a substitution matrix based on the current alignment.
        """
        # Calculate expected frequencies
        p_exp = {}
        amino_acids = list(constants.AMINO_ACIDS)
        for aa in amino_acids:
            p_exp[aa] = 0.0
        total = 1.0
        for seq in self.sequences:
            if seq.isValidProtein():
                for res in seq.residues:
                    if res.code in p_exp:
                        p_exp[res.code] += 1.0
                        total += 1.0
        for aa in list(p_exp):
            p_exp[aa] /= total

        # Calculate target (observed) pair frequencies
        q = {}
        m = {}
        for aa in amino_acids:
            for ab in amino_acids:
                q[aa + ab] = 0.0
                m[aa + ab] = 0.0
        total = 0.0
        max_length = self.findMaxLength()
        for pos in range(max_length):
            for seq1 in self.sequences:
                if seq1.isValidProtein() and pos < seq1.length():
                    code1 = seq1.residues[pos].code
                    for seq2 in self.sequences:
                        if seq2.isValidProtein and pos < seq2.length():
                            code2 = seq2.residues[pos].code
                            cc = code1 + code2
                            if cc in q:
                                q[cc] += 1.0
                                total += 1.0
        for aa in amino_acids:
            for ab in amino_acids:
                # NB: If the group is empty this causes a divide by zero error
                q[aa + ab] /= total
                if q[aa + ab] > 0.0:
                    m[aa + ab] = math.log(
                        old_div(q[aa + ab], (p_exp[aa] * p_exp[ab])))
        return m

[docs]    def alignByResidueNumbers(self):
        """
        Aligns the sequences so that identical residue numbers
        are lined up in columns.
        """
        minres = maxres = None
        seq_list = []
        new_res_list = []
        # Find residue number range
        for seq in self.sequences:
            if seq.isValidProtein():
                seq_list.append(seq)
                seq.residues = seq.gaplessResidues()
                new_res_list.append([])
                for res in seq.residues:
                    if minres is None:
                        minres = res.num
                    elif minres > res.num:
                        minres = res.num
                    if maxres is None:
                        maxres = res.num
                    elif maxres < res.num:
                        maxres = res.num
        # Iterate over residue number range
        resnum = minres
        while resnum <= maxres:
            found = True
            while found:
                found = False
                # Check if there are any residues at this position
                for index, seq in enumerate(seq_list):
                    if seq.residues:
                        if seq.residues[0].num <= resnum:
                            found = True
                            break
                if found:
                    # If yes, pick the residue if resnum matches
                    # or insert a gap if not
                    for index, seq in enumerate(seq_list):
                        if seq.residues:
                            if seq.residues[0].num <= resnum:
                                new_res_list[index].append(seq.residues.pop(0))
                            else:
                                gap = Residue()
                                gap.sequence = seq
                                gap.is_gap = True
                                gap.code = constants.UNLOCKED_GAP_SYMBOL
                                new_res_list[index].append(gap)
            resnum += 1
        for index, seq in enumerate(seq_list):
            if seq.isValidProtein():
                seq.residues = new_res_list[index]
                seq.propagateGapsToChildren()

[docs]    def addCustomSequence(self):
        sequence = Sequence()
        sequence.type = constants.SEQ_CUSTOM
        sequence.name = ""
        sequence.short_name = "Custom annotation"
        max_length = self.findMaxLength()
        sequence.residues = [
            Residue(sequence=sequence,
                    code=' ',
                    inverted=False,
                    color=self.inv_background_color) for x in range(max_length)
        ]
        self.sequences.append(sequence)

    #USED ONLY IN TESTING! (delete later)
[docs]    def getSequenceNameList(self):
        """
        Returns a list of sequence names.
        """
        name_list = []
        for seq in self.sequences:
            name_list.append(seq.name)
        return name_list

[docs]    def repair(self):
        """
        Repairs the group by setting sequence-residue associations
        for all sequences in group. Also, adds missing attributes
        (using default values) to the group.

        Useful if the group comes from a corrupted project file.
        """
        # Create dummy objects
        empty_sequence = Sequence()
        empty_residue = Residue()
        for seq in self.sequences:
            # Add missing sequence attributes
            for attr in list(empty_sequence.__dict__):
                if not hasattr(seq, attr):
                    setattr(seq, attr, getattr(empty_sequence, attr))
            for res in seq.residues:
                # Update residue parent
                res.sequence = seq
                # Add missing residue attributes
                for attr in list(empty_residue.__dict__):
                    if not hasattr(res, attr):
                        setattr(res, attr, getattr(empty_residue, attr))
            for child in seq.children:
                # Add missing child sequence attributes
                for attr in list(empty_sequence.__dict__):
                    if not hasattr(child, attr):
                        setattr(child, attr, getattr(empty_sequence, attr))
                for child_res in child.residues:
                    # Update child residue parent
                    child_res.sequence = child
                    # Add missing child residue attributes
                    for attr in list(empty_residue.__dict__):
                        if not hasattr(child_res, attr):
                            setattr(child_res, attr,
                                    getattr(empty_residue, attr))
        # Add missing group attributes
        empty_group = SequenceGroup()
        for attr in list(empty_group.__dict__):
            if not hasattr(self, attr):
                setattr(self, attr, getattr(empty_group, attr))