Source code for schrodinger.ui.sequencealignment.sequence_group

"""
Implementation of SequenceGroup class.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Piotr Rotkiewicz

import copy
import math
import string
from past.utils import old_div

import schrodinger
from schrodinger.ui.sequencealignment.sequence import delete_from_str

from . import constants
from . import utils
from .residue import Residue
from .sequence import Sequence

try:
    from schrodinger.protein.sequence import find_generalized_pattern
except ImportError:
    find_generalized_pattern = None

maestro = schrodinger.get_maestro()


[docs]class SequenceGroup(object): """ This class is a container for the sequences displayed in the sequence viewer. The class performs various operations on sets of sequences or on the entire group. After the group is modified, `SequenceViewer` update() method should be called. """
[docs] def __init__(self): self.sequences = [] self.profile = None #Global consensus profile sequence. self.reference = None #Reference sequence self.tree = None self.consider_gaps = True # when calculating sequence identity. self.name = "" #Group name self.color_mode = constants.COLOR_SIDECHAIN_CHEMISTRY #current color scheme. self.build_mode = 0 #: Homology modeling mode. self.user_annotations = [] self.custom_color = (255, 255, 255) self.background_color = (255, 255, 255) self.inv_background_color = (0, 0, 0) #: Calculate sequence identity in selected columns only self.identity_in_columns = False
[docs] def copyForUndo(self, attributes_only=True): group_copy = copy.copy(self) if attributes_only: group_copy.sequences = [] for seq in self.sequences: group_copy.sequences.append(seq.copyForUndo(deep_copy=False)) group_copy.profile = copy.copy(self.profile) group_copy.tree = copy.copy(self.tree) else: group_copy.sequences = copy.deepcopy(self.sequences) group_copy.profile = copy.deepcopy(self.profile) group_copy.tree = copy.deepcopy(self.tree) return group_copy
[docs] def clear(self): """ Deletes the entire contents of self. """ self.sequences = [] self.profile = Sequence() self.tree = None self.reference = None
[docs] def addSequence(self, sequence): """ Adds a new sequence to self. """ if sequence and sequence not in self.sequences: self.sequences.append(sequence)
[docs] def hasSelectedResidues(self): """ Check if the group has any residues selected. :rtype: bool :return: True if there are any residues selected within the group, False otherwise """ for seq in self.sequences: for res in seq.residues: if res.selected: return True return False
#Used in tests only, but should get deleted eventually.
[docs] def countSelectedResidues(self): """ Counts selected residues. :rtype: int :return: Number of selected residues. """ count = 0 for seq in self.sequences: if seq.isValidProtein(): for res in seq.residues: if res.selected: count += 1 return count
[docs] def hasSelectedSequences(self, exclude_reference=False, check_children=False): """ Check if there are any selected sequences within the group. :type check_children: bool (default=False) :param check_children: optional parameter, if True, the function will also check child sequences. :rtype: bool :return: True if there are any sequences selected in the group, False otherwise """ for seq in self.sequences: if seq.selected and (not exclude_reference or seq != self.reference): return True if check_children: for child in seq.children: if child.selected: return True return False
[docs] def selectColumns(self, start, end, select=True): """ Select all columns within alignment positon range from start to end. :type start: int :param start: first column to be selected :type end: int :param end: last column to be selected """ if start is None or end is None: return if start > end: tmp = end end = start start = tmp if start < 0: start = 0 for seq in self.sequences: for column in range(start, end + 1): if seq.length() > column: seq.residues[column].selected = select
[docs] def unselectAll(self, make_active=False): """ Unselect all residues within the entire group. """ for seq in self.sequences: if make_active: seq.makeActive() seq.unselectResidues() for child in seq.children: child.unselectResidues()
[docs] def anchorSelection(self): for seq in self.sequences: if seq.isValidProtein(): for res in seq.residues: if res.selected: res.selected = False res.active = True else: res.active = False
[docs] def clearAnchors(self): for seq in self.sequences: if seq.isValidProtein(): for res in seq.residues: res.active = True
[docs] def hideSelected(self): """ Hide all selected sequences (including children). """ for seq in self.sequences: if seq.selected: seq.visible = False seq.selected = False for child in seq.children: child.selected = False
[docs] def hideSequences(self, sequences): """ Hide all sequences passed in list. :param sequences: sequences to hide :type sequences: list of `sequences<Sequence>` """ for seq in sequences: seq.visible = False seq.selected = False for child in seq.children: child.visible = False child.selected = False
[docs] def showSequences(self, sequences): """ Show all sequences passed in list. :param sequences: sequences to show :type sequences: list of `sequences<Sequence>` """ for seq in sequences: seq.visible = True for child in seq.children: child.visible = True
[docs] def showAll(self): """ Makes all sequences within the group visible. """ for seq in self.sequences: seq.visible = True for child in seq.children: child.visible = True
[docs] def deleteSelected(self): """ Deletes all selected sequences. :note: tree will be removed. """ filtered_sequences = [seq for seq in self.sequences if not seq.selected] self.sequences = filtered_sequences for seq in self.sequences: seq.children = [ child for child in seq.children if not child.selected ] if len(self.sequences) == 0: self.profile = None self.reference = None self.tree = [] self.updateReference() return self.removeLonelyRuler()
[docs] def updateReference(self, ignore_maestro=False): """ This function updates the reference sequence. """ if not self.reference or self.reference not in self.sequences: # Reset just in case the reference is not valid anymore. self.reference = None for seq in self.sequences: if seq and seq.isValidProtein(): if ignore_maestro and seq.from_maestro: continue self.reference = seq break # Move refrence to the top position. if self.reference and self.reference in self.sequences: # self.sequences.remove(self.reference) index = 0 for index, seq in enumerate(self.sequences): if seq and not \ (seq.type == constants.SEQ_RULER or seq.global_sequence): break self.sequences.remove(self.reference) self.sequences.insert(index, self.reference) self.addRuler(update_only=True)
[docs] def fillGaps(self): """ Replaces all selected residues in visible sequences with gaps. """ none_selected = not self.hasSelectedSequences() for seq in self.sequences: if seq.visible and (none_selected or seq.selected): for res in seq.residues: if res.selected: res.code = constants.UNLOCKED_GAP_SYMBOL res.is_gap = True res.inverted = False res.color = (0, 0, 0)
[docs] def removeAllGaps(self): """ Removes all gaps from the visible sequences. """ none_selected = not self.hasSelectedSequences() for seq in self.sequences: if seq.visible and (none_selected or seq.selected): seq.removeAllGaps(selected_only=True) seq.propagateGapsToChildren()
[docs] def deleteSelectedResidues(self): """ Deletes selected residues from the visible sequences. """ for seq in self.sequences: if seq.visible: for child in seq.children: for index, res in enumerate(child.residues): if index < seq.length(): res.selected = seq.residues[index].selected else: # Truncate extra residues. This shoud not # normally be necessary unless the sequence # comes from a corrupted project. res.selected = True seq.deleteSelectedResidues() for child in seq.children: child.deleteSelectedResidues()
[docs] def isColumnEmpty(self, position): """ Checks if a column at given position is empty. """ for seq in self.sequences: length = seq.length() if seq.isValidProtein() and position < length: if not seq.residues[position].is_gap: return False return True
[docs] def minimizeAlignment(self, query_only=False): """ Minimizes the alignment, i.e. removes all gaps from the gap-only columns. """ max_length = self.findMaxLength() for pos in range(max_length): gaps_only = True if query_only: if self.reference and pos < self.reference.length() and \ self.reference.residues[pos].is_gap != True: gaps_only = False else: for seq in self.sequences: length = seq.length() if seq.isValidProtein() and pos < length: res = seq.residues[pos] if not res.is_gap: gaps_only = False break if gaps_only: for seq in self.sequences: length = seq.length() if seq.isValidProtein() and pos < length: seq.residues[pos].num = None for child in seq.children: child.residues[pos].num = None for seq in self.sequences: if seq.isValidProtein(): seq.residues = [ res for res in seq.residues if res.num is not None ] for child in seq.children: child.residues = [ res for res in child.residues if res.num is not None ]
[docs] def lockGaps(self): """ Locks gaps in visible sequences. """ no_selected = not self.hasSelectedResidues() for seq in self.sequences: if seq.visible: for pos in range(len(seq.residues)): parent_res = seq.residues[pos] if parent_res.is_gap and \ (no_selected or parent_res.selected): parent_res.code = constants.LOCKED_GAP_SYMBOL for child in seq.children: if pos not in list(range(len(child.residues))): continue res = child.residues[pos] if res.is_gap and (no_selected or parent_res.selected): res.code = constants.LOCKED_GAP_SYMBOL
[docs] def unlockGaps(self): """ Unlocks gaps in visible sequences. """ no_selected = not self.hasSelectedResidues() for seq in self.sequences: if seq.visible: for pos in range(len(seq.residues)): parent_res = seq.residues[pos] if parent_res.is_gap and \ (no_selected or parent_res.selected): parent_res.code = constants.UNLOCKED_GAP_SYMBOL for child in seq.children: if pos not in list(range(len(child.residues))): continue res = child.residues[pos] if res.is_gap and (no_selected or parent_res.selected): res.code = constants.UNLOCKED_GAP_SYMBOL
[docs] def selectAlignedBlocks(self, selected_only=False): """ Selects aligned blocks (regions without gaps). """ self.unselectAll() any_selected = self.hasSelectedSequences() if not any_selected: selected_only = False max_length = self.findMaxLength() for pos in range(max_length): n_res = 0 n_id = 0 for seq in self.sequences: if seq.isValidProtein() and (not selected_only or seq.selected): n_res += 1 if pos < seq.length(): if not seq.residues[pos].is_gap: n_id += 1 if n_res == n_id: for seq in self.sequences: if seq.isValidProtein() and \ (not selected_only or seq.selected): if pos < seq.length() and \ not seq.residues[pos].is_gap: seq.residues[pos].selected = True
[docs] def selectStructureBlocks(self, selected_only=False): """ Selects aligned blocks (regions without gaps). """ self.unselectAll() max_length = self.findMaxLength() for pos in range(max_length): n_id = 0 for seq in self.sequences: if seq.isValidProtein() and (not selected_only or seq.selected): if pos < seq.length(): if not seq.residues[pos].structureless: n_id += 1 if n_id > 0: for seq in self.sequences: if seq.isValidProtein() and \ (not selected_only or seq.selected): if pos < seq.length() and \ not seq.residues[pos].is_gap: seq.residues[pos].selected = True
[docs] def selectIdentities(self): """ Selects identical residues in the alignment, ignores gaps. """ self.unselectAll() max_length = self.findMaxLength() for pos in range(max_length): code = None n_res = 0 n_id = 0 for seq in self.sequences: if seq.isValidProtein(): if pos < seq.length(): if not seq.residues[pos].is_gap: n_res = n_res + 1 if code is None: code = seq.residues[pos].code n_id += 1 elif seq.residues[pos].code == code: n_id += 1 if n_res > 0 and n_res == n_id: for seq in self.sequences: if pos < seq.length() and \ not seq.residues[pos].is_gap: seq.residues[pos].selected = True
[docs] def selectResidues(self, start_row, start_pos, end_row, end_pos, select): """ Selects residues within a specified region. :type start_row: (Sequence, int, int, int) :param start_row: initial row :type start_pos: int :param start_pos: initial position :type end_row: (Sequence, int, int, int) :param end_row: final row :type end_pos: int :param end_pos: final position """ if start_pos is None or end_pos is None: return start_seq, start, dummy, height = start_row end_seq, dummy, end, height = end_row if start_pos > end_pos: tmp = end_pos end_pos = start_pos start_pos = tmp if start_seq.parent_sequence: start_seq = start_seq.parent_sequence if end_seq.parent_sequence: end_seq = end_seq.parent_sequence # Block selection. if (start_seq not in self.sequences) or (end_seq not in self.sequences): return start_seq_idx = self.sequences.index(start_seq) end_seq_idx = self.sequences.index(end_seq) if start_seq_idx > end_seq_idx: tmp = end_seq_idx end_seq_idx = start_seq_idx start_seq_idx = tmp for seq_idx in range(start_seq_idx, end_seq_idx + 1): seq = self.sequences[seq_idx] for pos in range(start_pos, end_pos + 1): res = seq.getResidue(pos) if res: res.selected = select for child in seq.children: res = child.getResidue(pos) if res: res.selected = select
[docs] def grabAndDrag(self, start_row, start_pos, end_row, end_pos, lock_down, gap_insert_mode=False, slide_sequence=False, block_length=0): """ This method performs a grab-and-drag operation on a specified region. :type start_row: (Sequence, int, int, int) :param start_row: initial row :type start_pos: int :param start_pos: initial position :type end_row: (Sequence, int, int, int) :param end_row: final row :type end_pos: int :param end_pos: final position :type lock_down: bool :param lock_down: indicates if the sequence downstream should be locked """ if start_row is None or end_row is None: return start_seq, start, dummy, height = start_row end_seq, dummy, end, height = end_row if start_seq.parent_sequence: start_seq = start_seq.parent_sequence if end_seq.parent_sequence: end_seq = end_seq.parent_sequence if start_seq is not None and start_pos is not None and \ end_pos is not None and not start_seq.global_sequence: parent_sequence = seq = start_seq if slide_sequence: if end_pos > start_pos: n_inserted = seq.insertGaps(0, end_pos - start_pos) for pos in range(n_inserted): if self.isColumnEmpty(0): for other_seq in self.sequences: other_seq.removeGaps(0, 1) elif end_pos < start_pos: diff = start_pos - end_pos n_removed = seq.removeGapsBackwards(0, start_pos - end_pos) if diff > n_removed: for other_seq in self.sequences: if other_seq.isValidProtein() and other_seq != seq: other_seq.insertGaps(0, diff - n_removed) return if start_pos >= 0 and start_pos < parent_sequence.length() and \ not parent_sequence.residues[start_pos].active: return if end_pos > start_pos: max_insert = parent_sequence.countActiveGaps(end_pos) n_gaps = end_pos - start_pos if max_insert >= 0: n_gaps = min(max_insert, n_gaps) n_inserted = seq.insertGaps(start_pos, n_gaps) if not gap_insert_mode and \ (not lock_down or parent_sequence.haveAnchors(end_pos + 1)): seq.removeGaps(end_pos, n_inserted) elif end_pos < start_pos: if end_pos < 0: end_pos = 0 n_removed = seq.removeGapsBackwards(end_pos, start_pos - end_pos) if not lock_down and start_pos < seq.length(): seq.insertGaps(start_pos + block_length - n_removed + 1, n_removed) else: inactive_pos = parent_sequence.inactivePosition(start_pos) if inactive_pos >= 0: seq.insertGaps(inactive_pos, n_removed) seq.propagateGapsToChildren()
[docs] def tmpLockGaps(self, row, start_pos, end_pos): """ Temporarily locks gaps in a specified region. :type row: (Sequence, int, int, int) :param row: sequence viewer row :type start_pos: int :param start_pos: initial position :type end_pos: int :param end_pos: final position """ seq, start, end, height = row for res in seq.residues: res.tmp_code = None if res.selected: if res.is_gap: res.tmp_code = res.code res.code = constants.LOCKED_GAP_SYMBOL
[docs] def tmpUnlockGaps(self, row, start_pos, end_pos): """ Unlocks temporarily locked gaps. :type row: (Sequence, int, int, int) :param row: sequence viewer row :type start_pos: int :param start_pos: initial position :type end_pos: int :param end_pos: final position """ seq, start, end, height = row for res in seq.residues: if res.selected: if res.is_gap and res.tmp_code: res.code = res.tmp_code
[docs] def grabAndDragBlock(self, rows, start_row, start_pos, end_row, end_pos, block_length, lock_down): """ This method performs grab-and-drag operation on selected blocks in "Select And Slide" mode. :type rows: list of (Sequence, int, int, int) :param rows: sequence viewer rows :type start_row: (Sequence, int, int, int) :param start_row: initial row :type start_pos: int :param start_pos: initial position :type end_row: (Sequence, int, int, int) :param end_row: final row :type end_pos: int :param end_pos: final position :type lock_down: bool :param lock_down: indicates if the sequence downstream should be locked """ start_seq, start, dummy, height = start_row end_seq, dummy, end, height = end_row found = False for row in rows: if row == start_row: found = True if found: seq, dum, dum, dum = row if not seq.parent_sequence: self.tmpLockGaps(row, start_pos, end_pos) self.grabAndDrag(row, start_pos, row, end_pos, lock_down, block_length=block_length) self.tmpUnlockGaps(row, start_pos, end_pos) if row == end_row: break
[docs] def insertGap(self, start_row, start_pos): """ Inserts a single gap at a specified position. :type start_row: (Sequence, int, int, int) :param start_row: sequence viewer row :type start_pos: int :param start_pos: position where the gap will be inserted """ start_seq, start, dummy, height = start_row if start_seq is not None: parent_sequence = seq = start_seq n_children = 0 if seq.parent_sequence: parent_sequence = seq = seq.parent_sequence n_children = len(parent_sequence.children) child_index = 0 while seq: seq.insertGaps(start_pos, 1) # Iterate over children. if child_index < n_children: seq = parent_sequence.children[child_index] child_index += 1 else: seq = None
[docs] def removeGap(self, start_row, start_pos): """ Removes a single gap at a specified position. :type start_row: (Sequence, int, int, int) :param start_row: sequence viewer row :type start_pos: int :param start_pos: position where the gap will be removed """ start_seq, start, dummy, height = start_row if start_seq is not None: parent_sequence = seq = start_seq n_children = 0 if seq.parent_sequence: parent_sequence = seq = seq.parent_sequence n_children = len(parent_sequence.children) child_index = 0 while seq: seq.removeGaps(start_pos, 1) # Iterate over children. if child_index < n_children: seq = parent_sequence.children[child_index] child_index += 1 else: seq = None
[docs] def colorBySequenceSimilarity(self): """ This method colors the sequences by sequence similarity. """ self.colorByResidueType() for seq in self.sequences: if seq.isValidProtein(global_annotation=True): for pos in range(seq.length()): res = seq.residues[pos] color = (0, 0, 0) if res.is_gap: res.inverted = False else: res.inverted = True color = self.background_color if self.reference and pos < self.reference.length(): profile_res = self.reference.residues[pos] if res.code == profile_res.code: color = (255, 64, 64) else: score = utils.matrixValue( constants.SIMILARITY_MATRIX, res.code, profile_res.code) if score > 0.0: color = (255, 128, 0) res.color = color
[docs] def colorByIdentity(self): """ This method colors the sequences by identity. """ if not self.reference: return for pos in range(self.reference.length()): ref_code = self.reference.residues[pos].code if ref_code != '.': for seq in self.sequences: if seq.isValidProtein(): if pos < seq.length(): res = seq.residues[pos] if not res.is_gap: if res.code != ref_code: res.color = self.background_color
[docs] def colorByDifference(self): """ This method colors the sequences by difference. """ if not self.reference: return for pos in range(self.reference.length()): ref_code = self.reference.residues[pos].code if ref_code != '.': for seq in self.sequences: if seq.isValidProtein(): if pos < seq.length(): res = seq.residues[pos] if not res.is_gap: if res.code == ref_code: res.color = self.background_color
[docs] def calculateProfile(self, ignore_query=False): """ Calculates global sequence profile and associated information. :type ignore_query: bool :param ignore_query: Tells if query sequence should be included in the calculation or it should be ignored. """ max_length = self.findMaxLength(exclude_consensus=True) if not self.profile: self.profile = Sequence() self.profile.short_name = "Sequence Profile" for pos in range(max_length): res = Residue() res.sequence = self.profile self.profile.residues.append(res) self.updateReference() if self.profile.length() < max_length: self.profile.residues = [] d_residues = max_length - self.profile.length() for pos in range(d_residues): res = Residue() res.sequence = self.profile self.profile.residues.append(res) else: self.profile.residues = self.profile.residues[:max_length] amino_acid_symbols = list(constants.AMINO_ACIDS) n_seq = 0 for seq in self.sequences: if seq == self.reference and ignore_query: continue if seq.isValidProtein(): n_seq += 1 if n_seq == 0: return i_n_seq = old_div(1.0, n_seq) if ignore_query or n_seq == 1: i_n_seq_1 = i_n_seq else: i_n_seq_1 = old_div(1.0, (n_seq - 1)) for pos in range(max_length): frequency = {} for aa in string.ascii_uppercase: frequency[aa] = 0.0 code = None n_res = 0 n_id = 0 n_ungapped = 0 column = "" for seq in self.sequences: if seq == self.reference and ignore_query: continue if seq.isValidProtein(): if pos < seq.length(): res = seq.residues[pos] if not res.is_gap: n_ungapped += 1 if not res.is_gap or self.consider_gaps: if res.code in amino_acid_symbols: frequency[res.code] += 1 column += res.code if code is None: code = res.code n_id += 1 elif res.code == code: n_id += 1 n_res = n_res + 1 if not self.consider_gaps and n_res > 0: factor = old_div(1.0, n_res) else: factor = i_n_seq res = self.profile.residues[pos] res.composition = set(column) res.factor = n_ungapped * factor max = 0.0 max_code = aa for aa in amino_acid_symbols: if aa == 'X': break f = float(frequency[aa]) if f > max: max = f max_code = aa # Shannon entropy entropy = 0.0 for aa in amino_acid_symbols: fract = float(frequency[aa]) * i_n_seq if fract > 0.0: entropy += -fract * math.log(fract, 2.0) res.bits = math.log(20.0, 2.0) - entropy res.entropy = old_div(entropy, -math.log(old_div(1.0, 20.0), 2.0)) n_max = 0 profile_codes = [] if max > 0.0: for aa in amino_acid_symbols: if max == float(frequency[aa]): profile_codes.append(aa) n_max += 1 # Query residue fraction res.query_fraction = 0.0 if pos < len(self.reference.residues) and \ not self.reference.residues[pos].is_gap: query_aa = self.reference.residues[pos].code if query_aa == '+': query_aa = self.reference.residues[pos].profile_codes[pos] if not ignore_query: res.query_fraction = (frequency[query_aa] - 1) * i_n_seq_1 else: res.query_fraction = frequency[query_aa] * i_n_seq_1 if n_res == n_id: res.id = True else: res.id = False max *= factor res.value = max res.profile_codes = profile_codes if code: res.is_gap = False if n_max == 1: res.code = max_code else: res.code = '+' else: res.code = '.' for aa in amino_acid_symbols: frequency[aa] *= i_n_seq res.frequencies = sorted(list(frequency.items()), key=lambda x: x[1], reverse=True) # Now calculate sequence identity for every sequence in the group. for seq in self.sequences: if seq.isValidProtein(): seq.identity = seq.calcIdentity(self.reference, self.consider_gaps, self.identity_in_columns) seq.similarity = seq.calcSimilarity(self.reference, self.consider_gaps, self.identity_in_columns) seq.score = seq.calcScore(self.reference, self.consider_gaps, self.identity_in_columns) seq.homology = seq.calcHomology(self.reference, self.consider_gaps, self.identity_in_columns)
[docs] def getConsensus(self): """ Returns a consensus sequence. """ for seq in self.sequences: if seq.type == constants.SEQ_CONSENSUS: return seq return None
[docs] def updateConsensus(self, consensus): """ Updates the consensus sequence using a pre-calculated profile. :type consensus: `Sequence` :param consensus: consensus sequence to be updated """ consensus.residues = [] consensus.global_sequence = True aa_codes = list(constants.AMINO_ACIDS) for pos in range(self.profile.length()): res = Residue() res.code = self.profile.residues[pos].code if res.code not in aa_codes: res.color = self.inv_background_color res.inverted = True res.value = self.profile.residues[pos].value res.profile_codes = self.profile.residues[pos].profile_codes res.sequence = consensus consensus.residues.append(res) if len(consensus.children) > 0: # Update the plot only if there is one. plot = consensus.children[0] plot.residues = [] plot.global_sequence = True for pos in range(self.profile.length()): res = Residue() res.code = self.profile.residues[pos].code res.value = self.profile.residues[pos].value res.sequence = plot res.color = (32, 32, 32) plot.residues.append(res) plot.calculatePlotValues(0, min_value=0.0, max_value=1.0) consensus.identity = consensus.calcIdentity(self.reference, self.consider_gaps, False) consensus.similarity = consensus.calcSimilarity(self.reference, self.consider_gaps, False) consensus.homology = consensus.calcHomology(self.reference, self.consider_gaps, False) consensus.score = consensus.calcScore(self.reference, self.consider_gaps, False)
[docs] def updateSymbols(self, symbols): """ Updates the consensus symbols string using a pre-calculated profile. :type symbols: `Sequence` :param symbols: consensus symbols string to be updated """ strong_sets = [ set("STA"), set("NEQK"), set("NHQK"), set("NDEQ"), set("QHRK"), set("MILV"), set("MILF"), set("HY"), set("FYW") ] weak_sets = [ set("CSA"), set("ATV"), set("SAG"), set("STNK"), set("STPA"), set("SGND"), set("SNDEQK"), set("NDEQHK"), set("NEQHRK"), set("FVLIM"), set("HFY") ] symbols.residues = [] symbols.global_sequence = True for pos in range(self.profile.length()): res = Residue() profile_res = self.profile.residues[pos] res.value = profile_res.value res.profile_codes = profile_res.profile_codes res.sequence = symbols res.code = ' ' res.color = self.background_color if profile_res.factor == 1.0 and profile_res.composition: if profile_res.id and len(profile_res.composition) == 1: res.code = '*' else: for strong in strong_sets: if profile_res.composition.issubset(strong): res.code = ':' break if res.code == ' ': for weak in weak_sets: if profile_res.composition.issubset(weak): res.code = '.' break symbols.residues.append(res)
[docs] def updateMeanHydrophobicity(self, seq): """ Updates mean hydrophobicity global annotation sequence. :type seq: `Sequence` :param seq: mean hydrophobicity sequence """ seq.residues = [] seq.plot_style = constants.PLOT_HISTOGRAM seq.plot_color = (192, 128, 160) seq.global_sequence = True for pos in range(self.profile.length()): res = Residue() res.code = self.profile.residues[pos].code n_aa = 0 total = 0.0 res.profile_codes = self.profile.residues[pos].profile_codes for aa in res.profile_codes: if aa != 'X': total += constants.HYDROPHOBICITY_SCALES[aa][0] n_aa += 1 if n_aa > 0: total /= n_aa res.value = total res.sequence = seq seq.residues.append(res) seq.calculatePlotValues(2)
[docs] def updateMeanPI(self, seq): """ Updates mean isoelectric point global annotation sequence. :type seq: `Sequence` :param seq: mean hydrophobicity sequence """ seq.residues = [] seq.plot_style = constants.PLOT_HISTOGRAM seq.plot_color = (128, 192, 160) seq.global_sequence = True for pos in range(self.profile.length()): res = Residue() res.code = self.profile.residues[pos].code n_aa = 0 total = 0.0 res.profile_codes = self.profile.residues[pos].profile_codes for aa in res.profile_codes: if aa != 'X': total += constants.PI_SCALE[aa] n_aa += 1 if n_aa > 0: total /= n_aa res.value = total res.sequence = seq seq.residues.append(res) seq.calculatePlotValues(2)
[docs] def colorByTaylorScheme(self): """ Colors all amino acid sequences using Taylor scheme. """ for seq in self.sequences: if seq.isValidProtein(global_annotation=True): for res in seq.residues: res.color = res.colorTaylor() if not res.is_gap: res.inverted = True else: res.inverted = False
[docs] def colorAverageColumnColors(self): """ Averages colors in columns. :note: This feature works particularly well with the Taylor scheme. """ max_length = self.findMaxLength() for pos in range(max_length): r = g = b = 0 n_res = 0 for seq in self.sequences: if seq.isValidProtein(): if pos < seq.length(): res = seq.residues[pos] if not res.is_gap: res_r, res_g, res_b = res.color r += res_r g += res_g b += res_b n_res += 1 if n_res: i_n_res = old_div(1.0, n_res) r *= i_n_res g *= i_n_res b *= i_n_res for seq in self.sequences: if seq.isValidProtein(): if pos < seq.length(): res = seq.residues[pos] if not res.is_gap: seq.residues[pos].color = \ (int(r), int(g), int(b))
[docs] def colorWeightByAlignmentStrength(self, min_weight_identity, max_weight_identity): """ Weights the sequences by alignment stregth. """ inv_range = old_div(1.0, (max_weight_identity - min_weight_identity)) profile_length = 0 if self.profile: profile_length = self.profile.length() br, bg, bb = self.background_color for seq in self.sequences: if seq.isValidProtein(): for pos in range(seq.length()): res = seq.residues[pos] if not res.is_gap and pos < profile_length: res_r, res_g, res_b = res.color value = 100.0 * self.profile.residues[pos].value value = value - min_weight_identity if value < 0: value = 0 value *= inv_range if value > 1.0: value = 1.0 res_r = br * (1.0 - value) + res_r * value res_g = bg * (1.0 - value) + res_g * value res_b = bb * (1.0 - value) + res_b * value res.color = (res_r, res_g, res_b)
[docs] def findMaxLength(self, exclude_consensus=False): """ Finds a length of the longest sequence. :type exclude_consensus: bool :param exclude_consensus: if True, the consensus sequence will not count :rtype: int :return: maximum sequence length in the entire group """ max_length = 0 for seq in self.sequences: if seq and ((not exclude_consensus) or seq.type != \ constants.SEQ_CONSENSUS) and not seq.global_sequence: if seq.length() > max_length: max_length = seq.length() return max_length
[docs] def unselectAllSequences(self): """ Unselects all sequences in the group. """ for seq in self.sequences: seq.selected = False for child in seq.children: child.selected = False
[docs] def selectAllSequences(self): """ Selects all sequences in the group. Child selection remains unchanged. """ for seq in self.sequences: if seq.type != constants.SEQ_SEPARATOR and \ seq.type != constants.SEQ_RULER: seq.selected = True
# for child in seq.children: # child.selected = False #residues should be in name!
[docs] def selectAll(self): """ Selects all residues. """ for seq in self.sequences: seq.selectAllResidues()
[docs] def deselectAll(self): """ Deselects all residues. """ for seq in self.sequences: seq.unselectResidues()
[docs] def invertSequenceSelection(self): """ Inverts sequence selection range. """ for seq in self.sequences: if len(seq.residues) > 0: if seq.selected: seq.selected = False for child in seq.children: child.selected = False else: seq.selected = True for child in seq.children: child.selected = False
[docs] def invertSelection(self): """ Inverts residue selection range. """ for seq in self.sequences: seq.invertSelection()
#ANNOTATIONS WILL NOT BE PART OF SEQUENCE:
[docs] def deleteAnnotations(self): """ Removes all annotations. """ self.sequences = [ seq for seq in self.sequences if seq.type != constants.SEQ_ANNOTATION and seq.type != constants.SEQ_CONSENSUS and seq.type != constants.SEQ_SYMBOLS and seq.type != constants.SEQ_LOGO and seq.type != constants.SEQ_SECONDARY ] not_selected = True if self.hasSelectedSequences(): not_selected = False for seq in self.sequences: if not_selected or seq.selected: seq.children = [ child for child in seq.children if child.annotation_type == constants.ANNOTATION_SSP or child.annotation_type == constants.ANNOTATION_ACC or child.annotation_type == constants.ANNOTATION_DIS or child.annotation_type == constants.ANNOTATION_DOM or child.annotation_type == constants.ANNOTATION_CCB or child.annotation_type == constants.ANNOTATION_PFAM ] self.updateReference()
[docs] def deleteGlobalAnnotations(self): """ Removes all global annotations. """ self.sequences = [ seq for seq in self.sequences if seq.type != constants.SEQ_CONSENSUS and seq.type != constants.SEQ_SYMBOLS and seq.type != constants.SEQ_LOGO and not (seq.type == constants.SEQ_ANNOTATION and (seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO or seq.annotation_type == constants.ANNOTATION_MEAN_PI)) ] self.updateReference()
[docs] def deletePredictions(self): """ Removes all predictions. """ not_selected = True if self.hasSelectedSequences(): not_selected = False for seq in self.sequences: if not_selected or seq.selected: seq.children = [ child for child in seq.children if child.annotation_type != constants.ANNOTATION_SSP and child.annotation_type != constants.ANNOTATION_ACC and child.annotation_type != constants.ANNOTATION_DIS and child.annotation_type != constants.ANNOTATION_DOM and child.annotation_type != constants.ANNOTATION_CCB and child.annotation_type != constants.ANNOTATION_PFAM ] self.updateReference()
[docs] def addSeparator(self): """ Adds a separator sequence. """ seq = Sequence() seq.type = constants.SEQ_SEPARATOR self.sequences.append(seq)
[docs] def addRuler(self, update_only=False): """ Adds a ruler sequence. :rtype: `Sequence` :return: A ruler sequence. """ if len(self.sequences) == 0: return None for seq in self.sequences: if seq.type == constants.SEQ_RULER: self.sequences.remove(seq) self.sequences.insert(0, seq) seq.visible = True return seq if update_only: return seq = Sequence() seq.type = constants.SEQ_RULER self.sequences.insert(0, seq) return seq
[docs] def removeRuler(self): """ Removes a ruler sequence. """ for seq in self.sequences: if seq.type == constants.SEQ_RULER: self.sequences.remove(seq)
[docs] def addConsensusSequence(self, toggle=False): """ Creates a consensus sequence and adds it to the group. If the annotation already exists, just expands the sequence and makes it visible. :rtype: `Sequence` :return: consensus sequence """ seq = None self.calculateProfile() self.updateVariableSequences() # Check if we don't have a consensus sequence already. for seq in self.sequences: if seq.isValidProtein(): break if seq.type == constants.SEQ_CONSENSUS: if toggle: self.sequences.remove(seq) return seq.visible = True seq.showChildren() return seq if not seq: return None if not seq.isValidProtein(): return None # Create a consensus sequence consensus = Sequence() consensus.type = constants.SEQ_CONSENSUS consensus.name = "Consensus Sequence" consensus.short_name = "Consensus Sequence" consensus.global_sequence = True index = self.sequences.index(seq) self.sequences.insert(index, consensus) seq = Sequence() seq.type = constants.SEQ_ANNOTATION seq.parent_sequence = consensus seq.height = 3 seq.name = "Consensus Plot" seq.short_name = "Consensus Plot" seq.plot_color = (96, 128, 96) consensus.children.append(seq) self.updateReference() return consensus
[docs] def addConsensusSymbols(self, toggle=False): """ Creates a consensus symbols string and adds it to the group. If the annotation already exists, just expands the sequence and makes it visible. :rtype: `Sequence` :return: consensus sequence """ self.calculateProfile() self.updateVariableSequences() # Check if we don't have a consensus sequence already. seq = None for seq in self.sequences: if seq.isValidProtein(): break if seq.type == constants.SEQ_SYMBOLS: if toggle: self.sequences.remove(seq) return seq.visible = True seq.showChildren() return seq if not seq or not seq.isValidProtein(): return None # Create a consensus sequence symbols = Sequence() symbols.type = constants.SEQ_SYMBOLS symbols.name = "Consensus Symbols" symbols.short_name = "Consensus Symbols" symbols.global_sequence = True index = self.sequences.index(seq) self.sequences.insert(index, symbols) return symbols
[docs] def addMeanHydrophobicity(self, toggle=False): """ Creates a mean hydrophobicity annotation and adds it to the group. If the annotation already exists, just expands the sequence and makes it visible. :rtype: `Sequence` :return: mean hydrophbicity annotation """ self.calculateProfile() self.updateVariableSequences() if len(self.sequences) == 0: return None # Find a first amino acid sequence. for seq in self.sequences: if seq.isValidProtein(): break if seq.type == constants.SEQ_ANNOTATION and \ seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO: if toggle: self.sequences.remove(seq) return seq.visible = True seq.showChildren() return seq if not seq.isValidProtein(): return None index = self.sequences.index(seq) # Create a plot sequence. plot = Sequence() plot.type = constants.SEQ_ANNOTATION plot.annotation_type = constants.ANNOTATION_MEAN_HYDRO plot.name = "Mean Hydrophobicity" plot.short_name = "Mean Hydrophobicity" plot.height = 3 plot.global_sequence = True self.sequences.insert(index, plot) return plot
[docs] def addMeanPI(self, toggle=False): """ Creates a mean isoelectric point annotation and adds it to the group. If the annotation already exists, just expands the sequence and makes it visible. :rtype: `Sequence` :return: mean hydrophbicity annotation """ self.calculateProfile() self.updateVariableSequences() if len(self.sequences) == 0: return None # Find a first amino acid sequence. for seq in self.sequences: if seq.isValidProtein(): break if seq.type == constants.SEQ_ANNOTATION and \ seq.annotation_type == constants.ANNOTATION_MEAN_PI: if toggle: self.sequences.remove(seq) return seq.visible = True seq.showChildren() return seq if not seq.isValidProtein(): return None index = self.sequences.index(seq) # Create a plot sequence. plot = Sequence() plot.type = constants.SEQ_ANNOTATION plot.annotation_type = constants.ANNOTATION_MEAN_PI plot.name = "Mean Isoelectric Point" plot.short_name = "Mean Isoelectric Point" plot.height = 3 plot.global_sequence = True self.sequences.insert(index, plot) return plot
[docs] def colorByResidueType(self): for seq in self.sequences: if seq.visible and \ seq.isValidProtein(global_annotation=True): for res in seq.residues: if not res.is_gap: res.inverted = True else: res.inverted = False res.color = res.colorType()
[docs] def colorByMaestro(self): """ Colors the sequences by Maestro colors. """ for seq in self.sequences: if seq.visible and \ seq.from_maestro: seq.color_mode = constants.COLOR_MAESTRO for res in seq.residues: if res.is_gap: res.color = (0, 0, 0) else: res.color = res.maestro_color if not res.is_gap: res.inverted = True else: res.inverted = False
[docs] def colorByKyteDoolittle(self): """ Colors the sequences by Kyte-Doolittle hydrophobicity scale. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): for res in seq.residues: res.color = res.colorKD() if not res.is_gap: res.inverted = True else: res.inverted = False
[docs] def colorByHoppWoods(self): """ Colors the sequences by Hopp-Woods hydrophilicity scale. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): for res in seq.residues: res.color = res.colorHW() if not res.is_gap: res.inverted = True else: res.inverted = False
[docs] def colorByGray(self): """ Colors the sequences using plain gray color. This scheme is useful in combination with "color by alignment strength." """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): seq.color_scheme = constants.COLOR_BLACK for res in seq.residues: if not res.is_gap: res.inverted = True res.color = (63, 63, 63) else: res.inverted = False res.color = (0, 0, 0)
[docs] def colorByWhite(self): """ Colors the sequences using white color. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): seq.color_scheme = constants.COLOR_WHITE for res in seq.residues: if not res.is_gap: res.inverted = True res.color = (255, 255, 255) else: res.inverted = False res.color = (0, 0, 0)
[docs] def colorByCustomColor(self, color): """ Colors the sequences using custom color. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): seq.color_scheme = constants.COLOR_CUSTOM seq.custom_color = color for res in seq.residues: if not res.is_gap: res.inverted = True res.color = color else: res.inverted = False res.color = (0, 0, 0)
[docs] def colorByCustomAnnotations(self): """ Colors the sequences using custom annotation color. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): for child in seq.children: if child.annotation_type == constants.ANNOTATION_CUSTOM: for index, res in enumerate(seq.residues): if child.residues[index].color != (255, 255, 255): res.color = child.residues[index].color
[docs] def colorByBfactor(self): """ Colors the sequences using Bfactor values. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(): seq.color_scheme = constants.COLOR_BFACTOR minbf = seq.residues[0].bfactor maxbf = seq.residues[-1].bfactor for res in seq.residues: if res.bfactor < minbf: minbf = res.bfactor if res.bfactor > maxbf: maxbf = res.bfactor dbf = maxbf - minbf if dbf > 0.0: dbf = old_div(1.0, dbf) else: dbf = 1.0 for res in seq.residues: if not res.is_gap: res.color = utils.colorGradientGreenRed( 1.5 * dbf * (res.bfactor - minbf) - 0.75) else: res.inverted = False res.color = self.inv_background_color
[docs] def colorByPosition(self): """ Colors the sequences using residue position. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(): seq.color_scheme = constants.COLOR_POSITION residues = seq.gaplessResidues() maxpos = float(len(residues)) for index, res in enumerate(residues): res.color = utils.colorGradientGreenRed( 1.5 * (old_div(index, maxpos)) - 0.75)
[docs] def colorBySecondary(self): """ Colors the sequences using secondary structure annotation. If SSA is available, if will be used for coloring, otherwise a consensus of available SSP annotations will be used here. """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): seq.color_scheme = constants.COLOR_SECONDARY ssa = None for child in seq.children: if child.type == constants.SEQ_SECONDARY: ssa = child break ssp_list = [] if ssa is None: for child in seq.children: if child.annotation_type == constants.ANNOTATION_SSP: ssp_list.append(child) if ssa is None and len(ssp_list) == 0: continue for index, res in enumerate(seq.residues): if not res.is_gap: res.inverted = True if ssa: if ssa.residues[index].code == 'H': res.color = constants.SS_COLOR_HELIX elif ssa.residues[index].code == 'E': res.color = constants.SS_COLOR_EXTENDED else: res.color = (192, 192, 192) else: avg_color = [0, 0, 0] i_n_ssp = old_div(1.0, float(len(ssp_list))) for ssp in ssp_list: if ssp.residues[index].code == 'H': avg_color[0] += constants.SS_COLOR_HELIX[0] avg_color[1] += constants.SS_COLOR_HELIX[1] avg_color[2] += constants.SS_COLOR_HELIX[2] elif ssp.residues[index].code == 'E': avg_color[0] += constants.SS_COLOR_EXTENDED[ 0] avg_color[1] += constants.SS_COLOR_EXTENDED[ 1] avg_color[2] += constants.SS_COLOR_EXTENDED[ 2] else: avg_color[0] += 192 avg_color[1] += 192 avg_color[2] += 192 avg_color[0] *= i_n_ssp avg_color[1] *= i_n_ssp avg_color[2] *= i_n_ssp res.color = (int(avg_color[0]), int(avg_color[1]), int(avg_color[2])) else: res.inverted = False res.color = (0, 0, 0)
[docs] def colorByColorBlocks(self, scheme): """ Colors the sequences using plain gray color. This scheme is useful in combination with "color by alignment strength." """ for seq in self.sequences: if seq.visible and seq.isValidProtein(global_annotation=True): seq.color_scheme = scheme for res in seq.residues: # if not res.is_gap and not res.code in ['.', '+']: res.inverted = True # else: # res.inverted = False if scheme == constants.COLOR_HELIX_PROPENSITY: res.color = res.colorHelixPropensity() elif scheme == constants.COLOR_STRAND_PROPENSITY: res.color = res.colorStrandPropensity() elif scheme == constants.COLOR_TURN_PROPENSITY: res.color = res.colorTurnPropensity() elif scheme == constants.COLOR_SIDECHAIN_CHEMISTRY: res.color = res.colorSidechainChemistry() elif scheme == constants.COLOR_STERIC_GROUP: res.color = res.colorStericGroup() elif scheme == constants.COLOR_EXPOSURE_TENDENCY: res.color = res.colorExposureTendency()
[docs] def colorSequences(self, mode=None, color=None): """ Colors the sequences using a specified mode. :type mode: int :param mode: coloring mode """ if mode is None: mode = self.color_mode if mode == constants.COLOR_BLACK: self.colorByGray() elif mode == constants.COLOR_WHITE: self.colorByWhite() elif mode == constants.COLOR_RESIDUE_TYPE: self.colorByResidueType() elif mode == constants.COLOR_SIMILARITY: self.colorBySequenceSimilarity() elif mode == constants.COLOR_KYTE_DOOLITTLE: self.colorByKyteDoolittle() elif mode == constants.COLOR_HOPP_WOODS: self.colorByHoppWoods() elif mode == constants.COLOR_TAYLOR: self.colorByTaylorScheme() elif mode == constants.COLOR_MAESTRO: self.colorByMaestro() elif mode == constants.COLOR_SECONDARY: self.colorBySecondary() elif mode == constants.COLOR_BFACTOR: self.colorByBfactor() elif mode == constants.COLOR_POSITION: self.colorByPosition() elif mode == constants.COLOR_HELIX_PROPENSITY: self.colorByColorBlocks(constants.COLOR_HELIX_PROPENSITY) elif mode == constants.COLOR_STRAND_PROPENSITY: self.colorByColorBlocks(constants.COLOR_STRAND_PROPENSITY) elif mode == constants.COLOR_TURN_PROPENSITY: self.colorByColorBlocks(constants.COLOR_TURN_PROPENSITY) elif mode == constants.COLOR_SIDECHAIN_CHEMISTRY: self.colorByColorBlocks(constants.COLOR_SIDECHAIN_CHEMISTRY) elif mode == constants.COLOR_EXPOSURE_TENDENCY: self.colorByColorBlocks(constants.COLOR_EXPOSURE_TENDENCY) elif mode == constants.COLOR_STERIC_GROUP: self.colorByColorBlocks(constants.COLOR_STERIC_GROUP) elif mode == constants.COLOR_CUSTOM: if color is not None: self.custom_color = color self.colorByCustomColor(self.custom_color) self.colorByCustomAnnotations()
[docs] def hideAllChildren(self): """ Hides all children, effectively collapsing the sequences. """ for seq in self.sequences: seq.hideChildren()
[docs] def showAllChildren(self): """ Shows all children, effectively expanding the sequences. """ for seq in self.sequences: seq.showChildren()
[docs] def updateVariableSequences(self): """ Updates global variable sequences, i.e. consensus plot and "mean" annotations. """ self.updateReference() for seq in self.sequences: if seq.type == constants.SEQ_CONSENSUS: self.updateConsensus(seq) elif seq.type == constants.SEQ_SYMBOLS: self.updateSymbols(seq) elif seq.type == constants.SEQ_ANNOTATION: if seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO: self.updateMeanHydrophobicity(seq) elif seq.annotation_type == constants.ANNOTATION_MEAN_PI: self.updateMeanPI(seq)
[docs] def padAlignment(self): """ Pads the alignment with additional gaps, so all sequences have identical length equal to the length of the longest sequence. """ max_length = self.findMaxLength(exclude_consensus=True) for seq in self.sequences: if seq.isValidProtein(): if seq.length() and seq.length() < max_length: active = seq.residues[seq.length() - 1].active seq.insertGaps(seq.length(), max_length - seq.length(), active=active) for child in seq.children: if child.length() < max_length: child.insertGaps(child.length(), max_length - child.length())
[docs] def removeTerminalGaps(self): """ Removes terminal gaps from the alignment. """ for seq in self.sequences: if seq.isValidProtein(): while len(seq.residues) and seq.residues[-1].is_gap: seq.residues.pop() if not len(seq.residues): self.sequences.remove(seq) else: for child in seq.children: while len(child.residues) and child.residues[-1].is_gap: child.residues.pop()
[docs] def encode(self, seq): """ Encodes the sequence to include annotation data required by pattern search. Return an ungapped version of the pattern. """ ssa = None for child in seq.children: if child.type == constants.SEQ_SECONDARY: ssa = child break acc = None for child in seq.children: if child.annotation_type == constants.ANNOTATION_ACC: acc = child flex = False avg_flexibility = 0.0 n_residues = 0 for aa in seq.residues: if aa.is_gap: continue if aa.bfactor: avg_flexibility += aa.bfactor n_residues += 1 if aa.bfactor > 0.0: flex = True if n_residues > 0: avg_flexibility /= n_residues seq_string = "" ssa_string = "" acc_string = "" flx_string = "" res_ids = [] for index, aa in enumerate(seq.residues): if aa.is_gap: continue seq_string += aa.code # Store ID (includes residue number and insertion code) res_ids.append(aa.id().strip()) if ssa: ssa_string += ssa.residues[index].code.lower() if acc: acc_string += acc.residues[index].code if flex: if aa.bfactor < avg_flexibility: flx_string += 'r' else: flx_string += 'f' result = {} result['amino_acids'] = seq_string result['res_ids'] = res_ids if ssa: result['secondary_structure'] = ssa_string if acc: result['solvent_accessibility'] = acc_string if flex: result['flexibility'] = flx_string return result
[docs] def findPattern(self, pattern): """ Finds a specified PROSITE pattern in all sequences. """ self.unselectAll() if pattern: pattern = pattern.strip().rstrip() if not pattern: for seq in self.sequences: seq.makeActive() if not find_generalized_pattern: return if not pattern: return seq_string_list = [] sequence_list = [] if not delete_from_str(pattern, string.ascii_uppercase): new_pattern = pattern[0] for c in pattern[1:]: new_pattern += '-' + c pattern = new_pattern count = 0 for seq in self.sequences: if seq.isValidProtein(): seq.makeInactive() seq_string_list.append(self.encode(seq)) sequence_list.append(seq) results_list = find_generalized_pattern(seq_string_list, pattern) if results_list: for index, match_list in enumerate(results_list): seq = sequence_list[index] residues = seq.gaplessResidues() for start, end in match_list: for ridx in range(start, end): residues[ridx].selected = True count += 1 return results_list
########################################################################### # Sequence group sorting methods. ###########################################################################
[docs] def sortKeyName(self, sequence): """ Returns sequence name sort key. :rtype: string :return: sequence name sort key """ return sequence.name
[docs] def sortKeyChain(self, sequence): """ Returns sequence chain ID sort key. :rtype: string :return: sequence chain ID sort key """ return sequence.chain_id
[docs] def sortKeyLength(self, sequence): """ Returns sequence length sort key. :rtype: int :return: sequence length sort key """ return sequence.gaplessLength()
[docs] def sortKeyGaps(self, sequence): """ Returns sequence number of gaps sort key. :rtype: int :return: sequence number of gaps sort key """ return sequence.numberOfGaps()
[docs] def sortKeyIdentity(self, sequence): """ Returns sequence identity with consesus sequence sort key. :rtype: float :return: sequence identity with consesus sequence sort key """ return sequence.identity
[docs] def sortKeyHomology(self, sequence): """ Returns sequence homology with reference sequence sort key. :rtype: float :return: sequence homology with reference sequence sort key """ return sequence.homology
[docs] def sortKeySimilarity(self, sequence): """ Returns sequence similarity to the consesus sequence as a sort key. :rtype: float :return: sequence similarity to the consesus sequence sort key """ return sequence.similarity
[docs] def sortKeyScore(self, sequence): """ Returns sequence score sort key. :rtype: float :return: sequence score sort key """ return sequence.score
[docs] def getSortableSequences(self, ignore_reference=True): """ Returns a list of "sortable" sequences, i.e. all parent sequences that are not auxiliary objects and not global annotations. :type: ignore_reference: boolean :param: Normally, the reference is not considered a sortable sequence, unless ignore_reference is set to False. In such case the reference will be included in the sortable group. :rtype: list of `Sequence` :return: list of sortable sequences """ # Extract non-global sequences. my_reference = None if ignore_reference: my_reference = self.reference return [ seq for seq in self.sequences if seq.isSortable(reference=my_reference) ]
[docs] def replaceSortableSequences(self, sequence_list, ignore_reference=True): """ This method replaces all "sortable" sequences with a list of sorted sequences. The length of the given list is supposed to have a number of items equal to the number of sortable sequences. This method is used to replace sequences in the group after sorting operation. :type sequence_list: list of `Sequence` :param sequence_list: replacement list of sequences :type: ignore_reference: boolean :param: Normally, the reference is not considered a sortable sequence, unless ignore_reference is set to False. In such case the reference will be included in the sortable group. """ # Extract non-global sequences. my_reference = None if ignore_reference: my_reference = self.reference list_index = 0 list_size = len(sequence_list) for pos in range(len(self.sequences)): seq = self.sequences[pos] if seq.isSortable(reference=my_reference): if list_index < list_size: self.sequences[pos] = sequence_list[list_index] list_index += 1
[docs] def sort(self, order, reverse_order=False): """ This method sorts the sequences according to a specified order. Optionally, the sequences can be sorted in a reverse order. :type order: int :param order: sort order :type reverse_order: bool :param reverse_order: if True, the sequences will be reverse sorted (default=False, i.e. sorting from smallest to largest key) """ # Calculate profile, just to be up-to-date. self.calculateProfile() sortable = self.getSortableSequences() # Sort the sequences according to specified sort order. if order == constants.SORT_NAME: sortable.sort(key=self.sortKeyName) elif order == constants.SORT_CHAIN: sortable.sort(key=self.sortKeyChain) elif order == constants.SORT_LENGTH: sortable.sort(key=self.sortKeyLength) elif order == constants.SORT_GAPS: sortable.sort(key=self.sortKeyGaps) elif order == constants.SORT_IDENTITY: sortable.sort(key=self.sortKeyIdentity) elif order == constants.SORT_HOMOLOGY: sortable.sort(key=self.sortKeyHomology) elif order == constants.SORT_SIMILARITY: sortable.sort(key=self.sortKeySimilarity) elif order == constants.SORT_SCORE: sortable.sort(key=self.sortKeyScore) if reverse_order: sortable.reverse() # Replace non-global sequences with the sorted ones. sorted_pos = 0 for pos in range(len(self.sequences)): seq = self.sequences[pos] if seq.isSortable(reference=self.reference): self.sequences[pos] = sortable[sorted_pos] sorted_pos += 1 self.updateReference()
[docs] def moveUp(self): """ Moves selected sequences to the top of the group. """ sortable = self.getSortableSequences() if sortable and self.hasSelectedSequences(): for seq in sortable: if seq.selected: pos = sortable.index(seq) if pos > 0: sortable.remove(seq) sortable.insert(pos - 1, seq) self.replaceSortableSequences(sortable) else: for seq in self.sequences: n_children = len(seq.children) for index in range(n_children): child = seq.children[index] if child.selected and index: seq.children.remove(child) seq.children.insert(index - 1, child)
[docs] def moveDown(self): """ Moves selected sequences to the bottom of the group. """ sortable = self.getSortableSequences() if sortable and self.hasSelectedSequences(): for seq in reversed(sortable): if seq.selected: pos = sortable.index(seq) sortable.remove(seq) sortable.insert(pos + 1, seq) self.replaceSortableSequences(sortable) else: for seq in self.sequences: n_children = len(seq.children) for index in range(n_children - 1, -1, -1): child = seq.children[index] if child.selected and index < n_children - 1: seq.children.remove(child) seq.children.insert(index + 1, child)
[docs] def moveTop(self, target=None): """ Moves selected sequences to the top of the group. If target is specified, move only the target sequence to top. """ if target == self.reference: # When target is reference, include the reference in the # sortable group. sortable = self.getSortableSequences(ignore_reference=False) else: sortable = self.getSortableSequences() if target and target in sortable: sortable.remove(target) sortable.insert(0, target) sorted_list = sortable else: sorted_list = [] for seq in sortable: if seq.selected: sorted_list.append(seq) for seq in sortable: if not seq.selected: sorted_list.append(seq) if target == self.reference: self.replaceSortableSequences(sorted_list, ignore_reference=False) else: self.replaceSortableSequences(sorted_list)
[docs] def moveBottom(self): """ Moves selected sequences to the bottom of the group. """ sortable = self.getSortableSequences() sorted_list = [] for seq in sortable: if not seq.selected: sorted_list.append(seq) for seq in sortable: if seq.selected: sorted_list.append(seq) self.replaceSortableSequences(sorted_list)
[docs] def sortByTreeOrder(self): """ Sorts the sequences by the tree order. If there is no tree, returns False. :return: True on success, False if valid tree doesn't exist. :rtype: boolean """ if not self.tree: return False tree_list = [] tree = self.tree.getVisibleTree() tree_list = tree.getSequenceList(tree_list) if len(tree_list) == 0: return False # Replace all non-global sequences with sorted list. sorted_pos = 0 for pos in range(min(len(self.sequences), len(tree_list))): seq = self.sequences[pos] if seq.isValidProtein(): self.sequences[pos] = tree_list[sorted_pos] sorted_pos += 1 return True
[docs] def duplicateSelectedSequences(self): """ Duplicates selected sequences. """ copy_list = [] for index, seq in enumerate(self.sequences): if seq.selected: copy_list.append((index + len(copy_list), seq.copyForUndo())) for index, seq in copy_list: self.sequences.insert(index, seq)
[docs] def getGlobalAnnotation(self, annotation_type): return next((seq for seq in self.sequences if seq.annotation_type == annotation_type), None)
[docs] def addAnnotation(self, annotation_type, remove=False): """ Adds an annotation sequence to selected sequences or to all sequences if no sequence is selected. :type annotation_type: int :param annotation_type: type of the annotation sequence """ has_selected = self.hasSelectedSequences() seq = None if remove: for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ seq.hasAnnotationType(annotation_type): for child in seq.children: if child.annotation_type == annotation_type: seq.children.remove(child) return if annotation_type == constants.ANNOTATION_HELIX_PROPENSITY: # Add helix propensity annotation. for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Helix Propensity" plot.short_name = "Helix Propensity" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_COLOR_BLOCKS for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.color = parent_res.colorHelixPropensity() elif annotation_type == constants.ANNOTATION_STRAND_PROPENSITY: # Add strand propensity annotation. for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Strand Propensity" plot.short_name = "Strand Propensity" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_COLOR_BLOCKS for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.color = parent_res.colorStrandPropensity() elif annotation_type == constants.ANNOTATION_TURN_PROPENSITY: # Add turn propensity annotation. for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Turn Propensity" plot.short_name = "Turn Propensity" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_COLOR_BLOCKS for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.color = parent_res.colorTurnPropensity() elif annotation_type == constants.ANNOTATION_STERIC_GROUP: # Add strand propensity annotation. for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Steric Group" plot.short_name = "Steric Group" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_COLOR_BLOCKS for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.color = parent_res.colorStericGroup() elif annotation_type == constants.ANNOTATION_HELIX_TERMINATORS: for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Helix Breaking" plot.short_name = "Helix Breaking" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_COLOR_BLOCKS for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.color = parent_res.colorHelixTerminators() elif annotation_type == constants.ANNOTATION_SIDECHAIN_CHEMISTRY: for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Sidechain Chemistry" plot.short_name = "Sidechain Chemistry" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_COLOR_BLOCKS for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.color = parent_res.colorSidechainChemistry() elif annotation_type == constants.ANNOTATION_EXPOSURE_TENDENCY: for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Exposure Tendency" plot.short_name = "Exposure Tendency" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_COLOR_BLOCKS for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.color = parent_res.colorExposureTendency() elif annotation_type == constants.ANNOTATION_BFACTOR: # Add strand propensity annotation. for seq in self.sequences: if (not has_selected or seq.selected) and seq.visible and \ seq.has_structure and seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "B factor" plot.short_name = "B factor" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_HISTOGRAM plot.height = 3 plot.plot_color = (128, 160, 240) for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.value = parent_res.bfactor plot.calculatePlotValues(0) elif annotation_type == constants.ANNOTATION_HYDROPHOBICITY: # Add strand propensity annotation. for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Hydrophobicity" plot.short_name = "Hydrophobicity" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_HISTOGRAM plot.height = 3 plot.plot_color = (240, 160, 240) amino_acids = list(constants.HYDROPHOBICITY_SCALES) for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] if parent_res.code in amino_acids: res.value = constants.HYDROPHOBICITY_SCALES[ parent_res.code][constants.KD_SCALE] plot.calculatePlotValues(2) elif annotation_type == constants.ANNOTATION_PI: # Add isoelectric point annotation. for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Isoelectric Point" plot.short_name = "Isoelectric Point" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_HISTOGRAM plot.height = 3 plot.plot_color = (160, 240, 192) amino_acids = list(constants.PI_SCALE) for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] if parent_res.code in amino_acids: res.value = constants.PI_SCALE[parent_res.code] plot.calculatePlotValues(2) elif annotation_type == constants.ANNOTATION_IDENTITY: # Add strand propensity annotation. for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Identity" plot.short_name = "Identity" plot.annotation_type = annotation_type plot.plot_style = constants.PLOT_LINE plot.height = 3 amino_acids = list(constants.HYDROPHOBICITY_SCALES) for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.is_gap = parent_res.is_gap if parent_res.code in amino_acids: res.value = \ constants.HYDROPHOBICITY_SCALES[ parent_res.code][ constants.KD_SCALE] plot.calculatePlotValues(2) elif annotation_type == constants.ANNOTATION_RESNUM: # Add residue number annotation for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): plot = seq.createAnnotationSequence() plot.name = "Residue Numbers" plot.short_name = "" plot.annotation_type = annotation_type elif annotation_type == constants.ANNOTATION_SSBOND: # Add SSBond annotation for seq in self.sequences: if (not has_selected or seq.selected) and \ seq.isValidProtein() and \ not seq.hasAnnotationType(annotation_type): if not seq.ssb_bond_list: continue plot = seq.createSSBondAssignment() seq.children.insert(0, plot) plot.annotation_type = annotation_type plot.height = int(len(seq.ssb_bond_list) * 0.5 + 0.5) plot.parent_sequence = seq if seq: seq.propagateGapsToChildren()
[docs] def addCustomAnnotation(self, sequence=None, title="Custom Annotation", name="", region_list=[]): # noqa: M511 """ This function adds or updates a custom annotation in the specified sequence. :param region_list: List of residue regions. Each item should include: (first_res_id, last_res_id, label, color) """ for plot in sequence.children: if plot.annotation_type == constants.ANNOTATION_CUSTOM: sequence.children.remove(plot) break for seq in self.sequences: if seq == sequence and seq.isValidProtein() and \ not seq.hasAnnotationType(constants.ANNOTATION_CUSTOM): plot = seq.createAnnotationSequence() plot.name = title plot.short_name = name plot.annotation_type = constants.ANNOTATION_CUSTOM plot_residues = plot.gaplessResidues() for first, last, label, color in region_list: lab_idx = 0 for index in range(first, last + 1): if index < len(plot_residues): plot_residues[index].color = color plot_residues[index].code = ' ' if lab_idx < len(label): plot_residues[index].code = label[lab_idx] lab_idx += 1 # else: # plot_residues[index].color = res.color # Assign gaps for pos in range(len(seq.residues)): parent_res = seq.residues[pos] res = plot.residues[pos] res.inverted = True res.is_gap = parent_res.is_gap
[docs] def selectRegions(self, sequence, region_list): """ Select residue subsets based on a region list. """ for first, last, label, color in region_list: for index in range(first, last + 1): sequence.residues[index].selected = True
[docs] def selectRedundantSequences(self, value, columns=False, reference=None): """ Selects sequences below a specified identity threshold value. """ aa_sequences = [seq for seq in self.sequences if seq.isValidProtein()] n_aa_sequences = len(aa_sequences) if n_aa_sequences == 0: return for seq in aa_sequences: seq.selected = False if reference: for n in range(n_aa_sequences): id = aa_sequences[n].calcIdentity(reference, self.consider_gaps, columns) if id * 100 >= value: aa_sequences[n].selected = True else: for n in range(n_aa_sequences): for m in range(n): id = aa_sequences[n].calcIdentity(aa_sequences[m], self.consider_gaps, columns) if id * 100 >= value: len_n = aa_sequences[n].gaplessLength() len_m = aa_sequences[m].gaplessLength() if len_n < len_m: aa_sequences[n].selected = True else: aa_sequences[m].selected = True
[docs] def clearConstraints(self): for seq in self.sequences: if seq.type == constants.SEQ_CONSTRAINTS: seq.constraint_list = [] break
[docs] def removeConstraints(self): for seq in self.sequences: if seq.type == constants.SEQ_CONSTRAINTS: self.sequences.remove(seq) break
[docs] def hasConstraints(self): for seq in self.sequences: if seq.type == constants.SEQ_CONSTRAINTS: if len(seq.constraint_list) > 0: return True return False
[docs] def addConstraint(self, seq1, pos1, seq2, pos2, for_prime=False): """ Adds a pairwise alignment constraint. At least one of the specified sequences has to be a reference sequence. :rtype: boolean :return: True if the constrait was successfully added, False otherwise """ if self.reference not in self.sequences: return False index = self.sequences.index(self.reference) next_seq = None for seq in self.sequences[index + 1:]: if seq.isValidProtein(): next_seq = seq break if not next_seq: return False cons_seq = None for seq in self.sequences: if seq.type == constants.SEQ_CONSTRAINTS: cons_seq = seq break if not cons_seq: cons_seq = Sequence() cons_seq.constraint_list = [] cons_seq.type = constants.SEQ_CONSTRAINTS cons_seq.height = 2 cons_seq.name = cons_seq.short_name = "Constraints" self.sequences.insert(index + 1, cons_seq) if seq1 is None and seq2 is None: return True if seq1 != self.reference and seq2 != self.reference: return False if seq2 == self.reference: # Reorder if seq1 is not a reference. tmp = seq1 seq1 = seq2 seq2 = tmp tmp = pos1 pos1 = pos2 pos2 = tmp if for_prime: seq1 = self.reference seq2 = self.reference next_seq = self.reference if pos2 > pos1: tmp = pos2 pos2 = pos1 pos1 = tmp ref_pos = pos1 pos = pos2 if ref_pos is None or pos is None: return False real_ref_pos = self.reference.getUngappedIndex(ref_pos) real_pos = next_seq.getUngappedIndex(pos) for constraint in cons_seq.constraint_list: start, end, seq, prime = constraint if start == real_ref_pos and end == real_pos: cons_seq.constraint_list.remove(constraint) return cons_seq.constraint_list.append( (real_ref_pos, real_pos, next_seq, for_prime)) return True
[docs] def statistics(self): total, selected, hidden, maestro = (0, 0, 0, 0) for seq in self.sequences: if not seq.type == constants.SEQ_RULER and \ not seq.type == constants.SEQ_SEPARATOR: if not seq.visible: hidden += 1 if seq.selected: selected += 1 if seq.from_maestro: maestro += 1 total += 1 return total, selected, maestro, hidden
[docs] def setReference(self, ref=None): """ Sets a reference sequence. """ # To avoid any confusion, just remove the constraints. self.removeConstraints() if ref and ref in self.sequences and (ref.isValidProtein() or ref.type == constants.SEQ_CONSENSUS): self.reference = ref self.moveTop(target=ref) return True elif self.hasSelectedSequences(): for seq in self.sequences: if seq.selected: if seq.type == constants.SEQ_CONSENSUS or \ seq.isValidProtein(): self.reference = seq self.moveTop(target=seq) return True return False
[docs] def removeMaestroSequences(self): """ Removes all Maestro sequences. """ self.sequences = [seq for seq in self.sequences if not seq.from_maestro] self.removeLonelyRuler()
[docs] def removeLonelyRuler(self): """ Remove ruler but only if there is nothing else left. :rtype: bool :return: True if the ruler was removed """ seq = None for seq in self.sequences: if seq.type != constants.SEQ_RULER: return False if seq: self.sequences = [] return True
[docs] def toggleHistory(self): """ Adds or removes a sequence modification history string. """ for seq in self.sequences: if seq.type == constants.SEQ_HISTORY: self.sequences.remove(seq) return history = Sequence() history.type = constants.SEQ_HISTORY history.name = "History of Changes" history.short_name = history.name[:] self.sequences.append(history)
[docs] def resetHistory(self): """ This function resets change tracking string. """ history = None for seq in self.sequences: if seq.type == constants.SEQ_HISTORY: history = seq break if history: for res in history.residues: res.value = 0.0 res.color = (255, 255, 255)
# if self.viewer: # self.viewer.updateView()
[docs] def updateHistory(self, start, end): """ This function updates an internal history string. :rtype: bool :return: True if the history was updated, False otherwise. """ history = None for seq in self.sequences: if seq.type == constants.SEQ_HISTORY: history = seq break if history: if end < start: tmp = start start = end end = tmp if end >= history.length(): new_length = end - history.length() + 1 for pos in range(new_length): res = Residue() res.sequence = history res.color = (255, 255, 255) res.value = 0.0 res.code = ' ' res.inverted = True history.residues.append(res) for res in history.residues: res.value -= 0.1 if res.value < 0.0: res.value = 0.0 elif res.value < 0.2: res.value = 0.2 color = int(255 * (1.0 - res.value)) res.color = (color, color, color) for pos in range(start, end): res = history.residues[pos] res.color = (0, 0, 0) res.value = 1.0 # Update the viewer. # if self.viewer: # self.viewer.updateView() return True return False
[docs] def setConsiderGaps(self, value): """ Sets value of consider gaps flag. If set to True, gaps will be included in calculation of local sequence similarity measures. :type value: bool :param value: Should we consider gaps for sequence identity calculations. """ self.consider_gaps = value self.calculateProfile() self.updateVariableSequences()
# if self.viewer: # self.viewer.updateView()
[docs] def updateSSA(self, remove=False): """ Updates secondary structure assignments for all structure-coupled sequences. """ any_selected = self.hasSelectedSequences() for seq in self.sequences: if not any_selected or seq.selected: if seq.has_structure: for child in seq.children: if child.type == constants.SEQ_SECONDARY: seq.children.remove(child) break if remove: continue ssa = seq.createSecondaryAssignment() for pos in range(len(ssa.residues)): ss_res = ssa.residues[pos] ss_res.code = seq.residues[pos].ss_code if not seq.residues[pos].is_gap: ss_res.inverted = True if ss_res.code == 'H': ss_res.color = constants.SS_COLOR_HELIX elif ss_res.code == 'E': ss_res.color = constants.SS_COLOR_EXTENDED else: ss_res.color = constants.SS_COLOR_COIL seq.children.insert(0, ssa) ssa.parent_sequence = seq seq.propagateGapsToChildren()
# if self.viewer: # self.viewer.updateView()
[docs] def expandSelection(self): """ Expands selection to include entire columns. """ no_selected = not self.hasSelectedSequences() for pos in range(self.findMaxLength()): for seq in self.sequences: if pos < seq.length(): if seq.selected or no_selected: if seq.residues[pos].selected: self.selectColumns(pos, pos) break
[docs] def expandSelectionRef(self): """ Expands selection from the reference sequence to include entire columns. """ if not self.reference: return for pos in range(self.reference.length()): if self.reference.residues[pos].selected: self.selectColumns(pos, pos)
[docs] def hideColumns(self, unselected=False): """ Hides selected or un-selected columns. """ max = self.findMaxLength() # Just to be sure we are not going to loose any residues here. self.showAllResidues() for pos in range(max): if self.isColumnSelected(pos): if not unselected: self.hideColumn(pos) elif unselected: self.hideColumn(pos) for seq in self.sequences: if seq.isValidProtein(): seq.tmp_residues = copy.copy(seq.residues) seq.residues = [res for res in seq.residues if res.visible] seq.tmp_children = [] for child in seq.children: seq.tmp_children.append(copy.copy(child.residues)) child.residues = [ res for res in child.residues if res.visible ]
[docs] def isColumnSelected(self, pos, weak=False): """ Returns True if the specified column is selected, False otherwise. :type weak: bool :param weak: If weak is False (default) treat the column as selected only if all residues in the column are selected. """ for seq in self.sequences: if seq.isValidProtein(): if pos < seq.length(): if not weak: if not seq.residues[pos].selected: return False else: if seq.residues[pos].selected: return True return not weak
[docs] def hideColumn(self, pos): """ Hides the specified column. """ for seq in self.sequences: if seq.isValidProtein(): if pos < seq.length(): seq.residues[pos].visible = False for child in seq.children: child.residues[pos].visible = False
[docs] def showAllResidues(self): """ Makes all residues in all sequences visible. """ for seq in self.sequences: if seq.isValidProtein(): if seq.tmp_residues: seq.residues = seq.tmp_residues for res in seq.residues: res.visible = True seq.tmp_residues = None if seq.tmp_children: for index, child in enumerate(seq.children): child.residues = seq.tmp_children[index] seq.tmp_children = None for child in seq.children: for res in child.residues: res.visible = True
[docs] def markResidues(self, rgb): for seq in self.sequences: if seq.isValidProtein(): for res in seq.residues: if res.selected: res.marked_color = rgb
[docs] def clearMarkedResidues(self): no_selected = not self.hasSelectedResidues() for seq in self.sequences: if seq.isValidProtein(): for res in seq.residues: if no_selected or res.selected: res.marked_color = None
[docs] def cropSelectedResidues(self): if self.hasSelectedResidues(): self.invertSelection() self.deleteSelectedResidues()
[docs] def markTemplateRegion(self): for seq in self.sequences: if seq.has_structure and \ seq != self.reference: for index, res in enumerate(seq.residues): if res.selected: res.model = True res.selected = False for seq2 in self.sequences: if seq != seq2: if index < seq2.length(): seq2.residues[index].model = False # msv_maestro.propagateColorsToMaestro( # self.viewer, seq, template_colors=True) self.unselectAll()
[docs] def premarkTemplateRegion(self): if not self.hasValidTemplates(): return False for seq in self.sequences: if seq.isValidTemplate(reference=self.reference): for res in seq.residues: if res.model: return False for seq in self.sequences: if seq.isValidTemplate(reference=self.reference): for index, res in enumerate(seq.residues): res.model = True break self.unselectAll() # if self.viewer: # self.viewer.repaint() return True
[docs] def unmarkTemplateRegions(self): for seq in self.sequences: if seq.has_structure and seq != self.reference: for res in seq.residues: res.model = False # msv_maestro.propagateColorsToMaestro(self.viewer, # seq, template_colors=True) self.unselectAll()
[docs] def getStructureList(self, omit_reference=False): """ Returns a list of visible sequences associated with structures. """ return [ seq for seq in self.sequences if seq.has_structure and seq.visible and (not omit_reference or (seq != self.reference)) ]
[docs] def getMaestroSequencesForEntryId(self, entry_id): seq_list = [] for seq in self.sequences: if seq.from_maestro and seq.maestro_entry_id == entry_id: seq_list.append(seq) return seq_list
[docs] def colorSequenceNames(self, color): no_selected = not self.hasSelectedSequences() for seq in self.sequences: if no_selected or seq.selected: seq.color = color children_selected = seq.hasSelectedChildren() for child in seq.children: if not children_selected or child.selected: child.color = color
[docs] def selectFirstTemplate(self, n_templates=1): """ Selects first available valid template from a template list. If a template is alread selected, do nothing. Optionally, select n_templates valid templates instead of just the first one. """ for seq in self.sequences: if seq.selected and seq.isValidTemplate(reference=self.reference): return n = 0 for seq in self.sequences: if seq.isValidTemplate(reference=self.reference): seq.selected = True n += 1 if n == n_templates: return
[docs] def hasValidTemplates(self): for seq in self.sequences: if seq.isValidTemplate(reference=self.reference): return True return False
[docs] def getTemplates(self, selected_only=False): """ Return a list of `sequences<schrodinger.ui.sequencealignment.sequence.Sequence` that are valid templates. """ templates = [] for seq in self.sequences: if selected_only and not seq.selected: continue if seq.isValidTemplate(reference=self.reference): templates.append(seq) return templates
[docs] def copySequences(self, group): not_selected = not group.hasSelectedSequences() for seq in group.sequences: if (seq.selected or not_selected) and \ seq.type != constants.SEQ_RULER: seq_copy = seq.copyForUndo() self.sequences.append(seq_copy)
[docs] def calculateMatrix(self): """ Calculates a substitution matrix based on the current alignment. """ # Calculate expected frequencies p_exp = {} amino_acids = list(constants.AMINO_ACIDS) for aa in amino_acids: p_exp[aa] = 0.0 total = 1.0 for seq in self.sequences: if seq.isValidProtein(): for res in seq.residues: if res.code in p_exp: p_exp[res.code] += 1.0 total += 1.0 for aa in list(p_exp): p_exp[aa] /= total # Calculate target (observed) pair frequencies q = {} m = {} for aa in amino_acids: for ab in amino_acids: q[aa + ab] = 0.0 m[aa + ab] = 0.0 total = 0.0 max_length = self.findMaxLength() for pos in range(max_length): for seq1 in self.sequences: if seq1.isValidProtein() and pos < seq1.length(): code1 = seq1.residues[pos].code for seq2 in self.sequences: if seq2.isValidProtein and pos < seq2.length(): code2 = seq2.residues[pos].code cc = code1 + code2 if cc in q: q[cc] += 1.0 total += 1.0 for aa in amino_acids: for ab in amino_acids: # NB: If the group is empty this causes a divide by zero error q[aa + ab] /= total if q[aa + ab] > 0.0: m[aa + ab] = math.log( old_div(q[aa + ab], (p_exp[aa] * p_exp[ab]))) return m
[docs] def alignByResidueNumbers(self): """ Aligns the sequences so that identical residue numbers are lined up in columns. """ minres = maxres = None seq_list = [] new_res_list = [] # Find residue number range for seq in self.sequences: if seq.isValidProtein(): seq_list.append(seq) seq.residues = seq.gaplessResidues() new_res_list.append([]) for res in seq.residues: if minres is None: minres = res.num elif minres > res.num: minres = res.num if maxres is None: maxres = res.num elif maxres < res.num: maxres = res.num # Iterate over residue number range resnum = minres while resnum <= maxres: found = True while found: found = False # Check if there are any residues at this position for index, seq in enumerate(seq_list): if seq.residues: if seq.residues[0].num <= resnum: found = True break if found: # If yes, pick the residue if resnum matches # or insert a gap if not for index, seq in enumerate(seq_list): if seq.residues: if seq.residues[0].num <= resnum: new_res_list[index].append(seq.residues.pop(0)) else: gap = Residue() gap.sequence = seq gap.is_gap = True gap.code = constants.UNLOCKED_GAP_SYMBOL new_res_list[index].append(gap) resnum += 1 for index, seq in enumerate(seq_list): if seq.isValidProtein(): seq.residues = new_res_list[index] seq.propagateGapsToChildren()
[docs] def addCustomSequence(self): sequence = Sequence() sequence.type = constants.SEQ_CUSTOM sequence.name = "" sequence.short_name = "Custom annotation" max_length = self.findMaxLength() sequence.residues = [ Residue(sequence=sequence, code=' ', inverted=False, color=self.inv_background_color) for x in range(max_length) ] self.sequences.append(sequence)
#USED ONLY IN TESTING! (delete later)
[docs] def getSequenceNameList(self): """ Returns a list of sequence names. """ name_list = [] for seq in self.sequences: name_list.append(seq.name) return name_list
[docs] def repair(self): """ Repairs the group by setting sequence-residue associations for all sequences in group. Also, adds missing attributes (using default values) to the group. Useful if the group comes from a corrupted project file. """ # Create dummy objects empty_sequence = Sequence() empty_residue = Residue() for seq in self.sequences: # Add missing sequence attributes for attr in list(empty_sequence.__dict__): if not hasattr(seq, attr): setattr(seq, attr, getattr(empty_sequence, attr)) for res in seq.residues: # Update residue parent res.sequence = seq # Add missing residue attributes for attr in list(empty_residue.__dict__): if not hasattr(res, attr): setattr(res, attr, getattr(empty_residue, attr)) for child in seq.children: # Add missing child sequence attributes for attr in list(empty_sequence.__dict__): if not hasattr(child, attr): setattr(child, attr, getattr(empty_sequence, attr)) for child_res in child.residues: # Update child residue parent child_res.sequence = child # Add missing child residue attributes for attr in list(empty_residue.__dict__): if not hasattr(child_res, attr): setattr(child_res, attr, getattr(empty_residue, attr)) # Add missing group attributes empty_group = SequenceGroup() for attr in list(empty_group.__dict__): if not hasattr(self, attr): setattr(self, attr, getattr(empty_group, attr))