"""
Implementation of SequenceGroup class.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Piotr Rotkiewicz
import copy
import math
import string
from past.utils import old_div
import schrodinger
from schrodinger.ui.sequencealignment.sequence import delete_from_str
from . import constants
from . import utils
from .residue import Residue
from .sequence import Sequence
try:
from schrodinger.protein.sequence import find_generalized_pattern
except ImportError:
find_generalized_pattern = None
maestro = schrodinger.get_maestro()
[docs]class SequenceGroup(object):
"""
This class is a container for the sequences displayed in the sequence
viewer. The class performs various operations on sets of sequences
or on the entire group. After the group is modified, `SequenceViewer`
update() method should be called.
"""
[docs] def __init__(self):
self.sequences = []
self.profile = None #Global consensus profile sequence.
self.reference = None #Reference sequence
self.tree = None
self.consider_gaps = True # when calculating sequence identity.
self.name = "" #Group name
self.color_mode = constants.COLOR_SIDECHAIN_CHEMISTRY #current color scheme.
self.build_mode = 0 #: Homology modeling mode.
self.user_annotations = []
self.custom_color = (255, 255, 255)
self.background_color = (255, 255, 255)
self.inv_background_color = (0, 0, 0)
#: Calculate sequence identity in selected columns only
self.identity_in_columns = False
[docs] def copyForUndo(self, attributes_only=True):
group_copy = copy.copy(self)
if attributes_only:
group_copy.sequences = []
for seq in self.sequences:
group_copy.sequences.append(seq.copyForUndo(deep_copy=False))
group_copy.profile = copy.copy(self.profile)
group_copy.tree = copy.copy(self.tree)
else:
group_copy.sequences = copy.deepcopy(self.sequences)
group_copy.profile = copy.deepcopy(self.profile)
group_copy.tree = copy.deepcopy(self.tree)
return group_copy
[docs] def clear(self):
"""
Deletes the entire contents of self.
"""
self.sequences = []
self.profile = Sequence()
self.tree = None
self.reference = None
[docs] def addSequence(self, sequence):
"""
Adds a new sequence to self.
"""
if sequence and sequence not in self.sequences:
self.sequences.append(sequence)
[docs] def hasSelectedResidues(self):
"""
Check if the group has any residues selected.
:rtype: bool
:return: True if there are any residues selected within the group,
False otherwise
"""
for seq in self.sequences:
for res in seq.residues:
if res.selected:
return True
return False
#Used in tests only, but should get deleted eventually.
[docs] def countSelectedResidues(self):
"""
Counts selected residues.
:rtype: int
:return: Number of selected residues.
"""
count = 0
for seq in self.sequences:
if seq.isValidProtein():
for res in seq.residues:
if res.selected:
count += 1
return count
[docs] def hasSelectedSequences(self,
exclude_reference=False,
check_children=False):
"""
Check if there are any selected sequences within the group.
:type check_children: bool (default=False)
:param check_children: optional parameter, if True, the function will
also check child sequences.
:rtype: bool
:return: True if there are any sequences selected in the group,
False otherwise
"""
for seq in self.sequences:
if seq.selected and (not exclude_reference or
seq != self.reference):
return True
if check_children:
for child in seq.children:
if child.selected:
return True
return False
[docs] def selectColumns(self, start, end, select=True):
"""
Select all columns within alignment positon range from start to end.
:type start: int
:param start: first column to be selected
:type end: int
:param end: last column to be selected
"""
if start is None or end is None:
return
if start > end:
tmp = end
end = start
start = tmp
if start < 0:
start = 0
for seq in self.sequences:
for column in range(start, end + 1):
if seq.length() > column:
seq.residues[column].selected = select
[docs] def unselectAll(self, make_active=False):
"""
Unselect all residues within the entire group.
"""
for seq in self.sequences:
if make_active:
seq.makeActive()
seq.unselectResidues()
for child in seq.children:
child.unselectResidues()
[docs] def anchorSelection(self):
for seq in self.sequences:
if seq.isValidProtein():
for res in seq.residues:
if res.selected:
res.selected = False
res.active = True
else:
res.active = False
[docs] def clearAnchors(self):
for seq in self.sequences:
if seq.isValidProtein():
for res in seq.residues:
res.active = True
[docs] def hideSelected(self):
"""
Hide all selected sequences (including children).
"""
for seq in self.sequences:
if seq.selected:
seq.visible = False
seq.selected = False
for child in seq.children:
child.selected = False
[docs] def hideSequences(self, sequences):
"""
Hide all sequences passed in list.
:param sequences: sequences to hide
:type sequences: list of `sequences<Sequence>`
"""
for seq in sequences:
seq.visible = False
seq.selected = False
for child in seq.children:
child.visible = False
child.selected = False
[docs] def showSequences(self, sequences):
"""
Show all sequences passed in list.
:param sequences: sequences to show
:type sequences: list of `sequences<Sequence>`
"""
for seq in sequences:
seq.visible = True
for child in seq.children:
child.visible = True
[docs] def showAll(self):
"""
Makes all sequences within the group visible.
"""
for seq in self.sequences:
seq.visible = True
for child in seq.children:
child.visible = True
[docs] def deleteSelected(self):
"""
Deletes all selected sequences.
:note: tree will be removed.
"""
filtered_sequences = [seq for seq in self.sequences if not seq.selected]
self.sequences = filtered_sequences
for seq in self.sequences:
seq.children = [
child for child in seq.children if not child.selected
]
if len(self.sequences) == 0:
self.profile = None
self.reference = None
self.tree = []
self.updateReference()
return self.removeLonelyRuler()
[docs] def updateReference(self, ignore_maestro=False):
"""
This function updates the reference sequence.
"""
if not self.reference or self.reference not in self.sequences:
# Reset just in case the reference is not valid anymore.
self.reference = None
for seq in self.sequences:
if seq and seq.isValidProtein():
if ignore_maestro and seq.from_maestro:
continue
self.reference = seq
break
# Move refrence to the top position.
if self.reference and self.reference in self.sequences:
# self.sequences.remove(self.reference)
index = 0
for index, seq in enumerate(self.sequences):
if seq and not \
(seq.type == constants.SEQ_RULER or seq.global_sequence):
break
self.sequences.remove(self.reference)
self.sequences.insert(index, self.reference)
self.addRuler(update_only=True)
[docs] def fillGaps(self):
"""
Replaces all selected residues in visible sequences with gaps.
"""
none_selected = not self.hasSelectedSequences()
for seq in self.sequences:
if seq.visible and (none_selected or seq.selected):
for res in seq.residues:
if res.selected:
res.code = constants.UNLOCKED_GAP_SYMBOL
res.is_gap = True
res.inverted = False
res.color = (0, 0, 0)
[docs] def removeAllGaps(self):
"""
Removes all gaps from the visible sequences.
"""
none_selected = not self.hasSelectedSequences()
for seq in self.sequences:
if seq.visible and (none_selected or seq.selected):
seq.removeAllGaps(selected_only=True)
seq.propagateGapsToChildren()
[docs] def deleteSelectedResidues(self):
"""
Deletes selected residues from the visible sequences.
"""
for seq in self.sequences:
if seq.visible:
for child in seq.children:
for index, res in enumerate(child.residues):
if index < seq.length():
res.selected = seq.residues[index].selected
else:
# Truncate extra residues. This shoud not
# normally be necessary unless the sequence
# comes from a corrupted project.
res.selected = True
seq.deleteSelectedResidues()
for child in seq.children:
child.deleteSelectedResidues()
[docs] def isColumnEmpty(self, position):
"""
Checks if a column at given position is empty.
"""
for seq in self.sequences:
length = seq.length()
if seq.isValidProtein() and position < length:
if not seq.residues[position].is_gap:
return False
return True
[docs] def minimizeAlignment(self, query_only=False):
"""
Minimizes the alignment, i.e. removes all gaps from the gap-only
columns.
"""
max_length = self.findMaxLength()
for pos in range(max_length):
gaps_only = True
if query_only:
if self.reference and pos < self.reference.length() and \
self.reference.residues[pos].is_gap != True:
gaps_only = False
else:
for seq in self.sequences:
length = seq.length()
if seq.isValidProtein() and pos < length:
res = seq.residues[pos]
if not res.is_gap:
gaps_only = False
break
if gaps_only:
for seq in self.sequences:
length = seq.length()
if seq.isValidProtein() and pos < length:
seq.residues[pos].num = None
for child in seq.children:
child.residues[pos].num = None
for seq in self.sequences:
if seq.isValidProtein():
seq.residues = [
res for res in seq.residues if res.num is not None
]
for child in seq.children:
child.residues = [
res for res in child.residues if res.num is not None
]
[docs] def lockGaps(self):
"""
Locks gaps in visible sequences.
"""
no_selected = not self.hasSelectedResidues()
for seq in self.sequences:
if seq.visible:
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
if parent_res.is_gap and \
(no_selected or parent_res.selected):
parent_res.code = constants.LOCKED_GAP_SYMBOL
for child in seq.children:
if pos not in list(range(len(child.residues))):
continue
res = child.residues[pos]
if res.is_gap and (no_selected or parent_res.selected):
res.code = constants.LOCKED_GAP_SYMBOL
[docs] def unlockGaps(self):
"""
Unlocks gaps in visible sequences.
"""
no_selected = not self.hasSelectedResidues()
for seq in self.sequences:
if seq.visible:
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
if parent_res.is_gap and \
(no_selected or parent_res.selected):
parent_res.code = constants.UNLOCKED_GAP_SYMBOL
for child in seq.children:
if pos not in list(range(len(child.residues))):
continue
res = child.residues[pos]
if res.is_gap and (no_selected or parent_res.selected):
res.code = constants.UNLOCKED_GAP_SYMBOL
[docs] def selectAlignedBlocks(self, selected_only=False):
"""
Selects aligned blocks (regions without gaps).
"""
self.unselectAll()
any_selected = self.hasSelectedSequences()
if not any_selected:
selected_only = False
max_length = self.findMaxLength()
for pos in range(max_length):
n_res = 0
n_id = 0
for seq in self.sequences:
if seq.isValidProtein() and (not selected_only or seq.selected):
n_res += 1
if pos < seq.length():
if not seq.residues[pos].is_gap:
n_id += 1
if n_res == n_id:
for seq in self.sequences:
if seq.isValidProtein() and \
(not selected_only or seq.selected):
if pos < seq.length() and \
not seq.residues[pos].is_gap:
seq.residues[pos].selected = True
[docs] def selectStructureBlocks(self, selected_only=False):
"""
Selects aligned blocks (regions without gaps).
"""
self.unselectAll()
max_length = self.findMaxLength()
for pos in range(max_length):
n_id = 0
for seq in self.sequences:
if seq.isValidProtein() and (not selected_only or seq.selected):
if pos < seq.length():
if not seq.residues[pos].structureless:
n_id += 1
if n_id > 0:
for seq in self.sequences:
if seq.isValidProtein() and \
(not selected_only or seq.selected):
if pos < seq.length() and \
not seq.residues[pos].is_gap:
seq.residues[pos].selected = True
[docs] def selectIdentities(self):
"""
Selects identical residues in the alignment, ignores gaps.
"""
self.unselectAll()
max_length = self.findMaxLength()
for pos in range(max_length):
code = None
n_res = 0
n_id = 0
for seq in self.sequences:
if seq.isValidProtein():
if pos < seq.length():
if not seq.residues[pos].is_gap:
n_res = n_res + 1
if code is None:
code = seq.residues[pos].code
n_id += 1
elif seq.residues[pos].code == code:
n_id += 1
if n_res > 0 and n_res == n_id:
for seq in self.sequences:
if pos < seq.length() and \
not seq.residues[pos].is_gap:
seq.residues[pos].selected = True
[docs] def selectResidues(self, start_row, start_pos, end_row, end_pos, select):
"""
Selects residues within a specified region.
:type start_row: (Sequence, int, int, int)
:param start_row: initial row
:type start_pos: int
:param start_pos: initial position
:type end_row: (Sequence, int, int, int)
:param end_row: final row
:type end_pos: int
:param end_pos: final position
"""
if start_pos is None or end_pos is None:
return
start_seq, start, dummy, height = start_row
end_seq, dummy, end, height = end_row
if start_pos > end_pos:
tmp = end_pos
end_pos = start_pos
start_pos = tmp
if start_seq.parent_sequence:
start_seq = start_seq.parent_sequence
if end_seq.parent_sequence:
end_seq = end_seq.parent_sequence
# Block selection.
if (start_seq not in self.sequences) or (end_seq not in self.sequences):
return
start_seq_idx = self.sequences.index(start_seq)
end_seq_idx = self.sequences.index(end_seq)
if start_seq_idx > end_seq_idx:
tmp = end_seq_idx
end_seq_idx = start_seq_idx
start_seq_idx = tmp
for seq_idx in range(start_seq_idx, end_seq_idx + 1):
seq = self.sequences[seq_idx]
for pos in range(start_pos, end_pos + 1):
res = seq.getResidue(pos)
if res:
res.selected = select
for child in seq.children:
res = child.getResidue(pos)
if res:
res.selected = select
[docs] def grabAndDrag(self,
start_row,
start_pos,
end_row,
end_pos,
lock_down,
gap_insert_mode=False,
slide_sequence=False,
block_length=0):
"""
This method performs a grab-and-drag operation on a specified region.
:type start_row: (Sequence, int, int, int)
:param start_row: initial row
:type start_pos: int
:param start_pos: initial position
:type end_row: (Sequence, int, int, int)
:param end_row: final row
:type end_pos: int
:param end_pos: final position
:type lock_down: bool
:param lock_down: indicates if the sequence downstream should be locked
"""
if start_row is None or end_row is None:
return
start_seq, start, dummy, height = start_row
end_seq, dummy, end, height = end_row
if start_seq.parent_sequence:
start_seq = start_seq.parent_sequence
if end_seq.parent_sequence:
end_seq = end_seq.parent_sequence
if start_seq is not None and start_pos is not None and \
end_pos is not None and not start_seq.global_sequence:
parent_sequence = seq = start_seq
if slide_sequence:
if end_pos > start_pos:
n_inserted = seq.insertGaps(0, end_pos - start_pos)
for pos in range(n_inserted):
if self.isColumnEmpty(0):
for other_seq in self.sequences:
other_seq.removeGaps(0, 1)
elif end_pos < start_pos:
diff = start_pos - end_pos
n_removed = seq.removeGapsBackwards(0, start_pos - end_pos)
if diff > n_removed:
for other_seq in self.sequences:
if other_seq.isValidProtein() and other_seq != seq:
other_seq.insertGaps(0, diff - n_removed)
return
if start_pos >= 0 and start_pos < parent_sequence.length() and \
not parent_sequence.residues[start_pos].active:
return
if end_pos > start_pos:
max_insert = parent_sequence.countActiveGaps(end_pos)
n_gaps = end_pos - start_pos
if max_insert >= 0:
n_gaps = min(max_insert, n_gaps)
n_inserted = seq.insertGaps(start_pos, n_gaps)
if not gap_insert_mode and \
(not lock_down or parent_sequence.haveAnchors(end_pos + 1)):
seq.removeGaps(end_pos, n_inserted)
elif end_pos < start_pos:
if end_pos < 0:
end_pos = 0
n_removed = seq.removeGapsBackwards(end_pos,
start_pos - end_pos)
if not lock_down and start_pos < seq.length():
seq.insertGaps(start_pos + block_length - n_removed + 1,
n_removed)
else:
inactive_pos = parent_sequence.inactivePosition(start_pos)
if inactive_pos >= 0:
seq.insertGaps(inactive_pos, n_removed)
seq.propagateGapsToChildren()
[docs] def tmpLockGaps(self, row, start_pos, end_pos):
"""
Temporarily locks gaps in a specified region.
:type row: (Sequence, int, int, int)
:param row: sequence viewer row
:type start_pos: int
:param start_pos: initial position
:type end_pos: int
:param end_pos: final position
"""
seq, start, end, height = row
for res in seq.residues:
res.tmp_code = None
if res.selected:
if res.is_gap:
res.tmp_code = res.code
res.code = constants.LOCKED_GAP_SYMBOL
[docs] def tmpUnlockGaps(self, row, start_pos, end_pos):
"""
Unlocks temporarily locked gaps.
:type row: (Sequence, int, int, int)
:param row: sequence viewer row
:type start_pos: int
:param start_pos: initial position
:type end_pos: int
:param end_pos: final position
"""
seq, start, end, height = row
for res in seq.residues:
if res.selected:
if res.is_gap and res.tmp_code:
res.code = res.tmp_code
[docs] def grabAndDragBlock(self, rows, start_row, start_pos, end_row, end_pos,
block_length, lock_down):
"""
This method performs grab-and-drag operation on selected blocks
in "Select And Slide" mode.
:type rows: list of (Sequence, int, int, int)
:param rows: sequence viewer rows
:type start_row: (Sequence, int, int, int)
:param start_row: initial row
:type start_pos: int
:param start_pos: initial position
:type end_row: (Sequence, int, int, int)
:param end_row: final row
:type end_pos: int
:param end_pos: final position
:type lock_down: bool
:param lock_down: indicates if the sequence downstream should be locked
"""
start_seq, start, dummy, height = start_row
end_seq, dummy, end, height = end_row
found = False
for row in rows:
if row == start_row:
found = True
if found:
seq, dum, dum, dum = row
if not seq.parent_sequence:
self.tmpLockGaps(row, start_pos, end_pos)
self.grabAndDrag(row,
start_pos,
row,
end_pos,
lock_down,
block_length=block_length)
self.tmpUnlockGaps(row, start_pos, end_pos)
if row == end_row:
break
[docs] def insertGap(self, start_row, start_pos):
"""
Inserts a single gap at a specified position.
:type start_row: (Sequence, int, int, int)
:param start_row: sequence viewer row
:type start_pos: int
:param start_pos: position where the gap will be inserted
"""
start_seq, start, dummy, height = start_row
if start_seq is not None:
parent_sequence = seq = start_seq
n_children = 0
if seq.parent_sequence:
parent_sequence = seq = seq.parent_sequence
n_children = len(parent_sequence.children)
child_index = 0
while seq:
seq.insertGaps(start_pos, 1)
# Iterate over children.
if child_index < n_children:
seq = parent_sequence.children[child_index]
child_index += 1
else:
seq = None
[docs] def removeGap(self, start_row, start_pos):
"""
Removes a single gap at a specified position.
:type start_row: (Sequence, int, int, int)
:param start_row: sequence viewer row
:type start_pos: int
:param start_pos: position where the gap will be removed
"""
start_seq, start, dummy, height = start_row
if start_seq is not None:
parent_sequence = seq = start_seq
n_children = 0
if seq.parent_sequence:
parent_sequence = seq = seq.parent_sequence
n_children = len(parent_sequence.children)
child_index = 0
while seq:
seq.removeGaps(start_pos, 1)
# Iterate over children.
if child_index < n_children:
seq = parent_sequence.children[child_index]
child_index += 1
else:
seq = None
[docs] def colorBySequenceSimilarity(self):
"""
This method colors the sequences by sequence similarity.
"""
self.colorByResidueType()
for seq in self.sequences:
if seq.isValidProtein(global_annotation=True):
for pos in range(seq.length()):
res = seq.residues[pos]
color = (0, 0, 0)
if res.is_gap:
res.inverted = False
else:
res.inverted = True
color = self.background_color
if self.reference and pos < self.reference.length():
profile_res = self.reference.residues[pos]
if res.code == profile_res.code:
color = (255, 64, 64)
else:
score = utils.matrixValue(
constants.SIMILARITY_MATRIX, res.code,
profile_res.code)
if score > 0.0:
color = (255, 128, 0)
res.color = color
[docs] def colorByIdentity(self):
"""
This method colors the sequences by identity.
"""
if not self.reference:
return
for pos in range(self.reference.length()):
ref_code = self.reference.residues[pos].code
if ref_code != '.':
for seq in self.sequences:
if seq.isValidProtein():
if pos < seq.length():
res = seq.residues[pos]
if not res.is_gap:
if res.code != ref_code:
res.color = self.background_color
[docs] def colorByDifference(self):
"""
This method colors the sequences by difference.
"""
if not self.reference:
return
for pos in range(self.reference.length()):
ref_code = self.reference.residues[pos].code
if ref_code != '.':
for seq in self.sequences:
if seq.isValidProtein():
if pos < seq.length():
res = seq.residues[pos]
if not res.is_gap:
if res.code == ref_code:
res.color = self.background_color
[docs] def calculateProfile(self, ignore_query=False):
"""
Calculates global sequence profile and associated information.
:type ignore_query: bool
:param ignore_query: Tells if query sequence should be included in
the calculation or it should be ignored.
"""
max_length = self.findMaxLength(exclude_consensus=True)
if not self.profile:
self.profile = Sequence()
self.profile.short_name = "Sequence Profile"
for pos in range(max_length):
res = Residue()
res.sequence = self.profile
self.profile.residues.append(res)
self.updateReference()
if self.profile.length() < max_length:
self.profile.residues = []
d_residues = max_length - self.profile.length()
for pos in range(d_residues):
res = Residue()
res.sequence = self.profile
self.profile.residues.append(res)
else:
self.profile.residues = self.profile.residues[:max_length]
amino_acid_symbols = list(constants.AMINO_ACIDS)
n_seq = 0
for seq in self.sequences:
if seq == self.reference and ignore_query:
continue
if seq.isValidProtein():
n_seq += 1
if n_seq == 0:
return
i_n_seq = old_div(1.0, n_seq)
if ignore_query or n_seq == 1:
i_n_seq_1 = i_n_seq
else:
i_n_seq_1 = old_div(1.0, (n_seq - 1))
for pos in range(max_length):
frequency = {}
for aa in string.ascii_uppercase:
frequency[aa] = 0.0
code = None
n_res = 0
n_id = 0
n_ungapped = 0
column = ""
for seq in self.sequences:
if seq == self.reference and ignore_query:
continue
if seq.isValidProtein():
if pos < seq.length():
res = seq.residues[pos]
if not res.is_gap:
n_ungapped += 1
if not res.is_gap or self.consider_gaps:
if res.code in amino_acid_symbols:
frequency[res.code] += 1
column += res.code
if code is None:
code = res.code
n_id += 1
elif res.code == code:
n_id += 1
n_res = n_res + 1
if not self.consider_gaps and n_res > 0:
factor = old_div(1.0, n_res)
else:
factor = i_n_seq
res = self.profile.residues[pos]
res.composition = set(column)
res.factor = n_ungapped * factor
max = 0.0
max_code = aa
for aa in amino_acid_symbols:
if aa == 'X':
break
f = float(frequency[aa])
if f > max:
max = f
max_code = aa
# Shannon entropy
entropy = 0.0
for aa in amino_acid_symbols:
fract = float(frequency[aa]) * i_n_seq
if fract > 0.0:
entropy += -fract * math.log(fract, 2.0)
res.bits = math.log(20.0, 2.0) - entropy
res.entropy = old_div(entropy, -math.log(old_div(1.0, 20.0), 2.0))
n_max = 0
profile_codes = []
if max > 0.0:
for aa in amino_acid_symbols:
if max == float(frequency[aa]):
profile_codes.append(aa)
n_max += 1
# Query residue fraction
res.query_fraction = 0.0
if pos < len(self.reference.residues) and \
not self.reference.residues[pos].is_gap:
query_aa = self.reference.residues[pos].code
if query_aa == '+':
query_aa = self.reference.residues[pos].profile_codes[pos]
if not ignore_query:
res.query_fraction = (frequency[query_aa] - 1) * i_n_seq_1
else:
res.query_fraction = frequency[query_aa] * i_n_seq_1
if n_res == n_id:
res.id = True
else:
res.id = False
max *= factor
res.value = max
res.profile_codes = profile_codes
if code:
res.is_gap = False
if n_max == 1:
res.code = max_code
else:
res.code = '+'
else:
res.code = '.'
for aa in amino_acid_symbols:
frequency[aa] *= i_n_seq
res.frequencies = sorted(list(frequency.items()),
key=lambda x: x[1],
reverse=True)
# Now calculate sequence identity for every sequence in the group.
for seq in self.sequences:
if seq.isValidProtein():
seq.identity = seq.calcIdentity(self.reference,
self.consider_gaps,
self.identity_in_columns)
seq.similarity = seq.calcSimilarity(self.reference,
self.consider_gaps,
self.identity_in_columns)
seq.score = seq.calcScore(self.reference, self.consider_gaps,
self.identity_in_columns)
seq.homology = seq.calcHomology(self.reference,
self.consider_gaps,
self.identity_in_columns)
[docs] def getConsensus(self):
"""
Returns a consensus sequence.
"""
for seq in self.sequences:
if seq.type == constants.SEQ_CONSENSUS:
return seq
return None
[docs] def updateConsensus(self, consensus):
"""
Updates the consensus sequence using a pre-calculated profile.
:type consensus: `Sequence`
:param consensus: consensus sequence to be updated
"""
consensus.residues = []
consensus.global_sequence = True
aa_codes = list(constants.AMINO_ACIDS)
for pos in range(self.profile.length()):
res = Residue()
res.code = self.profile.residues[pos].code
if res.code not in aa_codes:
res.color = self.inv_background_color
res.inverted = True
res.value = self.profile.residues[pos].value
res.profile_codes = self.profile.residues[pos].profile_codes
res.sequence = consensus
consensus.residues.append(res)
if len(consensus.children) > 0:
# Update the plot only if there is one.
plot = consensus.children[0]
plot.residues = []
plot.global_sequence = True
for pos in range(self.profile.length()):
res = Residue()
res.code = self.profile.residues[pos].code
res.value = self.profile.residues[pos].value
res.sequence = plot
res.color = (32, 32, 32)
plot.residues.append(res)
plot.calculatePlotValues(0, min_value=0.0, max_value=1.0)
consensus.identity = consensus.calcIdentity(self.reference,
self.consider_gaps, False)
consensus.similarity = consensus.calcSimilarity(self.reference,
self.consider_gaps,
False)
consensus.homology = consensus.calcHomology(self.reference,
self.consider_gaps, False)
consensus.score = consensus.calcScore(self.reference,
self.consider_gaps, False)
[docs] def updateSymbols(self, symbols):
"""
Updates the consensus symbols string using a pre-calculated profile.
:type symbols: `Sequence`
:param symbols: consensus symbols string to be updated
"""
strong_sets = [
set("STA"),
set("NEQK"),
set("NHQK"),
set("NDEQ"),
set("QHRK"),
set("MILV"),
set("MILF"),
set("HY"),
set("FYW")
]
weak_sets = [
set("CSA"),
set("ATV"),
set("SAG"),
set("STNK"),
set("STPA"),
set("SGND"),
set("SNDEQK"),
set("NDEQHK"),
set("NEQHRK"),
set("FVLIM"),
set("HFY")
]
symbols.residues = []
symbols.global_sequence = True
for pos in range(self.profile.length()):
res = Residue()
profile_res = self.profile.residues[pos]
res.value = profile_res.value
res.profile_codes = profile_res.profile_codes
res.sequence = symbols
res.code = ' '
res.color = self.background_color
if profile_res.factor == 1.0 and profile_res.composition:
if profile_res.id and len(profile_res.composition) == 1:
res.code = '*'
else:
for strong in strong_sets:
if profile_res.composition.issubset(strong):
res.code = ':'
break
if res.code == ' ':
for weak in weak_sets:
if profile_res.composition.issubset(weak):
res.code = '.'
break
symbols.residues.append(res)
[docs] def updateMeanHydrophobicity(self, seq):
"""
Updates mean hydrophobicity global annotation sequence.
:type seq: `Sequence`
:param seq: mean hydrophobicity sequence
"""
seq.residues = []
seq.plot_style = constants.PLOT_HISTOGRAM
seq.plot_color = (192, 128, 160)
seq.global_sequence = True
for pos in range(self.profile.length()):
res = Residue()
res.code = self.profile.residues[pos].code
n_aa = 0
total = 0.0
res.profile_codes = self.profile.residues[pos].profile_codes
for aa in res.profile_codes:
if aa != 'X':
total += constants.HYDROPHOBICITY_SCALES[aa][0]
n_aa += 1
if n_aa > 0:
total /= n_aa
res.value = total
res.sequence = seq
seq.residues.append(res)
seq.calculatePlotValues(2)
[docs] def updateMeanPI(self, seq):
"""
Updates mean isoelectric point global annotation sequence.
:type seq: `Sequence`
:param seq: mean hydrophobicity sequence
"""
seq.residues = []
seq.plot_style = constants.PLOT_HISTOGRAM
seq.plot_color = (128, 192, 160)
seq.global_sequence = True
for pos in range(self.profile.length()):
res = Residue()
res.code = self.profile.residues[pos].code
n_aa = 0
total = 0.0
res.profile_codes = self.profile.residues[pos].profile_codes
for aa in res.profile_codes:
if aa != 'X':
total += constants.PI_SCALE[aa]
n_aa += 1
if n_aa > 0:
total /= n_aa
res.value = total
res.sequence = seq
seq.residues.append(res)
seq.calculatePlotValues(2)
[docs] def colorByTaylorScheme(self):
"""
Colors all amino acid sequences using Taylor scheme.
"""
for seq in self.sequences:
if seq.isValidProtein(global_annotation=True):
for res in seq.residues:
res.color = res.colorTaylor()
if not res.is_gap:
res.inverted = True
else:
res.inverted = False
[docs] def colorAverageColumnColors(self):
"""
Averages colors in columns.
:note: This feature works particularly well with the Taylor scheme.
"""
max_length = self.findMaxLength()
for pos in range(max_length):
r = g = b = 0
n_res = 0
for seq in self.sequences:
if seq.isValidProtein():
if pos < seq.length():
res = seq.residues[pos]
if not res.is_gap:
res_r, res_g, res_b = res.color
r += res_r
g += res_g
b += res_b
n_res += 1
if n_res:
i_n_res = old_div(1.0, n_res)
r *= i_n_res
g *= i_n_res
b *= i_n_res
for seq in self.sequences:
if seq.isValidProtein():
if pos < seq.length():
res = seq.residues[pos]
if not res.is_gap:
seq.residues[pos].color = \
(int(r), int(g), int(b))
[docs] def colorWeightByAlignmentStrength(self, min_weight_identity,
max_weight_identity):
"""
Weights the sequences by alignment stregth.
"""
inv_range = old_div(1.0, (max_weight_identity - min_weight_identity))
profile_length = 0
if self.profile:
profile_length = self.profile.length()
br, bg, bb = self.background_color
for seq in self.sequences:
if seq.isValidProtein():
for pos in range(seq.length()):
res = seq.residues[pos]
if not res.is_gap and pos < profile_length:
res_r, res_g, res_b = res.color
value = 100.0 * self.profile.residues[pos].value
value = value - min_weight_identity
if value < 0:
value = 0
value *= inv_range
if value > 1.0:
value = 1.0
res_r = br * (1.0 - value) + res_r * value
res_g = bg * (1.0 - value) + res_g * value
res_b = bb * (1.0 - value) + res_b * value
res.color = (res_r, res_g, res_b)
[docs] def findMaxLength(self, exclude_consensus=False):
"""
Finds a length of the longest sequence.
:type exclude_consensus: bool
:param exclude_consensus: if True, the consensus sequence will not count
:rtype: int
:return: maximum sequence length in the entire group
"""
max_length = 0
for seq in self.sequences:
if seq and ((not exclude_consensus) or seq.type != \
constants.SEQ_CONSENSUS) and not seq.global_sequence:
if seq.length() > max_length:
max_length = seq.length()
return max_length
[docs] def unselectAllSequences(self):
"""
Unselects all sequences in the group.
"""
for seq in self.sequences:
seq.selected = False
for child in seq.children:
child.selected = False
[docs] def selectAllSequences(self):
"""
Selects all sequences in the group. Child selection remains unchanged.
"""
for seq in self.sequences:
if seq.type != constants.SEQ_SEPARATOR and \
seq.type != constants.SEQ_RULER:
seq.selected = True
# for child in seq.children:
# child.selected = False
#residues should be in name!
[docs] def selectAll(self):
"""
Selects all residues.
"""
for seq in self.sequences:
seq.selectAllResidues()
[docs] def deselectAll(self):
"""
Deselects all residues.
"""
for seq in self.sequences:
seq.unselectResidues()
[docs] def invertSequenceSelection(self):
"""
Inverts sequence selection range.
"""
for seq in self.sequences:
if len(seq.residues) > 0:
if seq.selected:
seq.selected = False
for child in seq.children:
child.selected = False
else:
seq.selected = True
for child in seq.children:
child.selected = False
[docs] def invertSelection(self):
"""
Inverts residue selection range.
"""
for seq in self.sequences:
seq.invertSelection()
#ANNOTATIONS WILL NOT BE PART OF SEQUENCE:
[docs] def deleteAnnotations(self):
"""
Removes all annotations.
"""
self.sequences = [
seq for seq in self.sequences
if seq.type != constants.SEQ_ANNOTATION and
seq.type != constants.SEQ_CONSENSUS and
seq.type != constants.SEQ_SYMBOLS and seq.type != constants.SEQ_LOGO
and seq.type != constants.SEQ_SECONDARY
]
not_selected = True
if self.hasSelectedSequences():
not_selected = False
for seq in self.sequences:
if not_selected or seq.selected:
seq.children = [
child for child in seq.children
if child.annotation_type == constants.ANNOTATION_SSP or
child.annotation_type == constants.ANNOTATION_ACC or
child.annotation_type == constants.ANNOTATION_DIS or
child.annotation_type == constants.ANNOTATION_DOM or
child.annotation_type == constants.ANNOTATION_CCB or
child.annotation_type == constants.ANNOTATION_PFAM
]
self.updateReference()
[docs] def deleteGlobalAnnotations(self):
"""
Removes all global annotations.
"""
self.sequences = [
seq for seq in self.sequences
if seq.type != constants.SEQ_CONSENSUS and
seq.type != constants.SEQ_SYMBOLS and seq.type != constants.SEQ_LOGO
and not (seq.type == constants.SEQ_ANNOTATION and
(seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO or
seq.annotation_type == constants.ANNOTATION_MEAN_PI))
]
self.updateReference()
[docs] def deletePredictions(self):
"""
Removes all predictions.
"""
not_selected = True
if self.hasSelectedSequences():
not_selected = False
for seq in self.sequences:
if not_selected or seq.selected:
seq.children = [
child for child in seq.children
if child.annotation_type != constants.ANNOTATION_SSP and
child.annotation_type != constants.ANNOTATION_ACC and
child.annotation_type != constants.ANNOTATION_DIS and
child.annotation_type != constants.ANNOTATION_DOM and
child.annotation_type != constants.ANNOTATION_CCB and
child.annotation_type != constants.ANNOTATION_PFAM
]
self.updateReference()
[docs] def addSeparator(self):
"""
Adds a separator sequence.
"""
seq = Sequence()
seq.type = constants.SEQ_SEPARATOR
self.sequences.append(seq)
[docs] def addRuler(self, update_only=False):
"""
Adds a ruler sequence.
:rtype: `Sequence`
:return: A ruler sequence.
"""
if len(self.sequences) == 0:
return None
for seq in self.sequences:
if seq.type == constants.SEQ_RULER:
self.sequences.remove(seq)
self.sequences.insert(0, seq)
seq.visible = True
return seq
if update_only:
return
seq = Sequence()
seq.type = constants.SEQ_RULER
self.sequences.insert(0, seq)
return seq
[docs] def removeRuler(self):
"""
Removes a ruler sequence.
"""
for seq in self.sequences:
if seq.type == constants.SEQ_RULER:
self.sequences.remove(seq)
[docs] def addConsensusSequence(self, toggle=False):
"""
Creates a consensus sequence and adds it to the group.
If the annotation already exists, just expands the sequence and
makes it visible.
:rtype: `Sequence`
:return: consensus sequence
"""
seq = None
self.calculateProfile()
self.updateVariableSequences()
# Check if we don't have a consensus sequence already.
for seq in self.sequences:
if seq.isValidProtein():
break
if seq.type == constants.SEQ_CONSENSUS:
if toggle:
self.sequences.remove(seq)
return
seq.visible = True
seq.showChildren()
return seq
if not seq:
return None
if not seq.isValidProtein():
return None
# Create a consensus sequence
consensus = Sequence()
consensus.type = constants.SEQ_CONSENSUS
consensus.name = "Consensus Sequence"
consensus.short_name = "Consensus Sequence"
consensus.global_sequence = True
index = self.sequences.index(seq)
self.sequences.insert(index, consensus)
seq = Sequence()
seq.type = constants.SEQ_ANNOTATION
seq.parent_sequence = consensus
seq.height = 3
seq.name = "Consensus Plot"
seq.short_name = "Consensus Plot"
seq.plot_color = (96, 128, 96)
consensus.children.append(seq)
self.updateReference()
return consensus
[docs] def addConsensusSymbols(self, toggle=False):
"""
Creates a consensus symbols string and adds it to the group.
If the annotation already exists, just expands the sequence and
makes it visible.
:rtype: `Sequence`
:return: consensus sequence
"""
self.calculateProfile()
self.updateVariableSequences()
# Check if we don't have a consensus sequence already.
seq = None
for seq in self.sequences:
if seq.isValidProtein():
break
if seq.type == constants.SEQ_SYMBOLS:
if toggle:
self.sequences.remove(seq)
return
seq.visible = True
seq.showChildren()
return seq
if not seq or not seq.isValidProtein():
return None
# Create a consensus sequence
symbols = Sequence()
symbols.type = constants.SEQ_SYMBOLS
symbols.name = "Consensus Symbols"
symbols.short_name = "Consensus Symbols"
symbols.global_sequence = True
index = self.sequences.index(seq)
self.sequences.insert(index, symbols)
return symbols
[docs] def addMeanHydrophobicity(self, toggle=False):
"""
Creates a mean hydrophobicity annotation and adds it to the group.
If the annotation already exists, just expands the sequence and
makes it visible.
:rtype: `Sequence`
:return: mean hydrophbicity annotation
"""
self.calculateProfile()
self.updateVariableSequences()
if len(self.sequences) == 0:
return None
# Find a first amino acid sequence.
for seq in self.sequences:
if seq.isValidProtein():
break
if seq.type == constants.SEQ_ANNOTATION and \
seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO:
if toggle:
self.sequences.remove(seq)
return
seq.visible = True
seq.showChildren()
return seq
if not seq.isValidProtein():
return None
index = self.sequences.index(seq)
# Create a plot sequence.
plot = Sequence()
plot.type = constants.SEQ_ANNOTATION
plot.annotation_type = constants.ANNOTATION_MEAN_HYDRO
plot.name = "Mean Hydrophobicity"
plot.short_name = "Mean Hydrophobicity"
plot.height = 3
plot.global_sequence = True
self.sequences.insert(index, plot)
return plot
[docs] def addMeanPI(self, toggle=False):
"""
Creates a mean isoelectric point annotation and adds it to the group.
If the annotation already exists, just expands the sequence and
makes it visible.
:rtype: `Sequence`
:return: mean hydrophbicity annotation
"""
self.calculateProfile()
self.updateVariableSequences()
if len(self.sequences) == 0:
return None
# Find a first amino acid sequence.
for seq in self.sequences:
if seq.isValidProtein():
break
if seq.type == constants.SEQ_ANNOTATION and \
seq.annotation_type == constants.ANNOTATION_MEAN_PI:
if toggle:
self.sequences.remove(seq)
return
seq.visible = True
seq.showChildren()
return seq
if not seq.isValidProtein():
return None
index = self.sequences.index(seq)
# Create a plot sequence.
plot = Sequence()
plot.type = constants.SEQ_ANNOTATION
plot.annotation_type = constants.ANNOTATION_MEAN_PI
plot.name = "Mean Isoelectric Point"
plot.short_name = "Mean Isoelectric Point"
plot.height = 3
plot.global_sequence = True
self.sequences.insert(index, plot)
return plot
[docs] def addSequenceLogo(self, toggle=False):
"""
Creates a sequence logo annotation and adds it to the group.
If the annotation already exists, just expands the sequence and
makes it visible.
:rtype: `Sequence`
:return: sequence logo annotation object
"""
if len(self.sequences) == 0:
return None
# Find a first amino acid sequence.
for seq in self.sequences:
if seq.isValidProtein():
break
if seq.type == constants.SEQ_LOGO:
if toggle:
self.sequences.remove(seq)
return
seq.visible = True
return seq
if not seq.isValidProtein():
return None
index = self.sequences.index(seq)
# Create a plot sequence.
logo = Sequence()
logo.type = constants.SEQ_LOGO
logo.name = "Sequence Logo"
logo.short_name = "Sequence Logo"
logo.height = 3
logo.global_sequence = True
self.sequences.insert(index, logo)
return logo
[docs] def colorByResidueType(self):
for seq in self.sequences:
if seq.visible and \
seq.isValidProtein(global_annotation=True):
for res in seq.residues:
if not res.is_gap:
res.inverted = True
else:
res.inverted = False
res.color = res.colorType()
[docs] def colorByMaestro(self):
"""
Colors the sequences by Maestro colors.
"""
for seq in self.sequences:
if seq.visible and \
seq.from_maestro:
seq.color_mode = constants.COLOR_MAESTRO
for res in seq.residues:
if res.is_gap:
res.color = (0, 0, 0)
else:
res.color = res.maestro_color
if not res.is_gap:
res.inverted = True
else:
res.inverted = False
[docs] def colorByKyteDoolittle(self):
"""
Colors the sequences by Kyte-Doolittle hydrophobicity scale.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
for res in seq.residues:
res.color = res.colorKD()
if not res.is_gap:
res.inverted = True
else:
res.inverted = False
[docs] def colorByHoppWoods(self):
"""
Colors the sequences by Hopp-Woods hydrophilicity scale.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
for res in seq.residues:
res.color = res.colorHW()
if not res.is_gap:
res.inverted = True
else:
res.inverted = False
[docs] def colorByGray(self):
"""
Colors the sequences using plain gray color. This scheme is useful
in combination with "color by alignment strength."
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
seq.color_scheme = constants.COLOR_BLACK
for res in seq.residues:
if not res.is_gap:
res.inverted = True
res.color = (63, 63, 63)
else:
res.inverted = False
res.color = (0, 0, 0)
[docs] def colorByWhite(self):
"""
Colors the sequences using white color.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
seq.color_scheme = constants.COLOR_WHITE
for res in seq.residues:
if not res.is_gap:
res.inverted = True
res.color = (255, 255, 255)
else:
res.inverted = False
res.color = (0, 0, 0)
[docs] def colorByCustomColor(self, color):
"""
Colors the sequences using custom color.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
seq.color_scheme = constants.COLOR_CUSTOM
seq.custom_color = color
for res in seq.residues:
if not res.is_gap:
res.inverted = True
res.color = color
else:
res.inverted = False
res.color = (0, 0, 0)
[docs] def colorByCustomAnnotations(self):
"""
Colors the sequences using custom annotation color.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
for child in seq.children:
if child.annotation_type == constants.ANNOTATION_CUSTOM:
for index, res in enumerate(seq.residues):
if child.residues[index].color != (255, 255, 255):
res.color = child.residues[index].color
[docs] def colorByBfactor(self):
"""
Colors the sequences using Bfactor values.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein():
seq.color_scheme = constants.COLOR_BFACTOR
minbf = seq.residues[0].bfactor
maxbf = seq.residues[-1].bfactor
for res in seq.residues:
if res.bfactor < minbf:
minbf = res.bfactor
if res.bfactor > maxbf:
maxbf = res.bfactor
dbf = maxbf - minbf
if dbf > 0.0:
dbf = old_div(1.0, dbf)
else:
dbf = 1.0
for res in seq.residues:
if not res.is_gap:
res.color = utils.colorGradientGreenRed(
1.5 * dbf * (res.bfactor - minbf) - 0.75)
else:
res.inverted = False
res.color = self.inv_background_color
[docs] def colorByPosition(self):
"""
Colors the sequences using residue position.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein():
seq.color_scheme = constants.COLOR_POSITION
residues = seq.gaplessResidues()
maxpos = float(len(residues))
for index, res in enumerate(residues):
res.color = utils.colorGradientGreenRed(
1.5 * (old_div(index, maxpos)) - 0.75)
[docs] def colorBySecondary(self):
"""
Colors the sequences using secondary structure annotation. If SSA
is available, if will be used for coloring, otherwise a consensus
of available SSP annotations will be used here.
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
seq.color_scheme = constants.COLOR_SECONDARY
ssa = None
for child in seq.children:
if child.type == constants.SEQ_SECONDARY:
ssa = child
break
ssp_list = []
if ssa is None:
for child in seq.children:
if child.annotation_type == constants.ANNOTATION_SSP:
ssp_list.append(child)
if ssa is None and len(ssp_list) == 0:
continue
for index, res in enumerate(seq.residues):
if not res.is_gap:
res.inverted = True
if ssa:
if ssa.residues[index].code == 'H':
res.color = constants.SS_COLOR_HELIX
elif ssa.residues[index].code == 'E':
res.color = constants.SS_COLOR_EXTENDED
else:
res.color = (192, 192, 192)
else:
avg_color = [0, 0, 0]
i_n_ssp = old_div(1.0, float(len(ssp_list)))
for ssp in ssp_list:
if ssp.residues[index].code == 'H':
avg_color[0] += constants.SS_COLOR_HELIX[0]
avg_color[1] += constants.SS_COLOR_HELIX[1]
avg_color[2] += constants.SS_COLOR_HELIX[2]
elif ssp.residues[index].code == 'E':
avg_color[0] += constants.SS_COLOR_EXTENDED[
0]
avg_color[1] += constants.SS_COLOR_EXTENDED[
1]
avg_color[2] += constants.SS_COLOR_EXTENDED[
2]
else:
avg_color[0] += 192
avg_color[1] += 192
avg_color[2] += 192
avg_color[0] *= i_n_ssp
avg_color[1] *= i_n_ssp
avg_color[2] *= i_n_ssp
res.color = (int(avg_color[0]), int(avg_color[1]),
int(avg_color[2]))
else:
res.inverted = False
res.color = (0, 0, 0)
[docs] def colorByColorBlocks(self, scheme):
"""
Colors the sequences using plain gray color. This scheme is useful
in combination with "color by alignment strength."
"""
for seq in self.sequences:
if seq.visible and seq.isValidProtein(global_annotation=True):
seq.color_scheme = scheme
for res in seq.residues:
# if not res.is_gap and not res.code in ['.', '+']:
res.inverted = True
# else:
# res.inverted = False
if scheme == constants.COLOR_HELIX_PROPENSITY:
res.color = res.colorHelixPropensity()
elif scheme == constants.COLOR_STRAND_PROPENSITY:
res.color = res.colorStrandPropensity()
elif scheme == constants.COLOR_TURN_PROPENSITY:
res.color = res.colorTurnPropensity()
elif scheme == constants.COLOR_SIDECHAIN_CHEMISTRY:
res.color = res.colorSidechainChemistry()
elif scheme == constants.COLOR_STERIC_GROUP:
res.color = res.colorStericGroup()
elif scheme == constants.COLOR_EXPOSURE_TENDENCY:
res.color = res.colorExposureTendency()
[docs] def colorSequences(self, mode=None, color=None):
"""
Colors the sequences using a specified mode.
:type mode: int
:param mode: coloring mode
"""
if mode is None:
mode = self.color_mode
if mode == constants.COLOR_BLACK:
self.colorByGray()
elif mode == constants.COLOR_WHITE:
self.colorByWhite()
elif mode == constants.COLOR_RESIDUE_TYPE:
self.colorByResidueType()
elif mode == constants.COLOR_SIMILARITY:
self.colorBySequenceSimilarity()
elif mode == constants.COLOR_KYTE_DOOLITTLE:
self.colorByKyteDoolittle()
elif mode == constants.COLOR_HOPP_WOODS:
self.colorByHoppWoods()
elif mode == constants.COLOR_TAYLOR:
self.colorByTaylorScheme()
elif mode == constants.COLOR_MAESTRO:
self.colorByMaestro()
elif mode == constants.COLOR_SECONDARY:
self.colorBySecondary()
elif mode == constants.COLOR_BFACTOR:
self.colorByBfactor()
elif mode == constants.COLOR_POSITION:
self.colorByPosition()
elif mode == constants.COLOR_HELIX_PROPENSITY:
self.colorByColorBlocks(constants.COLOR_HELIX_PROPENSITY)
elif mode == constants.COLOR_STRAND_PROPENSITY:
self.colorByColorBlocks(constants.COLOR_STRAND_PROPENSITY)
elif mode == constants.COLOR_TURN_PROPENSITY:
self.colorByColorBlocks(constants.COLOR_TURN_PROPENSITY)
elif mode == constants.COLOR_SIDECHAIN_CHEMISTRY:
self.colorByColorBlocks(constants.COLOR_SIDECHAIN_CHEMISTRY)
elif mode == constants.COLOR_EXPOSURE_TENDENCY:
self.colorByColorBlocks(constants.COLOR_EXPOSURE_TENDENCY)
elif mode == constants.COLOR_STERIC_GROUP:
self.colorByColorBlocks(constants.COLOR_STERIC_GROUP)
elif mode == constants.COLOR_CUSTOM:
if color is not None:
self.custom_color = color
self.colorByCustomColor(self.custom_color)
self.colorByCustomAnnotations()
[docs] def hideAllChildren(self):
"""
Hides all children, effectively collapsing the sequences.
"""
for seq in self.sequences:
seq.hideChildren()
[docs] def showAllChildren(self):
"""
Shows all children, effectively expanding the sequences.
"""
for seq in self.sequences:
seq.showChildren()
[docs] def updateVariableSequences(self):
"""
Updates global variable sequences, i.e. consensus plot and "mean"
annotations.
"""
self.updateReference()
for seq in self.sequences:
if seq.type == constants.SEQ_CONSENSUS:
self.updateConsensus(seq)
elif seq.type == constants.SEQ_SYMBOLS:
self.updateSymbols(seq)
elif seq.type == constants.SEQ_ANNOTATION:
if seq.annotation_type == constants.ANNOTATION_MEAN_HYDRO:
self.updateMeanHydrophobicity(seq)
elif seq.annotation_type == constants.ANNOTATION_MEAN_PI:
self.updateMeanPI(seq)
[docs] def padAlignment(self):
"""
Pads the alignment with additional gaps, so all sequences have
identical length equal to the length of the longest sequence.
"""
max_length = self.findMaxLength(exclude_consensus=True)
for seq in self.sequences:
if seq.isValidProtein():
if seq.length() and seq.length() < max_length:
active = seq.residues[seq.length() - 1].active
seq.insertGaps(seq.length(),
max_length - seq.length(),
active=active)
for child in seq.children:
if child.length() < max_length:
child.insertGaps(child.length(),
max_length - child.length())
[docs] def removeTerminalGaps(self):
"""
Removes terminal gaps from the alignment.
"""
for seq in self.sequences:
if seq.isValidProtein():
while len(seq.residues) and seq.residues[-1].is_gap:
seq.residues.pop()
if not len(seq.residues):
self.sequences.remove(seq)
else:
for child in seq.children:
while len(child.residues) and child.residues[-1].is_gap:
child.residues.pop()
[docs] def encode(self, seq):
"""
Encodes the sequence to include annotation data required by
pattern search. Return an ungapped version of the pattern.
"""
ssa = None
for child in seq.children:
if child.type == constants.SEQ_SECONDARY:
ssa = child
break
acc = None
for child in seq.children:
if child.annotation_type == constants.ANNOTATION_ACC:
acc = child
flex = False
avg_flexibility = 0.0
n_residues = 0
for aa in seq.residues:
if aa.is_gap:
continue
if aa.bfactor:
avg_flexibility += aa.bfactor
n_residues += 1
if aa.bfactor > 0.0:
flex = True
if n_residues > 0:
avg_flexibility /= n_residues
seq_string = ""
ssa_string = ""
acc_string = ""
flx_string = ""
res_ids = []
for index, aa in enumerate(seq.residues):
if aa.is_gap:
continue
seq_string += aa.code
# Store ID (includes residue number and insertion code)
res_ids.append(aa.id().strip())
if ssa:
ssa_string += ssa.residues[index].code.lower()
if acc:
acc_string += acc.residues[index].code
if flex:
if aa.bfactor < avg_flexibility:
flx_string += 'r'
else:
flx_string += 'f'
result = {}
result['amino_acids'] = seq_string
result['res_ids'] = res_ids
if ssa:
result['secondary_structure'] = ssa_string
if acc:
result['solvent_accessibility'] = acc_string
if flex:
result['flexibility'] = flx_string
return result
[docs] def findPattern(self, pattern):
"""
Finds a specified PROSITE pattern in all sequences.
"""
self.unselectAll()
if pattern:
pattern = pattern.strip().rstrip()
if not pattern:
for seq in self.sequences:
seq.makeActive()
if not find_generalized_pattern:
return
if not pattern:
return
seq_string_list = []
sequence_list = []
if not delete_from_str(pattern, string.ascii_uppercase):
new_pattern = pattern[0]
for c in pattern[1:]:
new_pattern += '-' + c
pattern = new_pattern
count = 0
for seq in self.sequences:
if seq.isValidProtein():
seq.makeInactive()
seq_string_list.append(self.encode(seq))
sequence_list.append(seq)
results_list = find_generalized_pattern(seq_string_list, pattern)
if results_list:
for index, match_list in enumerate(results_list):
seq = sequence_list[index]
residues = seq.gaplessResidues()
for start, end in match_list:
for ridx in range(start, end):
residues[ridx].selected = True
count += 1
return results_list
###########################################################################
# Sequence group sorting methods.
###########################################################################
[docs] def sortKeyName(self, sequence):
"""
Returns sequence name sort key.
:rtype: string
:return: sequence name sort key
"""
return sequence.name
[docs] def sortKeyChain(self, sequence):
"""
Returns sequence chain ID sort key.
:rtype: string
:return: sequence chain ID sort key
"""
return sequence.chain_id
[docs] def sortKeyLength(self, sequence):
"""
Returns sequence length sort key.
:rtype: int
:return: sequence length sort key
"""
return sequence.gaplessLength()
[docs] def sortKeyGaps(self, sequence):
"""
Returns sequence number of gaps sort key.
:rtype: int
:return: sequence number of gaps sort key
"""
return sequence.numberOfGaps()
[docs] def sortKeyIdentity(self, sequence):
"""
Returns sequence identity with consesus sequence sort key.
:rtype: float
:return: sequence identity with consesus sequence sort key
"""
return sequence.identity
[docs] def sortKeyHomology(self, sequence):
"""
Returns sequence homology with reference sequence sort key.
:rtype: float
:return: sequence homology with reference sequence sort key
"""
return sequence.homology
[docs] def sortKeySimilarity(self, sequence):
"""
Returns sequence similarity to the consesus sequence as a sort key.
:rtype: float
:return: sequence similarity to the consesus sequence sort key
"""
return sequence.similarity
[docs] def sortKeyScore(self, sequence):
"""
Returns sequence score sort key.
:rtype: float
:return: sequence score sort key
"""
return sequence.score
[docs] def getSortableSequences(self, ignore_reference=True):
"""
Returns a list of "sortable" sequences, i.e. all parent sequences
that are not auxiliary objects and not global annotations.
:type: ignore_reference: boolean
:param: Normally, the reference is not considered a sortable sequence,
unless ignore_reference is set to False. In such case the
reference will be included in the sortable group.
:rtype: list of `Sequence`
:return: list of sortable sequences
"""
# Extract non-global sequences.
my_reference = None
if ignore_reference:
my_reference = self.reference
return [
seq for seq in self.sequences
if seq.isSortable(reference=my_reference)
]
[docs] def replaceSortableSequences(self, sequence_list, ignore_reference=True):
"""
This method replaces all "sortable" sequences with a list of
sorted sequences. The length of the given list is supposed to have
a number of items equal to the number of sortable sequences.
This method is used to replace sequences in the group after
sorting operation.
:type sequence_list: list of `Sequence`
:param sequence_list: replacement list of sequences
:type: ignore_reference: boolean
:param: Normally, the reference is not considered a sortable sequence,
unless ignore_reference is set to False. In such case the
reference will be included in the sortable group.
"""
# Extract non-global sequences.
my_reference = None
if ignore_reference:
my_reference = self.reference
list_index = 0
list_size = len(sequence_list)
for pos in range(len(self.sequences)):
seq = self.sequences[pos]
if seq.isSortable(reference=my_reference):
if list_index < list_size:
self.sequences[pos] = sequence_list[list_index]
list_index += 1
[docs] def sort(self, order, reverse_order=False):
"""
This method sorts the sequences according to a specified order.
Optionally, the sequences can be sorted in a reverse order.
:type order: int
:param order: sort order
:type reverse_order: bool
:param reverse_order: if True, the sequences will be reverse sorted
(default=False, i.e. sorting from smallest to largest key)
"""
# Calculate profile, just to be up-to-date.
self.calculateProfile()
sortable = self.getSortableSequences()
# Sort the sequences according to specified sort order.
if order == constants.SORT_NAME:
sortable.sort(key=self.sortKeyName)
elif order == constants.SORT_CHAIN:
sortable.sort(key=self.sortKeyChain)
elif order == constants.SORT_LENGTH:
sortable.sort(key=self.sortKeyLength)
elif order == constants.SORT_GAPS:
sortable.sort(key=self.sortKeyGaps)
elif order == constants.SORT_IDENTITY:
sortable.sort(key=self.sortKeyIdentity)
elif order == constants.SORT_HOMOLOGY:
sortable.sort(key=self.sortKeyHomology)
elif order == constants.SORT_SIMILARITY:
sortable.sort(key=self.sortKeySimilarity)
elif order == constants.SORT_SCORE:
sortable.sort(key=self.sortKeyScore)
if reverse_order:
sortable.reverse()
# Replace non-global sequences with the sorted ones.
sorted_pos = 0
for pos in range(len(self.sequences)):
seq = self.sequences[pos]
if seq.isSortable(reference=self.reference):
self.sequences[pos] = sortable[sorted_pos]
sorted_pos += 1
self.updateReference()
[docs] def moveUp(self):
"""
Moves selected sequences to the top of the group.
"""
sortable = self.getSortableSequences()
if sortable and self.hasSelectedSequences():
for seq in sortable:
if seq.selected:
pos = sortable.index(seq)
if pos > 0:
sortable.remove(seq)
sortable.insert(pos - 1, seq)
self.replaceSortableSequences(sortable)
else:
for seq in self.sequences:
n_children = len(seq.children)
for index in range(n_children):
child = seq.children[index]
if child.selected and index:
seq.children.remove(child)
seq.children.insert(index - 1, child)
[docs] def moveDown(self):
"""
Moves selected sequences to the bottom of the group.
"""
sortable = self.getSortableSequences()
if sortable and self.hasSelectedSequences():
for seq in reversed(sortable):
if seq.selected:
pos = sortable.index(seq)
sortable.remove(seq)
sortable.insert(pos + 1, seq)
self.replaceSortableSequences(sortable)
else:
for seq in self.sequences:
n_children = len(seq.children)
for index in range(n_children - 1, -1, -1):
child = seq.children[index]
if child.selected and index < n_children - 1:
seq.children.remove(child)
seq.children.insert(index + 1, child)
[docs] def moveTop(self, target=None):
"""
Moves selected sequences to the top of the group.
If target is specified, move only the target sequence to top.
"""
if target == self.reference:
# When target is reference, include the reference in the
# sortable group.
sortable = self.getSortableSequences(ignore_reference=False)
else:
sortable = self.getSortableSequences()
if target and target in sortable:
sortable.remove(target)
sortable.insert(0, target)
sorted_list = sortable
else:
sorted_list = []
for seq in sortable:
if seq.selected:
sorted_list.append(seq)
for seq in sortable:
if not seq.selected:
sorted_list.append(seq)
if target == self.reference:
self.replaceSortableSequences(sorted_list, ignore_reference=False)
else:
self.replaceSortableSequences(sorted_list)
[docs] def moveBottom(self):
"""
Moves selected sequences to the bottom of the group.
"""
sortable = self.getSortableSequences()
sorted_list = []
for seq in sortable:
if not seq.selected:
sorted_list.append(seq)
for seq in sortable:
if seq.selected:
sorted_list.append(seq)
self.replaceSortableSequences(sorted_list)
[docs] def sortByTreeOrder(self):
"""
Sorts the sequences by the tree order. If there is no tree,
returns False.
:return: True on success, False if valid tree doesn't exist.
:rtype: boolean
"""
if not self.tree:
return False
tree_list = []
tree = self.tree.getVisibleTree()
tree_list = tree.getSequenceList(tree_list)
if len(tree_list) == 0:
return False
# Replace all non-global sequences with sorted list.
sorted_pos = 0
for pos in range(min(len(self.sequences), len(tree_list))):
seq = self.sequences[pos]
if seq.isValidProtein():
self.sequences[pos] = tree_list[sorted_pos]
sorted_pos += 1
return True
[docs] def duplicateSelectedSequences(self):
"""
Duplicates selected sequences.
"""
copy_list = []
for index, seq in enumerate(self.sequences):
if seq.selected:
copy_list.append((index + len(copy_list), seq.copyForUndo()))
for index, seq in copy_list:
self.sequences.insert(index, seq)
[docs] def getGlobalAnnotation(self, annotation_type):
return next((seq for seq in self.sequences
if seq.annotation_type == annotation_type), None)
[docs] def addAnnotation(self, annotation_type, remove=False):
"""
Adds an annotation sequence to selected sequences or to all sequences
if no sequence is selected.
:type annotation_type: int
:param annotation_type: type of the annotation sequence
"""
has_selected = self.hasSelectedSequences()
seq = None
if remove:
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
seq.hasAnnotationType(annotation_type):
for child in seq.children:
if child.annotation_type == annotation_type:
seq.children.remove(child)
return
if annotation_type == constants.ANNOTATION_HELIX_PROPENSITY:
# Add helix propensity annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Helix Propensity"
plot.short_name = "Helix Propensity"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_COLOR_BLOCKS
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.color = parent_res.colorHelixPropensity()
elif annotation_type == constants.ANNOTATION_STRAND_PROPENSITY:
# Add strand propensity annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Strand Propensity"
plot.short_name = "Strand Propensity"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_COLOR_BLOCKS
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.color = parent_res.colorStrandPropensity()
elif annotation_type == constants.ANNOTATION_TURN_PROPENSITY:
# Add turn propensity annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Turn Propensity"
plot.short_name = "Turn Propensity"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_COLOR_BLOCKS
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.color = parent_res.colorTurnPropensity()
elif annotation_type == constants.ANNOTATION_STERIC_GROUP:
# Add strand propensity annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Steric Group"
plot.short_name = "Steric Group"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_COLOR_BLOCKS
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.color = parent_res.colorStericGroup()
elif annotation_type == constants.ANNOTATION_HELIX_TERMINATORS:
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Helix Breaking"
plot.short_name = "Helix Breaking"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_COLOR_BLOCKS
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.color = parent_res.colorHelixTerminators()
elif annotation_type == constants.ANNOTATION_SIDECHAIN_CHEMISTRY:
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Sidechain Chemistry"
plot.short_name = "Sidechain Chemistry"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_COLOR_BLOCKS
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.color = parent_res.colorSidechainChemistry()
elif annotation_type == constants.ANNOTATION_EXPOSURE_TENDENCY:
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Exposure Tendency"
plot.short_name = "Exposure Tendency"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_COLOR_BLOCKS
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.color = parent_res.colorExposureTendency()
elif annotation_type == constants.ANNOTATION_BFACTOR:
# Add strand propensity annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and seq.visible and \
seq.has_structure and seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "B factor"
plot.short_name = "B factor"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_HISTOGRAM
plot.height = 3
plot.plot_color = (128, 160, 240)
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.value = parent_res.bfactor
plot.calculatePlotValues(0)
elif annotation_type == constants.ANNOTATION_HYDROPHOBICITY:
# Add strand propensity annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Hydrophobicity"
plot.short_name = "Hydrophobicity"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_HISTOGRAM
plot.height = 3
plot.plot_color = (240, 160, 240)
amino_acids = list(constants.HYDROPHOBICITY_SCALES)
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
if parent_res.code in amino_acids:
res.value = constants.HYDROPHOBICITY_SCALES[
parent_res.code][constants.KD_SCALE]
plot.calculatePlotValues(2)
elif annotation_type == constants.ANNOTATION_PI:
# Add isoelectric point annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Isoelectric Point"
plot.short_name = "Isoelectric Point"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_HISTOGRAM
plot.height = 3
plot.plot_color = (160, 240, 192)
amino_acids = list(constants.PI_SCALE)
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
if parent_res.code in amino_acids:
res.value = constants.PI_SCALE[parent_res.code]
plot.calculatePlotValues(2)
elif annotation_type == constants.ANNOTATION_IDENTITY:
# Add strand propensity annotation.
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Identity"
plot.short_name = "Identity"
plot.annotation_type = annotation_type
plot.plot_style = constants.PLOT_LINE
plot.height = 3
amino_acids = list(constants.HYDROPHOBICITY_SCALES)
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.is_gap = parent_res.is_gap
if parent_res.code in amino_acids:
res.value = \
constants.HYDROPHOBICITY_SCALES[
parent_res.code][
constants.KD_SCALE]
plot.calculatePlotValues(2)
elif annotation_type == constants.ANNOTATION_RESNUM:
# Add residue number annotation
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
plot = seq.createAnnotationSequence()
plot.name = "Residue Numbers"
plot.short_name = ""
plot.annotation_type = annotation_type
elif annotation_type == constants.ANNOTATION_SSBOND:
# Add SSBond annotation
for seq in self.sequences:
if (not has_selected or seq.selected) and \
seq.isValidProtein() and \
not seq.hasAnnotationType(annotation_type):
if not seq.ssb_bond_list:
continue
plot = seq.createSSBondAssignment()
seq.children.insert(0, plot)
plot.annotation_type = annotation_type
plot.height = int(len(seq.ssb_bond_list) * 0.5 + 0.5)
plot.parent_sequence = seq
if seq:
seq.propagateGapsToChildren()
[docs] def addCustomAnnotation(self,
sequence=None,
title="Custom Annotation",
name="",
region_list=[]): # noqa: M511
"""
This function adds or updates a custom annotation in the specified
sequence.
:param region_list: List of residue regions. Each item should include:
(first_res_id, last_res_id, label, color)
"""
for plot in sequence.children:
if plot.annotation_type == constants.ANNOTATION_CUSTOM:
sequence.children.remove(plot)
break
for seq in self.sequences:
if seq == sequence and seq.isValidProtein() and \
not seq.hasAnnotationType(constants.ANNOTATION_CUSTOM):
plot = seq.createAnnotationSequence()
plot.name = title
plot.short_name = name
plot.annotation_type = constants.ANNOTATION_CUSTOM
plot_residues = plot.gaplessResidues()
for first, last, label, color in region_list:
lab_idx = 0
for index in range(first, last + 1):
if index < len(plot_residues):
plot_residues[index].color = color
plot_residues[index].code = ' '
if lab_idx < len(label):
plot_residues[index].code = label[lab_idx]
lab_idx += 1
# else:
# plot_residues[index].color = res.color
# Assign gaps
for pos in range(len(seq.residues)):
parent_res = seq.residues[pos]
res = plot.residues[pos]
res.inverted = True
res.is_gap = parent_res.is_gap
[docs] def selectRegions(self, sequence, region_list):
"""
Select residue subsets based on a region list.
"""
for first, last, label, color in region_list:
for index in range(first, last + 1):
sequence.residues[index].selected = True
[docs] def selectRedundantSequences(self, value, columns=False, reference=None):
"""
Selects sequences below a specified identity threshold value.
"""
aa_sequences = [seq for seq in self.sequences if seq.isValidProtein()]
n_aa_sequences = len(aa_sequences)
if n_aa_sequences == 0:
return
for seq in aa_sequences:
seq.selected = False
if reference:
for n in range(n_aa_sequences):
id = aa_sequences[n].calcIdentity(reference, self.consider_gaps,
columns)
if id * 100 >= value:
aa_sequences[n].selected = True
else:
for n in range(n_aa_sequences):
for m in range(n):
id = aa_sequences[n].calcIdentity(aa_sequences[m],
self.consider_gaps,
columns)
if id * 100 >= value:
len_n = aa_sequences[n].gaplessLength()
len_m = aa_sequences[m].gaplessLength()
if len_n < len_m:
aa_sequences[n].selected = True
else:
aa_sequences[m].selected = True
[docs] def clearConstraints(self):
for seq in self.sequences:
if seq.type == constants.SEQ_CONSTRAINTS:
seq.constraint_list = []
break
[docs] def removeConstraints(self):
for seq in self.sequences:
if seq.type == constants.SEQ_CONSTRAINTS:
self.sequences.remove(seq)
break
[docs] def hasConstraints(self):
for seq in self.sequences:
if seq.type == constants.SEQ_CONSTRAINTS:
if len(seq.constraint_list) > 0:
return True
return False
[docs] def addConstraint(self, seq1, pos1, seq2, pos2, for_prime=False):
"""
Adds a pairwise alignment constraint. At least one of the specified
sequences has to be a reference sequence.
:rtype: boolean
:return: True if the constrait was successfully added, False otherwise
"""
if self.reference not in self.sequences:
return False
index = self.sequences.index(self.reference)
next_seq = None
for seq in self.sequences[index + 1:]:
if seq.isValidProtein():
next_seq = seq
break
if not next_seq:
return False
cons_seq = None
for seq in self.sequences:
if seq.type == constants.SEQ_CONSTRAINTS:
cons_seq = seq
break
if not cons_seq:
cons_seq = Sequence()
cons_seq.constraint_list = []
cons_seq.type = constants.SEQ_CONSTRAINTS
cons_seq.height = 2
cons_seq.name = cons_seq.short_name = "Constraints"
self.sequences.insert(index + 1, cons_seq)
if seq1 is None and seq2 is None:
return True
if seq1 != self.reference and seq2 != self.reference:
return False
if seq2 == self.reference:
# Reorder if seq1 is not a reference.
tmp = seq1
seq1 = seq2
seq2 = tmp
tmp = pos1
pos1 = pos2
pos2 = tmp
if for_prime:
seq1 = self.reference
seq2 = self.reference
next_seq = self.reference
if pos2 > pos1:
tmp = pos2
pos2 = pos1
pos1 = tmp
ref_pos = pos1
pos = pos2
if ref_pos is None or pos is None:
return False
real_ref_pos = self.reference.getUngappedIndex(ref_pos)
real_pos = next_seq.getUngappedIndex(pos)
for constraint in cons_seq.constraint_list:
start, end, seq, prime = constraint
if start == real_ref_pos and end == real_pos:
cons_seq.constraint_list.remove(constraint)
return
cons_seq.constraint_list.append(
(real_ref_pos, real_pos, next_seq, for_prime))
return True
[docs] def statistics(self):
total, selected, hidden, maestro = (0, 0, 0, 0)
for seq in self.sequences:
if not seq.type == constants.SEQ_RULER and \
not seq.type == constants.SEQ_SEPARATOR:
if not seq.visible:
hidden += 1
if seq.selected:
selected += 1
if seq.from_maestro:
maestro += 1
total += 1
return total, selected, maestro, hidden
[docs] def setReference(self, ref=None):
"""
Sets a reference sequence.
"""
# To avoid any confusion, just remove the constraints.
self.removeConstraints()
if ref and ref in self.sequences and (ref.isValidProtein() or ref.type
== constants.SEQ_CONSENSUS):
self.reference = ref
self.moveTop(target=ref)
return True
elif self.hasSelectedSequences():
for seq in self.sequences:
if seq.selected:
if seq.type == constants.SEQ_CONSENSUS or \
seq.isValidProtein():
self.reference = seq
self.moveTop(target=seq)
return True
return False
[docs] def removeMaestroSequences(self):
"""
Removes all Maestro sequences.
"""
self.sequences = [seq for seq in self.sequences if not seq.from_maestro]
self.removeLonelyRuler()
[docs] def removeLonelyRuler(self):
"""
Remove ruler but only if there is nothing else left.
:rtype: bool
:return: True if the ruler was removed
"""
seq = None
for seq in self.sequences:
if seq.type != constants.SEQ_RULER:
return False
if seq:
self.sequences = []
return True
[docs] def toggleHistory(self):
"""
Adds or removes a sequence modification history string.
"""
for seq in self.sequences:
if seq.type == constants.SEQ_HISTORY:
self.sequences.remove(seq)
return
history = Sequence()
history.type = constants.SEQ_HISTORY
history.name = "History of Changes"
history.short_name = history.name[:]
self.sequences.append(history)
[docs] def resetHistory(self):
"""
This function resets change tracking string.
"""
history = None
for seq in self.sequences:
if seq.type == constants.SEQ_HISTORY:
history = seq
break
if history:
for res in history.residues:
res.value = 0.0
res.color = (255, 255, 255)
# if self.viewer:
# self.viewer.updateView()
[docs] def updateHistory(self, start, end):
"""
This function updates an internal history string.
:rtype: bool
:return: True if the history was updated, False otherwise.
"""
history = None
for seq in self.sequences:
if seq.type == constants.SEQ_HISTORY:
history = seq
break
if history:
if end < start:
tmp = start
start = end
end = tmp
if end >= history.length():
new_length = end - history.length() + 1
for pos in range(new_length):
res = Residue()
res.sequence = history
res.color = (255, 255, 255)
res.value = 0.0
res.code = ' '
res.inverted = True
history.residues.append(res)
for res in history.residues:
res.value -= 0.1
if res.value < 0.0:
res.value = 0.0
elif res.value < 0.2:
res.value = 0.2
color = int(255 * (1.0 - res.value))
res.color = (color, color, color)
for pos in range(start, end):
res = history.residues[pos]
res.color = (0, 0, 0)
res.value = 1.0
# Update the viewer.
# if self.viewer:
# self.viewer.updateView()
return True
return False
[docs] def setConsiderGaps(self, value):
"""
Sets value of consider gaps flag. If set to True, gaps will be
included in calculation of local sequence similarity measures.
:type value: bool
:param value: Should we consider gaps for sequence identity
calculations.
"""
self.consider_gaps = value
self.calculateProfile()
self.updateVariableSequences()
# if self.viewer:
# self.viewer.updateView()
[docs] def updateSSA(self, remove=False):
"""
Updates secondary structure assignments for all structure-coupled
sequences.
"""
any_selected = self.hasSelectedSequences()
for seq in self.sequences:
if not any_selected or seq.selected:
if seq.has_structure:
for child in seq.children:
if child.type == constants.SEQ_SECONDARY:
seq.children.remove(child)
break
if remove:
continue
ssa = seq.createSecondaryAssignment()
for pos in range(len(ssa.residues)):
ss_res = ssa.residues[pos]
ss_res.code = seq.residues[pos].ss_code
if not seq.residues[pos].is_gap:
ss_res.inverted = True
if ss_res.code == 'H':
ss_res.color = constants.SS_COLOR_HELIX
elif ss_res.code == 'E':
ss_res.color = constants.SS_COLOR_EXTENDED
else:
ss_res.color = constants.SS_COLOR_COIL
seq.children.insert(0, ssa)
ssa.parent_sequence = seq
seq.propagateGapsToChildren()
# if self.viewer:
# self.viewer.updateView()
[docs] def expandSelection(self):
"""
Expands selection to include entire columns.
"""
no_selected = not self.hasSelectedSequences()
for pos in range(self.findMaxLength()):
for seq in self.sequences:
if pos < seq.length():
if seq.selected or no_selected:
if seq.residues[pos].selected:
self.selectColumns(pos, pos)
break
[docs] def expandSelectionRef(self):
"""
Expands selection from the reference sequence to include entire columns.
"""
if not self.reference:
return
for pos in range(self.reference.length()):
if self.reference.residues[pos].selected:
self.selectColumns(pos, pos)
[docs] def hideColumns(self, unselected=False):
"""
Hides selected or un-selected columns.
"""
max = self.findMaxLength()
# Just to be sure we are not going to loose any residues here.
self.showAllResidues()
for pos in range(max):
if self.isColumnSelected(pos):
if not unselected:
self.hideColumn(pos)
elif unselected:
self.hideColumn(pos)
for seq in self.sequences:
if seq.isValidProtein():
seq.tmp_residues = copy.copy(seq.residues)
seq.residues = [res for res in seq.residues if res.visible]
seq.tmp_children = []
for child in seq.children:
seq.tmp_children.append(copy.copy(child.residues))
child.residues = [
res for res in child.residues if res.visible
]
[docs] def isColumnSelected(self, pos, weak=False):
"""
Returns True if the specified column is selected, False otherwise.
:type weak: bool
:param weak: If weak is False (default) treat the column as selected
only if all residues in the column are selected.
"""
for seq in self.sequences:
if seq.isValidProtein():
if pos < seq.length():
if not weak:
if not seq.residues[pos].selected:
return False
else:
if seq.residues[pos].selected:
return True
return not weak
[docs] def hideColumn(self, pos):
"""
Hides the specified column.
"""
for seq in self.sequences:
if seq.isValidProtein():
if pos < seq.length():
seq.residues[pos].visible = False
for child in seq.children:
child.residues[pos].visible = False
[docs] def showAllResidues(self):
"""
Makes all residues in all sequences visible.
"""
for seq in self.sequences:
if seq.isValidProtein():
if seq.tmp_residues:
seq.residues = seq.tmp_residues
for res in seq.residues:
res.visible = True
seq.tmp_residues = None
if seq.tmp_children:
for index, child in enumerate(seq.children):
child.residues = seq.tmp_children[index]
seq.tmp_children = None
for child in seq.children:
for res in child.residues:
res.visible = True
[docs] def markResidues(self, rgb):
for seq in self.sequences:
if seq.isValidProtein():
for res in seq.residues:
if res.selected:
res.marked_color = rgb
[docs] def clearMarkedResidues(self):
no_selected = not self.hasSelectedResidues()
for seq in self.sequences:
if seq.isValidProtein():
for res in seq.residues:
if no_selected or res.selected:
res.marked_color = None
[docs] def cropSelectedResidues(self):
if self.hasSelectedResidues():
self.invertSelection()
self.deleteSelectedResidues()
[docs] def markTemplateRegion(self):
for seq in self.sequences:
if seq.has_structure and \
seq != self.reference:
for index, res in enumerate(seq.residues):
if res.selected:
res.model = True
res.selected = False
for seq2 in self.sequences:
if seq != seq2:
if index < seq2.length():
seq2.residues[index].model = False
# msv_maestro.propagateColorsToMaestro(
# self.viewer, seq, template_colors=True)
self.unselectAll()
[docs] def unmarkTemplateRegions(self):
for seq in self.sequences:
if seq.has_structure and seq != self.reference:
for res in seq.residues:
res.model = False
# msv_maestro.propagateColorsToMaestro(self.viewer,
# seq, template_colors=True)
self.unselectAll()
[docs] def getStructureList(self, omit_reference=False):
"""
Returns a list of visible sequences associated with structures.
"""
return [
seq for seq in self.sequences
if seq.has_structure and seq.visible and
(not omit_reference or (seq != self.reference))
]
[docs] def getMaestroSequencesForEntryId(self, entry_id):
seq_list = []
for seq in self.sequences:
if seq.from_maestro and seq.maestro_entry_id == entry_id:
seq_list.append(seq)
return seq_list
[docs] def colorSequenceNames(self, color):
no_selected = not self.hasSelectedSequences()
for seq in self.sequences:
if no_selected or seq.selected:
seq.color = color
children_selected = seq.hasSelectedChildren()
for child in seq.children:
if not children_selected or child.selected:
child.color = color
[docs] def selectFirstTemplate(self, n_templates=1):
"""
Selects first available valid template from a template list.
If a template is alread selected, do nothing. Optionally, select
n_templates valid templates instead of just the first one.
"""
for seq in self.sequences:
if seq.selected and seq.isValidTemplate(reference=self.reference):
return
n = 0
for seq in self.sequences:
if seq.isValidTemplate(reference=self.reference):
seq.selected = True
n += 1
if n == n_templates:
return
[docs] def hasValidTemplates(self):
for seq in self.sequences:
if seq.isValidTemplate(reference=self.reference):
return True
return False
[docs] def getTemplates(self, selected_only=False):
"""
Return a list of
`sequences<schrodinger.ui.sequencealignment.sequence.Sequence`
that are valid templates.
"""
templates = []
for seq in self.sequences:
if selected_only and not seq.selected:
continue
if seq.isValidTemplate(reference=self.reference):
templates.append(seq)
return templates
[docs] def copySequences(self, group):
not_selected = not group.hasSelectedSequences()
for seq in group.sequences:
if (seq.selected or not_selected) and \
seq.type != constants.SEQ_RULER:
seq_copy = seq.copyForUndo()
self.sequences.append(seq_copy)
[docs] def calculateMatrix(self):
"""
Calculates a substitution matrix based on the current alignment.
"""
# Calculate expected frequencies
p_exp = {}
amino_acids = list(constants.AMINO_ACIDS)
for aa in amino_acids:
p_exp[aa] = 0.0
total = 1.0
for seq in self.sequences:
if seq.isValidProtein():
for res in seq.residues:
if res.code in p_exp:
p_exp[res.code] += 1.0
total += 1.0
for aa in list(p_exp):
p_exp[aa] /= total
# Calculate target (observed) pair frequencies
q = {}
m = {}
for aa in amino_acids:
for ab in amino_acids:
q[aa + ab] = 0.0
m[aa + ab] = 0.0
total = 0.0
max_length = self.findMaxLength()
for pos in range(max_length):
for seq1 in self.sequences:
if seq1.isValidProtein() and pos < seq1.length():
code1 = seq1.residues[pos].code
for seq2 in self.sequences:
if seq2.isValidProtein and pos < seq2.length():
code2 = seq2.residues[pos].code
cc = code1 + code2
if cc in q:
q[cc] += 1.0
total += 1.0
for aa in amino_acids:
for ab in amino_acids:
# NB: If the group is empty this causes a divide by zero error
q[aa + ab] /= total
if q[aa + ab] > 0.0:
m[aa + ab] = math.log(
old_div(q[aa + ab], (p_exp[aa] * p_exp[ab])))
return m
[docs] def alignByResidueNumbers(self):
"""
Aligns the sequences so that identical residue numbers
are lined up in columns.
"""
minres = maxres = None
seq_list = []
new_res_list = []
# Find residue number range
for seq in self.sequences:
if seq.isValidProtein():
seq_list.append(seq)
seq.residues = seq.gaplessResidues()
new_res_list.append([])
for res in seq.residues:
if minres is None:
minres = res.num
elif minres > res.num:
minres = res.num
if maxres is None:
maxres = res.num
elif maxres < res.num:
maxres = res.num
# Iterate over residue number range
resnum = minres
while resnum <= maxres:
found = True
while found:
found = False
# Check if there are any residues at this position
for index, seq in enumerate(seq_list):
if seq.residues:
if seq.residues[0].num <= resnum:
found = True
break
if found:
# If yes, pick the residue if resnum matches
# or insert a gap if not
for index, seq in enumerate(seq_list):
if seq.residues:
if seq.residues[0].num <= resnum:
new_res_list[index].append(seq.residues.pop(0))
else:
gap = Residue()
gap.sequence = seq
gap.is_gap = True
gap.code = constants.UNLOCKED_GAP_SYMBOL
new_res_list[index].append(gap)
resnum += 1
for index, seq in enumerate(seq_list):
if seq.isValidProtein():
seq.residues = new_res_list[index]
seq.propagateGapsToChildren()
[docs] def addCustomSequence(self):
sequence = Sequence()
sequence.type = constants.SEQ_CUSTOM
sequence.name = ""
sequence.short_name = "Custom annotation"
max_length = self.findMaxLength()
sequence.residues = [
Residue(sequence=sequence,
code=' ',
inverted=False,
color=self.inv_background_color) for x in range(max_length)
]
self.sequences.append(sequence)
#USED ONLY IN TESTING! (delete later)
[docs] def getSequenceNameList(self):
"""
Returns a list of sequence names.
"""
name_list = []
for seq in self.sequences:
name_list.append(seq.name)
return name_list
[docs] def repair(self):
"""
Repairs the group by setting sequence-residue associations
for all sequences in group. Also, adds missing attributes
(using default values) to the group.
Useful if the group comes from a corrupted project file.
"""
# Create dummy objects
empty_sequence = Sequence()
empty_residue = Residue()
for seq in self.sequences:
# Add missing sequence attributes
for attr in list(empty_sequence.__dict__):
if not hasattr(seq, attr):
setattr(seq, attr, getattr(empty_sequence, attr))
for res in seq.residues:
# Update residue parent
res.sequence = seq
# Add missing residue attributes
for attr in list(empty_residue.__dict__):
if not hasattr(res, attr):
setattr(res, attr, getattr(empty_residue, attr))
for child in seq.children:
# Add missing child sequence attributes
for attr in list(empty_sequence.__dict__):
if not hasattr(child, attr):
setattr(child, attr, getattr(empty_sequence, attr))
for child_res in child.residues:
# Update child residue parent
child_res.sequence = child
# Add missing child residue attributes
for attr in list(empty_residue.__dict__):
if not hasattr(child_res, attr):
setattr(child_res, attr,
getattr(empty_residue, attr))
# Add missing group attributes
empty_group = SequenceGroup()
for attr in list(empty_group.__dict__):
if not hasattr(self, attr):
setattr(self, attr, getattr(empty_group, attr))