Source code for schrodinger.ui.sequencealignment.sequence

"""
Implementation of multiple sequence viewer Sequence class.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Piotr Rotkiewicz

import copy
import re
from past.utils import old_div

from . import constants
from . import utils
from .residue import Residue


[docs]def delete_from_str(inp_str, delete_chars): """ Delete characters from a string. Note: replaces Python 2 inp_str.translate(None, delete_chars) :param inp_str: A string to delete characters from. In Python 2, unicode input will be cast to str :type inp_str: str :param delete_chars: Characters to delete from the string :type delete_chars: str :return: The input string with the delete_chars removed :rtype: str """ translator = str.maketrans('', '', delete_chars) return inp_str.translate(translator)
[docs]class Sequence(object): """ The sequence class represents a single basic sequence object. The Sequence object can correspond to amino acid sequence, nucleic acid sequence, annotation (such as secondary structure assignment or hydrophobicity plot) or helper object (for example, a ruler). """
[docs] def __init__(self): self.residues = [] #Actual sequence (list of Residues). #Temp list of residues. Used to store them in 'hidden columns' mode. self.tmp_residues = None self.tmp_children = None self.type = constants.SEQ_AMINO_ACIDS #Possible seq types in constants.py self.name = "" #Long seq name displayed in tooltip. self.short_name = "" #Short seq name displayed in list area. self.visible = True self.from_maestro = False #If seq has corresponding entry in Maestro. self.maestro_entry_id = None self.maestro_chain_name = None self.selected = False self.collapsed = False self.parent_sequence = None self.min_avg_value = 0.0 self.max_avg_value = 1.0 self.plot_style = constants.PLOT_HISTOGRAM #For annotation plots. self.plot_color = (64, 64, 64) self.annotation_type = None #of sequence self.global_sequence = False self.chain_id = ' ' #of sequence self.children = [] #children sequences self.color_scheme = constants.COLOR_SIDECHAIN_CHEMISTRY #of sequence self.height = 1 #in characters self.identity = 0.0 #seq identity between self and reference seq. self.similarity = 0.0 #seq similarity between self and reference seq. self.score = 0.0 #seq score between self and reference seq. self.homology = 0.0 #seq homology between self and reference seq. self.last_hidden = False #hidden sequence indicator. #If seq has PDB file or Maestro entry struct associated with it. self.has_structure = False #If sequence corresponds to entry currently included in Maestro wkspce. self.maestro_included = False self.color = (0, 0, 0) #RGB for displayed color of sequence name. self.parent_sequence = None #None if this is a parent sequence. self.ssb_bond_list = [] #SS bridges list self.custom_color = (255, 255, 255) #for the entire sequence.
[docs] def appendResidue(self, residue): """ Appends a new residue to self. :type residue : sequence alignment Residue object """ if residue: self.residues.append(residue)
[docs] def appendResidues(self, codes, use_numbers=False): """ Create new residues based on a single-code string and append them to existing sequence. Converts upper-case characters to lower-case, recognize gaps ('.', '-', '~') and ignore other characters. :param codes: single-code amino acid :type codes: string :type use_numbers: boolean :param use_numbers: If true, this function will try to recognize residue numbers included in the sequence and assign them to the residues. """ index = 1 if self.length(): index = self.residues[-1].num res_list = [] domidx = 1 nums = "" for ch in codes: if not ch: continue ch = str(ch).upper() if ch >= '0' and ch <= '9': nums += ch elif ch > ' ' and \ ((ch >= 'A' and ch <= 'Z') or ch in ['.', '-', '~']): if nums: index = int(nums) nums = "" res = Residue() res.num = index if ch in ['.', '-', '~']: ch = constants.UNLOCKED_GAP_SYMBOL res.is_gap = True else: index = index + 1 res.code = ch res.makeName() res.sequence = self res_list.append(res) self.residues += res_list
[docs] def removeStructureless(self): """ Removes structureless (SEQRES) residues from the sequence and its children. """ for child in self.children: child.residues = [ res for parent_res, res in zip(self.residues, child.residues) if not parent_res.structureless ] self.residues = [res for res in self.residues if not res.structureless]
[docs] def replaceSequence(self, new_sequence): """ This method replaces current sequence with the provided string. :type new_sequence: str :param new_sequence: Must be same gapless length as old sequence. :rtype: bool :return: True if successful """ self.removeStructureless() gapless = self.gaplessResidues() gapless_new = delete_from_str(new_sequence, ' .~-') if len(gapless) != len(gapless_new): # Something is wrong; gapless lengths don't match. return False pos = 0 new_residues = [] for c in new_sequence: if c in ".~-": res = Residue() res.is_gap = True res.code = '~' res.sequence = self else: res = gapless[pos] pos += 1 new_residues.append(res) self.residues = new_residues self.propagateGapsToChildren() return True
[docs] def toString(self, with_gaps=True): """ Returns a string representation of self. :type with_gaps: boolean (default=True) :param with_gaps: optional parameter, if True the returned string will include gaps, if False - only actual residue codes. """ out_string = "" for res in self.residues: if not res.is_gap or with_gaps: out_string += res.code return out_string
[docs] def text(self): """ Returns self as a string. """ return "".join([res.code for res in self.residues])
[docs] def gaplessText(self): """ Returns self as a gapless string. """ return "".join([res.code for res in self.residues if not res.is_gap])
[docs] def copyForUndo(self, deep_copy=True): if deep_copy: sequence_copy = copy.deepcopy(self) else: sequence_copy = copy.copy(self) return sequence_copy
[docs] def length(self): """ Returns a length of the sequence. :rtype: int :return: lengh of the sequence """ return len(self.residues)
[docs] def unpaddedLength(self): """ Returns a length of the sequence with rightmost gaps stripped out. :rtype: int :return: length of the stripped sequence """ total = len(self.residues) - 1 if total <= 0: return 0 while total and self.residues[total].is_gap: total -= 1 return total
[docs] def gaplessLength(self): """ Returns a length of the sequence excluding gaps. :rtype: int :return: actual sequence length (number of residues) """ return len(self.gaplessResidues())
[docs] def gaplessResidues(self): """ Returns a list of gapless residues. """ return [res for res in self.residues if not res.is_gap]
[docs] def numberOfGaps(self): """ Returns a number of gaps in the sequence. :rtype: int :return: number of gaps in the sequence """ return sum(1 for res in self.residues if res.is_gap)
[docs] def countActiveGaps(self, pos): count = 0 while pos < self.length(): if not self.residues[pos].active: break if self.residues[pos].is_gap: count += 1 pos += 1 if pos == self.length(): count = -1 return count
[docs] def getResidue(self, index, ungapped=False, hidden=True): """ Returns a residue at a given sequence position, or None if the given position is invalid. :type index: int :param index: sequence position :rtype: `Residue` :return: residue for a given position, or None if the position is invalid """ if hidden and self.tmp_residues: residues = self.tmp_residues else: residues = self.residues if ungapped: res_list = [res for res in residues if not res.is_gap] else: res_list = residues if index >= 0 and \ index < len(res_list): return res_list[index] return None
[docs] def getResidueIndex(self, id): """ Returns index of residue with given id :type id : string :param id : str(res.num) + str(res.icode) :rtype : int if valid id, None if not :return : index of res if valid id, None if not """ id = id.strip().rstrip() for index, res in enumerate(self.residues): if res.id().rstrip() == id: return index return None
[docs] def getUngappedIndex(self, index): """ Returns a residue index corresponding to ungapped position. :type index: int :param index: Residue index in gapped sequence :rtype: int :return: Index in ungapped sequence. """ if index < 0 or index >= self.length(): return -1 res = self.residues[index] if res.is_gap: return -1 res_list = self.gaplessResidues() return res_list.index(res)
[docs] def insertGaps(self, position, n_gaps, active=True): """ Inserts a specified number of gaps at a specified position. :type position: int :param position: sequence position where the gaps will be inserted :type n_gaps: int :param n_gaps: number of gaps to be inserted at the position :rtype: int :return: number of gaps actually inserted at the position """ n_inserted = 0 for gap in range(n_gaps): res = Residue() res.code = constants.UNLOCKED_GAP_SYMBOL res.is_gap = True res.sequence = self res.active = active self.residues.insert(position, res) n_inserted = n_inserted + 1 return n_inserted
[docs] def removeGaps(self, position, n_gaps): """ Removes a specified number of gaps (or less) at a given position, starting from position and going to C-terminus. (towards higher index) :type position: int :param position: sequence position from where the gaps will be removed :type n_gaps: int :param n_gaps: number of gaps to be removed at the position :rtype: int :return: number of gaps actually removed at the position """ n_removed = 0 for pos in range(position, len(self.residues)): if pos >= 0 and pos < self.length(): while pos >= 0 and pos < self.length() and \ self.residues[pos].is_gap and \ self.residues[pos].code == constants.UNLOCKED_GAP_SYMBOL: if not self.residues[pos].active: break self.residues.pop(pos) n_removed = n_removed + 1 n_gaps = n_gaps - 1 if n_gaps == 0: break if n_gaps == 0: break return n_removed
[docs] def removeGapsBackwards(self, position, n_gaps): """ Removes a specified number of gaps (or less) at a given position, starting at the position and going to N-terminus. (towards lower index) :type position: int :param position: sequence position from where the gaps will be removed :type n_gaps: int :param n_gaps: number of gaps to be removed at the position :rtype: int :return: number of gaps actually removed at the position """ n_removed = 0 for pos in range(position, -1, -1): if pos < 0 or pos >= len(self.residues): continue while self.residues[pos].is_gap and \ self.residues[pos].code == constants.UNLOCKED_GAP_SYMBOL: if not self.residues[pos].active: break self.residues.pop(pos) n_removed = n_removed + 1 n_gaps = n_gaps - 1 if n_gaps == 0: break if n_gaps == 0: break return n_removed
[docs] def removeAllGaps(self, selected_only=False): """ Removes all gaps from the sequence. If selected_only, only removes gaps if gaps are selected. """ any_selected = self.hasSelectedResidues() if selected_only and any_selected: self.residues = [ res for res in self.residues if not res.is_gap or not res.selected ] else: self.residues = self.gaplessResidues()
[docs] def unselectResidues(self): """ Unselects all residues in the sequence """ for res in self.residues: res.selected = False for child in self.children: for res in child.residues: res.selected = False
[docs] def selectAllResidues(self): for res in self.residues: res.selected = True for child in self.children: for res in child.residues: res.selected = True
[docs] def invertSelection(self): for res in self.residues: if res.selected: res.selected = False else: res.selected = True for child in self.children: for res in child.residues: if res.selected: res.selected = False else: res.selected = True
[docs] def hasSelectedResidues(self): """ :rtype: bool :return: True if any of the residues are selected, False otherwise """ for res in self.residues: if res.selected: return True return False
[docs] def hasSelectedChildren(self): """ Returns True if any of its children are selected. """ return any([child.selected for child in self.children])
[docs] def hasAllSelectedResidues(self): """ Checks if all residues in the sequence are selected. :rtype: bool :return: True if all residues are selected, False otherwise """ return all([res.selected for res in self.residues])
[docs] def deleteSelectedResidues(self): """ Removes all selected residues from the sequence. """ self.residues = [res for res in self.residues if not res.selected]
[docs] def hideChildren(self): """ Hides all child sequences (effectively collapsing the sequence). """ self.collapsed = True
[docs] def showChildren(self): """ Shows all child sequences (effectively expanding the sequence). """ self.collapsed = False
[docs] def calculatePlotValues(self, half_window_size, min_value=None, max_value=None): """ Calculates window-averaged plot values, and the plot value extrema. :type half_window_size: int :param half_window_size: half-size of the window (can be 0 if not averaging) :type min_value: float :param min_value: optional minimum value, if None then the minimum will be calculated :type max_value: float :param max_value: optional maximum value, if None then the minimum will be calculated """ if self.type == constants.SEQ_ANNOTATION: total = 0.0 for pos in range(self.length()): total = 0.0 n_total = 0 for win_pos in range(-half_window_size, half_window_size + 1): seq_pos = pos + win_pos if seq_pos >= 0 and seq_pos < self.length(): value = self.residues[seq_pos].value n_total += 1 if value: total += value total /= float(n_total) self.residues[pos].avg_value = total self.residues[pos].previous_avg_value = total self.residues[pos].next_avg_value = total min = max = total prev_res = None for res in self.residues: if res.avg_value < min: min = res.avg_value if res.avg_value > max: max = res.avg_value if prev_res: res.previous_avg_value = prev_res.avg_value prev_res.next_avg_value = res.avg_value prev_res = res if min_value is None: self.min_avg_value = min else: self.min_avg_value = min_value if max_value is None: self.max_avg_value = max else: self.max_avg_value = max_value
[docs] def propagateGapsToChildren(self, target_child=None): """ Propagates gaps from a parent sequence to all children. This method should be called after loading multiple alignment in order to ensure gap consistency between parent sequence and its children. :type target_child: `Sequence` :param target_child: If specified, only this child sequence will be used. """ # First, remove all gaps from the children sequences. for child in self.children: if not target_child or target_child == child: child.removeAllGaps() # Insert gaps into the child sequences. for pos in range(len(self.residues)): res = self.residues[pos] if res.is_gap: for child in self.children: if not target_child or target_child == child: gap_res = Residue() gap_res.code = res.code gap_res.is_gap = True gap_res.sequence = child child.residues.insert(pos, gap_res)
[docs] def propagateGaps(self, sequence, parent_sequence=None, replace=False): """ Propagates gaps from self to a given sequence. Sequence is supposed to be a subset of self. :rtype: list of `Residue` :return: list of residues including gaps at matching positions """ for index, res in enumerate(self.residues): res._index = index residue_list = sequence.gaplessResidues() seq1 = sequence.gaplessText() seq2 = self.gaplessText() if seq1 not in seq2: return None pos = seq2.find(seq1) index = self.gaplessResidues()[pos]._index new_list = [] for gap_pos in range(index): res = Residue() res.code = constants.UNLOCKED_GAP_SYMBOL res.is_gap = True res.sequence = parent_sequence new_list.append(res) position = 0 for res in self.residues[index:]: if res.is_gap: gap_res = res.copy() gap_res.sequence = parent_sequence new_list.append(gap_res) elif position < len(residue_list): res = residue_list[position] res.sequence = parent_sequence new_list.append(res) position += 1 if replace and new_list: sequence.residues = new_list return new_list
[docs] def calcIdentity(self, reference_sequence, consider_gaps, in_columns): """ This method calculates sequence identity between self and a specified reference sequence, assuming that both sequences are already aligned. :type reference_sequence: `Sequence` :param reference_sequence: reference sequence :type consider_gaps: bool :param consider_gaps: Should we include gaps in the calculation. :rtype: float :return: sequence identity (between 0.0 and 1.0) """ if not reference_sequence: return None length = self.length() if length > reference_sequence.length(): length = reference_sequence.length() id = 0 real_length = 0 for pos in range(length): res = self.residues[pos] ref_res = reference_sequence.residues[pos] if res.is_gap and ref_res.is_gap: continue if not (ref_res.is_gap or res.is_gap) or consider_gaps: if not in_columns or (res.selected and ref_res.selected): if res.code == ref_res.code: id += 1 real_length += 1 if real_length > 0: return old_div(float(id), real_length) return 0.0
[docs] def calcSimilarity(self, reference_sequence, consider_gaps, in_columns): """ This method calculates sequence similarity between self and a specified reference sequence, assuming that both sequences are already aligned. :type reference_sequence: `Sequence` :param reference_sequence: reference sequence :type consider_gaps: bool :param consider_gaps: Should we include gaps in the calculation. :rtype: float :return: sequence similarity (between 0.0 and 1.0) """ if not reference_sequence: return None length = self.length() if length > reference_sequence.length(): length = reference_sequence.length() id = 0 real_length = 0 for pos in range(length): res = self.residues[pos] ref_res = reference_sequence.residues[pos] score = utils.matrixValue(constants.SIMILARITY_MATRIX, res.code, ref_res.code) if res.is_gap and ref_res.is_gap: continue if not (ref_res.is_gap or res.is_gap) or consider_gaps: if not in_columns or (res.selected and ref_res.selected): if score > 0.0: id += 1 real_length += 1 if real_length > 0: return old_div(float(id), real_length) return 0.0
[docs] def calcHomology(self, reference_sequence, consider_gaps, in_columns): """ This method calculates sequence homolgy between self and a specified reference sequence, assuming that both sequences are already aligned. The homology criterion is based on "side chain chemistry" descriptor matching. :type reference_sequence: `Sequence` :param reference_sequence: reference sequence :type consider_gaps: bool :param consider_gaps: Should we include gaps in the calculation. :rtype: float :return: sequence similarity (between 0.0 and 1.0) """ if not reference_sequence: return None hom_dict = { "D": 1, "E": 1, "R": 2, "K": 2, "H": 2, "G": 3, "A": 3, "V": 3, "I": 3, "L": 3, "M": 3, "F": 4, "Y": 4, "W": 4, "S": 5, "T": 5, "N": 5, "Q": 5, "C": 6, "P": 7 } length = self.length() if length > reference_sequence.length(): length = reference_sequence.length() id = 0 real_length = 0 for pos in range(length): res = self.residues[pos] ref_res = reference_sequence.residues[pos] if res.is_gap and ref_res.is_gap: continue if not (ref_res.is_gap or res.is_gap) or consider_gaps: if not in_columns or (res.selected and ref_res.selected): if (res.code in hom_dict and ref_res.code in hom_dict and hom_dict[res.code] == hom_dict[ref_res.code]): id += 1 real_length += 1 if real_length > 0: return old_div(float(id), real_length) return 0.0
[docs] def calcScore(self, reference_sequence, consider_gaps, in_columns): """ This method calculates sequence similarity score between self and a specified reference sequence, assuming that both sequences are already aligned. :type reference_sequence: `Sequence` :param reference_sequence: reference sequence :rtype: float :return: sequence similarity score """ if not reference_sequence: return None length = self.length() if length > reference_sequence.length(): length = reference_sequence.length() score = 0.0 for pos in range(length): res = self.residues[pos] ref_res = reference_sequence.residues[pos] if res.is_gap and ref_res.is_gap: continue if not in_columns or (res.selected and ref_res.selected): score += utils.matrixValue(constants.SIMILARITY_MATRIX, res.code, ref_res.code) return score
[docs] def previousUngappedResidue(self, position): if position < 0 or position >= self.length(): return None position -= 1 while position >= 0: if not self.residues[position].is_gap: return self.residues[position] position -= 1 return None
[docs] def nextUngappedResidue(self, position): if position < 0 or position >= self.length(): return None position += 1 while position < self.length(): if not self.residues[position].is_gap: return self.residues[position] position += 1 return None
[docs] def ungappedId(self, position, start, end, backwards=False): """ Returns residue ID for the first ungapped position in a specified region, starting from position and going forward or backwards. If no valid position is found (i.e. all residues in the specified region are gaps), returns an empty string. :type start: int :param start: lower boundary of the search region :type end: int :param end: upper boundary of the search region :type position: int :param position: initial position :type backwards: bool :param backwards: if True, search the sequence backwards :rtype: string :return: ungapped residue ID, or empty string if no valid residue is found """ if start < 0 or start > self.length() or \ end < 0 or end > self.length() or \ position < 0 or position >= self.length(): return '' if backwards: while position >= start: res = self.residues[position] if not res.is_gap: return res.id() position -= 1 else: while position < end: res = self.residues[position] if not res.is_gap: return res.id() position += 1 # Not found, return an empty string. return ''
[docs] def hasAnnotationType(self, annotation_type): """ Checks if the sequence already has this annotation type. :type annotation_type: int :param annotation_type: annotation type :rtype: bool :return: True if the sequence has this annotation type already, False otherwise """ for child in self.children: if child.annotation_type == annotation_type: child.visible = True return True return False
[docs] def sanitize(self): """ Removes all gaps and illegal residue codes from self. """ amino_acids = list(constants.AMINO_ACIDS) amino_acids.append('-') amino_acids.append('~') self.residues = [ res for res in self.residues if res.code in amino_acids ] self.makeShortName()
[docs] def makeInactive(self): if self.type == constants.SEQ_AMINO_ACIDS: for res in self.residues: res.active = False
[docs] def makeActive(self): if self.type == constants.SEQ_AMINO_ACIDS: for res in self.residues: res.active = True
[docs] def haveAnchors(self, pos): while pos < self.length(): if not self.residues[pos].active: return True pos += 1 return False
[docs] def inactivePosition(self, pos): """ Finds first inactive residue position after given position. :type pos: int :param pos: start position in sequence to begin search :rtype: int :return: position of first inactive res. If none, returns -1 """ while pos < self.length(): if not self.residues[pos].active: return pos pos += 1 return -1
[docs] def makeShortName(self, name=None): """ This method converts a long sequence name into a short name that is displayed on a screen. """ if self.short_name and name is None: # remove redundant chain names while len(self.short_name) > 2 and self.short_name[-2] == '_' and \ self.short_name[-1] >= 'A' and self.short_name[-1] <= 'Z': self.short_name = self.short_name[:-2] return name_split = "" if not name: name = self.name if name: if name[0] >= '0' and name[0] <= '9' and (len(name) == 4 or \ (len(name) > 4 and name[4] == '_')): name = name[:4].upper() + name[4:] name_split = re.split("[ |,:]", name) if len(name_split) > 1: if name_split[0] == "gi" or name_split[0] == "pdb" or \ name_split[0] == "sp": self.short_name = name_split[1].upper() else: self.short_name = name_split[0] else: self.short_name = name if len(name_split) > 2 and len(name_split[2]) == 1: self.chain_id = name_split[2] else: self.name = "Sequence" self.short_name = "Sequence"
[docs] def createAnnotationSequence(self): """ Creates an empty annotation. """ plot = Sequence() plot.parent_sequence = self plot.type = constants.SEQ_ANNOTATION length = self.gaplessLength() plot.residues = [Residue(sequence=plot) for index in range(length)] self.children.append(plot) self.propagateGapsToChildren(target_child=plot) return plot
[docs] def createSecondaryAssignment(self): """ Creates an empty secondary structure assignment annotation. """ seq = Sequence() seq.chain_id = self.chain_id seq.type = constants.SEQ_SECONDARY seq.name = "Secondary Structure Assignment : " + self.chain_id seq.short_name = "SSA" seq.parent_sequence = self seq.residues = [ Residue(sequence=seq) for index in range(self.gaplessLength()) ] # Because seq is not child of self.sequences, gaps not propagated. self.propagateGapsToChildren(target_child=seq) return seq
[docs] def createSSBondAssignment(self): """ Creates an empty disulfide bond assignment annotation. """ seq = Sequence() seq.chain_id = self.chain_id seq.type = constants.SEQ_ANNOTATION seq.annotation_type = constants.ANNOTATION_SSBOND seq.name = "Disulfide Bonds : " + self.chain_id seq.short_name = "SSBOND" seq._tmp_bond_list = [] seq.bond_list = [] seq.parent_sequence = self seq.residues = [ Residue(sequence=seq) for index in range(self.gaplessLength()) ] # Because seq is not a child of self.sequences, gaps not propagated: self.propagateGapsToChildren(target_child=seq) return seq
[docs] def compare(self, sequence): """ Compares gapless version of self with other sequences and calculates identity between both. """ res_list1 = self.gaplessResidues() res_list2 = sequence.gaplessResidues() if len(res_list1) != len(res_list2): return -1 if len(res_list1) == 0: return -1 id = 0 for pos in range(len(res_list1)): if res_list1[pos].code == res_list2[pos].code: id += 1 return old_div(id, len(res_list1))
[docs] def getPDBId(self, with_chain=True): """ This function tries to generate a PDB ID based on the sequence name. It supports different name formats: 1abcD, pdb|1abc|D, 1ABCD If the conversion fails, it will return an empty string. """ pdb_id = "" short_name = self.short_name short_name = delete_from_str(short_name, '.|:,_#!') if short_name.startswith("pdb"): short_name = short_name[3:] if len(short_name) and \ not short_name[0].isalnum(): short_name = short_name[1:] if len(short_name) and \ short_name[0].isdigit(): pdb_id += short_name[0] short_name = short_name[1:] if len(short_name) >= 3: if not short_name[0].isalnum(): short_name = short_name[1:] code = short_name[0:3].lower() if code.isalnum(): pdb_id += code short_name = short_name[3:] if len(short_name): if not short_name[0].isalnum(): short_name = short_name[1:] if short_name[0].isalnum(): pdb_id += short_name[0].upper() if with_chain and self.chain_id > ' ': pdb_id += self.chain_id return pdb_id
[docs] def isValidTemplate(self, reference=None): if self.visible and self != reference and \ (self.has_structure or self.from_maestro) and self.gaplessLength(): return True return False
[docs] def isValidProtein(self, global_annotation=False): if self.visible: if self.type == constants.SEQ_AMINO_ACIDS: return True if global_annotation and (self.type == constants.SEQ_CONSENSUS or \ self.type == constants.SEQ_LOGO or \ self.short_name == "Sequence Profile"): return True return False
[docs] def isRuler(self): return self.type == constants.SEQ_RULER
[docs] def isDNA(self): """ Returns True if the sequence is DNA sequence. """ text = self.gaplessText() for res in text: if res not in "CTGA": return False return True
[docs] def translateDNA(self, translation_table=constants.TRANSLATION_TABLE_DNA): """ Translates the sequence from nucleotide codes to amino acids. """ DNA_codes = set('CTGA') text = self.gaplessText() text = ''.join(res for res in text if res in DNA_codes) try: translated_text = ''.join( [translation_table.get(text[3 * i:3 * i + 3], 'X') for \ i in range(old_div(len(text), 3))]) except: return False self.residues = [] self.appendResidues(translated_text) self.children = [] return True
[docs] def isRNA(self): """ Returns True if the sequence is RNA sequence. """ text = self.gaplessText() for res in text: if res not in "CUGA": return False return True
[docs] def translateRNA(self, translation_table=constants.TRANSLATION_TABLE_RNA): """ Translates the sequence from nucleotide codes to amino acids. """ RNA_codes = set('CUGA') text = self.gaplessText() text = ''.join(res for res in text if res in RNA_codes) try: translated_text = ''.join([translation_table.get( text[3 * i:3 * i + 3], 'X') for i in \ range(old_div(len(text), 3))]) except: return False self.residues = [] self.appendResidues(translated_text) self.children = [] return True
[docs] def renumberResidues(self, start, incr, preserve_ins_codes=False): num = start for res in self.residues: res.num = num num += incr if preserve_ins_codes: continue res.icode = ' '
[docs] def getValues(self, gapless=False): """ Returns a list of residue values. """ if gapless: residues = self.gaplessResidues() else: residues = self.residues return [res.value for res in residues]
[docs] def isSortable(self, reference=None): """ Returns True if the sequence is sortable, False otherwise. """ if (self.global_sequence or self.type == constants.SEQ_RULER or self.type == constants.SEQ_SEPARATOR or self.type == constants.SEQ_HISTORY or self.type == constants.SEQ_CONSTRAINTS or self.parent_sequence or self == reference): return False return True
[docs] def repair(self): """ Repairs the sequence by setting sequence-residue associations for all residues. Also, adds missing attributes (using default values) to the sequence. """ for res in self.residues: res.repair() res.sequence = self for child in self.children: child.repair() # Add missing attributes empty_sequence = Sequence() for attr in list(empty_sequence.__dict__): if not hasattr(self, attr): setattr(self, attr, getattr(empty_sequence, attr))