"""
Implementation of multiple sequence viewer Sequence class.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Piotr Rotkiewicz
import copy
import re
from past.utils import old_div
from . import constants
from . import utils
from .residue import Residue
def delete_from_str(inp_str, delete_chars):
    """
    Delete characters from a string.

    Note: replaces Python 2 inp_str.translate(None, delete_chars)

    :param inp_str: A string to delete characters from.
    :type inp_str: str

    :param delete_chars: Characters to delete from the string
    :type delete_chars: str

    :return: The input string with the delete_chars removed
    :rtype: str
    """
    # The third argument of str.maketrans lists characters mapped to None,
    # i.e. characters that translate() drops from the result.
    translator = str.maketrans('', '', delete_chars)
    return inp_str.translate(translator)


class Sequence(object):
    """
    The sequence class represents a single basic sequence object. The Sequence
    object can correspond to amino acid sequence, nucleic acid sequence,
    annotation (such as secondary structure assignment or hydrophobicity plot)
    or helper object (for example, a ruler).
    """

    # "Side chain chemistry" groups used by calcHomology(): two residue codes
    # are considered homologous when they map to the same group number.
    _HOMOLOGY_GROUPS = {
        "D": 1,
        "E": 1,
        "R": 2,
        "K": 2,
        "H": 2,
        "G": 3,
        "A": 3,
        "V": 3,
        "I": 3,
        "L": 3,
        "M": 3,
        "F": 4,
        "Y": 4,
        "W": 4,
        "S": 5,
        "T": 5,
        "N": 5,
        "Q": 5,
        "C": 6,
        "P": 7,
    }

    def __init__(self):
        self.residues = []  # Actual sequence (list of Residues).
        # Temp list of residues. Used to store them in 'hidden columns' mode.
        self.tmp_residues = None
        self.tmp_children = None
        # Possible seq types are listed in constants.py
        self.type = constants.SEQ_AMINO_ACIDS
        self.name = ""  # Long seq name displayed in tooltip.
        self.short_name = ""  # Short seq name displayed in list area.
        self.visible = True
        self.from_maestro = False  # If seq has corresponding entry in Maestro.
        self.maestro_entry_id = None
        self.maestro_chain_name = None
        self.selected = False
        self.collapsed = False
        self.parent_sequence = None  # None if this is a parent sequence.
        self.min_avg_value = 0.0
        self.max_avg_value = 1.0
        self.plot_style = constants.PLOT_HISTOGRAM  # For annotation plots.
        self.plot_color = (64, 64, 64)
        self.annotation_type = None  # of sequence
        self.global_sequence = False
        self.chain_id = ' '  # of sequence
        self.children = []  # children sequences
        self.color_scheme = constants.COLOR_SIDECHAIN_CHEMISTRY  # of sequence
        self.height = 1  # in characters
        self.identity = 0.0  # seq identity between self and reference seq.
        self.similarity = 0.0  # seq similarity between self and reference seq.
        self.score = 0.0  # seq score between self and reference seq.
        self.homology = 0.0  # seq homology between self and reference seq.
        self.last_hidden = False  # hidden sequence indicator.
        # If seq has PDB file or Maestro entry struct associated with it.
        self.has_structure = False
        # If sequence corresponds to entry currently included in Maestro wkspce.
        self.maestro_included = False
        self.color = (0, 0, 0)  # RGB for displayed color of sequence name.
        self.ssb_bond_list = []  # SS bridges list
        self.custom_color = (255, 255, 255)  # for the entire sequence.

    def appendResidue(self, residue):
        """
        Appends a new residue to self. Falsy values (e.g. None) are ignored.

        :type residue: sequence alignment Residue object
        """
        if residue:
            self.residues.append(residue)

    def appendResidues(self, codes, use_numbers=False):
        """
        Create new residues based on a single-code string and append them
        to existing sequence. Converts lower-case characters to upper-case,
        recognizes gaps ('.', '-', '~') and ignores other characters.
        Runs of digits are parsed as the residue number of the next
        residue.

        :param codes: single-code amino acid
        :type codes: string

        :type use_numbers: boolean
        :param use_numbers: Intended to enable recognition of residue numbers
                            embedded in the sequence. The flag is currently
                            not consulted: digits are always parsed.
        """
        gap_chars = ['.', '-', '~']

        index = 1
        if self.length():
            # NOTE(review): this reuses the number of the last existing
            # residue, so a non-gap last residue and the first appended
            # residue get the same number. Left unchanged; confirm intent.
            index = self.residues[-1].num

        res_list = []
        nums = ""
        for ch in codes:
            if not ch:
                continue
            ch = str(ch).upper()
            if '0' <= ch <= '9':
                nums += ch
            elif ch > ' ' and ('A' <= ch <= 'Z' or ch in gap_chars):
                if nums:
                    # Digits seen before this character set its number.
                    index = int(nums)
                    nums = ""
                res = Residue()
                res.num = index
                if ch in gap_chars:
                    ch = constants.UNLOCKED_GAP_SYMBOL
                    res.is_gap = True
                else:
                    # Only real residues advance the numbering.
                    index += 1
                res.code = ch
                res.makeName()
                res.sequence = self
                res_list.append(res)
        self.residues += res_list

    def removeStructureless(self):
        """
        Removes structureless (SEQRES) residues from the sequence
        and its children.
        """
        # Children are filtered first, while self.residues still holds the
        # structureless flags for every column.
        for child in self.children:
            child.residues = [
                res for parent_res, res in zip(self.residues, child.residues)
                if not parent_res.structureless
            ]
        self.residues = [res for res in self.residues if not res.structureless]

    def replaceSequence(self, new_sequence):
        """
        This method replaces current sequence with the provided string.
        Existing residue objects are kept (in order); only the gap layout
        is taken from new_sequence. Spaces in new_sequence are ignored.

        Structureless residues are removed before the lengths are compared,
        so they are gone even when the method returns False.

        :type new_sequence: str
        :param new_sequence: Must be same gapless length as old sequence.

        :rtype: bool
        :return: True if successful
        """
        self.removeStructureless()
        gapless = self.gaplessResidues()
        gapless_new = delete_from_str(new_sequence, ' .~-')
        if len(gapless) != len(gapless_new):
            # Something is wrong; gapless lengths don't match.
            return False

        pos = 0
        new_residues = []
        for c in new_sequence:
            if c == ' ':
                # Spaces are not counted by the length check above, so they
                # must not consume a residue here (doing so overran the
                # gapless list and raised IndexError).
                continue
            if c in ".~-":
                res = Residue()
                res.is_gap = True
                res.code = '~'
                res.sequence = self
            else:
                res = gapless[pos]
                pos += 1
            new_residues.append(res)
        self.residues = new_residues
        self.propagateGapsToChildren()
        return True

    def toString(self, with_gaps=True):
        """
        Returns a string representation of self.

        :type with_gaps: boolean (default=True)
        :param with_gaps: optional parameter, if True the returned string will
                          include gaps, if False - only actual residue codes.
        """
        return "".join(
            res.code for res in self.residues if with_gaps or not res.is_gap)

    def text(self):
        """
        Returns self as a string.
        """
        return "".join(res.code for res in self.residues)

    def gaplessText(self):
        """
        Returns self as a gapless string.
        """
        return "".join(res.code for res in self.residues if not res.is_gap)

    def copyForUndo(self, deep_copy=True):
        """
        Returns a copy of self.

        :type deep_copy: bool
        :param deep_copy: if True use copy.deepcopy, otherwise copy.copy
                          (shallow).
        """
        if deep_copy:
            return copy.deepcopy(self)
        return copy.copy(self)

    def length(self):
        """
        Returns a length of the sequence.

        :rtype: int
        :return: length of the sequence
        """
        return len(self.residues)

    def unpaddedLength(self):
        """
        Scans backwards over the rightmost gaps and returns the index at
        which the scan stops (0 if the sequence has fewer than two
        residues).

        NOTE(review): the value returned is the index of the last non-gap
        residue, i.e. one less than the stripped length the name suggests.
        Left unchanged because callers may rely on it; confirm intent.

        :rtype: int
        :return: index of the last non-gap residue
        """
        last = len(self.residues) - 1
        if last <= 0:
            return 0
        while last and self.residues[last].is_gap:
            last -= 1
        return last

    def gaplessLength(self):
        """
        Returns a length of the sequence excluding gaps.

        :rtype: int
        :return: actual sequence length (number of residues)
        """
        return len(self.gaplessResidues())

    def gaplessResidues(self):
        """
        Returns a list of gapless residues.
        """
        return [res for res in self.residues if not res.is_gap]

    def numberOfGaps(self):
        """
        Returns a number of gaps in the sequence.

        :rtype: int
        :return: number of gaps in the sequence
        """
        return sum(1 for res in self.residues if res.is_gap)

    def countActiveGaps(self, pos):
        """
        Counts gaps from pos towards the end of the sequence, stopping at the
        first inactive residue.

        :type pos: int
        :param pos: position to start counting from

        :rtype: int
        :return: number of gaps found before the first inactive residue, or
                 -1 if the scan ends exactly at the end of the sequence
                 (no inactive residue was found)
        """
        count = 0
        while pos < self.length():
            res = self.residues[pos]
            if not res.active:
                break
            if res.is_gap:
                count += 1
            pos += 1
        if pos == self.length():
            count = -1
        return count

    def getResidue(self, index, ungapped=False, hidden=True):
        """
        Returns a residue at a given sequence position, or None if the
        given position is invalid.

        :type index: int
        :param index: sequence position

        :type ungapped: bool
        :param ungapped: if True, index refers to the gapless residue list

        :type hidden: bool
        :param hidden: if True and self.tmp_residues is non-empty, look the
                       residue up in tmp_residues instead of residues

        :rtype: `Residue`
        :return: residue for a given position, or None if the position
                 is invalid
        """
        if hidden and self.tmp_residues:
            residues = self.tmp_residues
        else:
            residues = self.residues
        if ungapped:
            residues = [res for res in residues if not res.is_gap]
        if 0 <= index < len(residues):
            return residues[index]
        return None

    def getResidueIndex(self, id):
        """
        Returns index of residue with given id

        :type id: string
        :param id: str(res.num) + str(res.icode)

        :rtype: int if valid id, None if not
        :return: index of res if valid id, None if not
        """
        wanted = id.strip()
        for index, res in enumerate(self.residues):
            if res.id().rstrip() == wanted:
                return index
        return None

    def getUngappedIndex(self, index):
        """
        Returns a residue index corresponding to ungapped position.

        :type index: int
        :param index: Residue index in gapped sequence

        :rtype: int
        :return: Index in ungapped sequence, or -1 if index is out of range
                 or refers to a gap.
        """
        if index < 0 or index >= self.length():
            return -1
        res = self.residues[index]
        if res.is_gap:
            return -1
        return self.gaplessResidues().index(res)

    def insertGaps(self, position, n_gaps, active=True):
        """
        Inserts a specified number of gaps at a specified position.

        :type position: int
        :param position: sequence position where the gaps will be inserted

        :type n_gaps: int
        :param n_gaps: number of gaps to be inserted at the position

        :type active: bool
        :param active: value assigned to the `active` flag of the new gaps

        :rtype: int
        :return: number of gaps actually inserted at the position
        """
        n_inserted = 0
        for _ in range(n_gaps):
            res = Residue()
            res.code = constants.UNLOCKED_GAP_SYMBOL
            res.is_gap = True
            res.sequence = self
            res.active = active
            self.residues.insert(position, res)
            n_inserted += 1
        return n_inserted

    def _isRemovableGap(self, pos):
        """
        Returns True if pos is a valid index holding an active, unlocked gap.
        """
        if pos < 0 or pos >= len(self.residues):
            return False
        res = self.residues[pos]
        return (res.is_gap and
                res.code == constants.UNLOCKED_GAP_SYMBOL and res.active)

    def removeGaps(self, position, n_gaps):
        """
        Removes a specified number of gaps (or less) at a given position,
        starting from position and going to C-terminus. (towards higher index)
        Only active gaps coded with constants.UNLOCKED_GAP_SYMBOL are
        removed.

        :type position: int
        :param position: sequence position from where the gaps will be removed

        :type n_gaps: int
        :param n_gaps: number of gaps to be removed at the position

        :rtype: int
        :return: number of gaps actually removed at the position
        """
        n_removed = 0
        # The range is fixed up front; positions that fall off the end after
        # removals are rejected by the bounds check in _isRemovableGap.
        for pos in range(position, len(self.residues)):
            # After a pop the next residue slides into pos, so keep testing
            # the same index.
            while self._isRemovableGap(pos):
                self.residues.pop(pos)
                n_removed += 1
                n_gaps -= 1
                if n_gaps == 0:
                    break
            if n_gaps == 0:
                break
        return n_removed

    def removeGapsBackwards(self, position, n_gaps):
        """
        Removes a specified number of gaps (or less) at a given position,
        starting at the position and going to N-terminus. (towards lower index)
        Only active gaps coded with constants.UNLOCKED_GAP_SYMBOL are
        removed.

        :type position: int
        :param position: sequence position from where the gaps will be removed

        :type n_gaps: int
        :param n_gaps: number of gaps to be removed at the position

        :rtype: int
        :return: number of gaps actually removed at the position
        """
        n_removed = 0
        for pos in range(position, -1, -1):
            # Bounds are re-checked on every pass: popping the last residue
            # leaves pos one past the end, which previously raised
            # IndexError when more gaps were still requested.
            while self._isRemovableGap(pos):
                self.residues.pop(pos)
                n_removed += 1
                n_gaps -= 1
                if n_gaps == 0:
                    break
            if n_gaps == 0:
                break
        return n_removed

    def removeAllGaps(self, selected_only=False):
        """
        Removes all gaps from the sequence. If selected_only is True and at
        least one residue is selected, only selected gaps are removed;
        when nothing is selected all gaps are removed regardless.
        """
        if selected_only and self.hasSelectedResidues():
            self.residues = [
                res for res in self.residues
                if not res.is_gap or not res.selected
            ]
        else:
            self.residues = self.gaplessResidues()

    def unselectResidues(self):
        """
        Unselects all residues in the sequence and in its children.
        """
        for res in self.residues:
            res.selected = False
        for child in self.children:
            for res in child.residues:
                res.selected = False

    def selectAllResidues(self):
        """
        Selects all residues in the sequence and in its children.
        """
        for res in self.residues:
            res.selected = True
        for child in self.children:
            for res in child.residues:
                res.selected = True

    def invertSelection(self):
        """
        Flips the selection state of every residue in the sequence and in
        its children.
        """
        for res in self.residues:
            res.selected = not res.selected
        for child in self.children:
            for res in child.residues:
                res.selected = not res.selected

    def hasSelectedResidues(self):
        """
        :rtype: bool
        :return: True if any of the residues are selected, False otherwise
        """
        return any(res.selected for res in self.residues)

    def hasSelectedChildren(self):
        """
        Returns True if any of its children are selected.
        """
        return any(child.selected for child in self.children)

    def hasAllSelectedResidues(self):
        """
        Checks if all residues in the sequence are selected.

        :rtype: bool
        :return: True if all residues are selected (also for an empty
                 sequence), False otherwise
        """
        return all(res.selected for res in self.residues)

    def deleteSelectedResidues(self):
        """
        Removes all selected residues from the sequence.
        """
        self.residues = [res for res in self.residues if not res.selected]

    def hideChildren(self):
        """
        Hides all child sequences (effectively collapsing the sequence).
        """
        self.collapsed = True

    def showChildren(self):
        """
        Shows all child sequences (effectively expanding the sequence).
        """
        self.collapsed = False

    def calculatePlotValues(self,
                            half_window_size,
                            min_value=None,
                            max_value=None):
        """
        Calculates window-averaged plot values, and the plot value extrema.
        Does nothing unless the sequence type is constants.SEQ_ANNOTATION.

        :type half_window_size: int
        :param half_window_size: half-size of the window (can be 0 if not
                                 averaging)

        :type min_value: float
        :param min_value: optional minimum value, if None then the minimum
                          will be calculated

        :type max_value: float
        :param max_value: optional maximum value, if None then the maximum
                          will be calculated
        """
        if self.type != constants.SEQ_ANNOTATION:
            return

        length = self.length()
        total = 0.0
        for pos in range(length):
            total = 0.0
            n_total = 0
            for win_pos in range(-half_window_size, half_window_size + 1):
                seq_pos = pos + win_pos
                if 0 <= seq_pos < length:
                    # Positions without a value (None/0) still count towards
                    # the window size but add nothing to the sum.
                    value = self.residues[seq_pos].value
                    n_total += 1
                    if value:
                        total += value
            if n_total:
                # n_total is only 0 for a negative half_window_size.
                total /= float(n_total)
            res = self.residues[pos]
            res.avg_value = total
            res.previous_avg_value = total
            res.next_avg_value = total

        # Seed the extrema with the last computed average (0.0 when empty).
        lowest = highest = total
        prev_res = None
        for res in self.residues:
            if res.avg_value < lowest:
                lowest = res.avg_value
            if res.avg_value > highest:
                highest = res.avg_value
            if prev_res:
                # Link neighbouring averages for plot interpolation.
                res.previous_avg_value = prev_res.avg_value
                prev_res.next_avg_value = res.avg_value
            prev_res = res

        self.min_avg_value = lowest if min_value is None else min_value
        self.max_avg_value = highest if max_value is None else max_value

    def propagateGapsToChildren(self, target_child=None):
        """
        Propagates gaps from a parent sequence to all children. This method
        should be called after loading multiple alignment in order to ensure
        gap consistency between parent sequence and its children.

        :type target_child: `Sequence`
        :param target_child: If specified, only this child sequence
                             will be used. A target that is not in
                             self.children is left untouched.
        """
        targets = [
            child for child in self.children
            if not target_child or target_child == child
        ]

        # First, remove all gaps from the children sequences.
        for child in targets:
            child.removeAllGaps()

        # Insert gaps into the child sequences, column by column.
        for pos, res in enumerate(self.residues):
            if not res.is_gap:
                continue
            for child in targets:
                gap_res = Residue()
                gap_res.code = res.code
                gap_res.is_gap = True
                gap_res.sequence = child
                child.residues.insert(pos, gap_res)

    def propagateGaps(self, sequence, parent_sequence=None, replace=False):
        """
        Propagates gaps from self to a given sequence. Sequence
        is supposed to be a subset of self.

        Side effects: every residue of self gets an `_index` attribute, and
        the gapless residues of `sequence` that are placed in the result get
        their `sequence` attribute set to parent_sequence.

        :type sequence: `Sequence`
        :param sequence: sequence that receives the gaps

        :type parent_sequence: `Sequence`
        :param parent_sequence: value assigned to the `sequence` attribute
                                of the residues in the returned list

        :type replace: bool
        :param replace: if True and the result is not empty, it replaces
                        sequence.residues

        :rtype: list of `Residue`
        :return: list of residues including gaps at matching positions, or
                 None if the gapless text of `sequence` is not contained in
                 the gapless text of self
        """
        for index, res in enumerate(self.residues):
            res._index = index

        residue_list = sequence.gaplessResidues()
        seq1 = sequence.gaplessText()
        seq2 = self.gaplessText()
        if seq1 not in seq2:
            return None

        pos = seq2.find(seq1)
        own_gapless = self.gaplessResidues()
        # Both texts can be empty ("" is found in "" at offset 0); there is
        # then no residue to take the column index from.
        index = own_gapless[pos]._index if own_gapless else 0

        # Pad with gaps up to the column where the match starts.
        new_list = []
        for _ in range(index):
            res = Residue()
            res.code = constants.UNLOCKED_GAP_SYMBOL
            res.is_gap = True
            res.sequence = parent_sequence
            new_list.append(res)

        # Walk the rest of self: copy gaps, and fill residue columns with
        # the residues of `sequence` until they run out.
        position = 0
        for res in self.residues[index:]:
            if res.is_gap:
                gap_res = res.copy()
                gap_res.sequence = parent_sequence
                new_list.append(gap_res)
            elif position < len(residue_list):
                res = residue_list[position]
                res.sequence = parent_sequence
                new_list.append(res)
                position += 1

        if replace and new_list:
            sequence.residues = new_list
        return new_list

    def _matchFraction(self, reference_sequence, consider_gaps, in_columns,
                       is_match):
        """
        Returns the fraction of compared columns for which is_match(res,
        ref_res) is true. Shared by calcIdentity, calcSimilarity and
        calcHomology.

        Columns beyond the shorter sequence and columns where both residues
        are gaps are skipped. Columns where only one residue is a gap are
        compared only if consider_gaps is True. If in_columns is True, only
        columns where both residues are selected are compared.

        :rtype: float
        :return: fraction between 0.0 and 1.0 (0.0 if nothing was compared)
        """
        matches = 0
        compared = 0
        for res, ref_res in zip(self.residues, reference_sequence.residues):
            if res.is_gap and ref_res.is_gap:
                continue
            if (ref_res.is_gap or res.is_gap) and not consider_gaps:
                continue
            if in_columns and not (res.selected and ref_res.selected):
                continue
            if is_match(res, ref_res):
                matches += 1
            compared += 1
        if compared > 0:
            return float(matches) / compared
        return 0.0

    def calcIdentity(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence identity between self and a specified
        reference sequence, assuming that both sequences are already aligned.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :type consider_gaps: bool
        :param consider_gaps: Should we include gaps in the calculation.

        :type in_columns: bool
        :param in_columns: if True, only columns where both residues are
                           selected are compared

        :rtype: float
        :return: sequence identity (between 0.0 and 1.0), or None if no
                 reference sequence is given
        """
        if not reference_sequence:
            return None
        return self._matchFraction(
            reference_sequence, consider_gaps, in_columns,
            lambda res, ref_res: res.code == ref_res.code)

    def calcSimilarity(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence similarity between self and a specified
        reference sequence, assuming that both sequences are already aligned.
        Two residues are similar when their constants.SIMILARITY_MATRIX
        value is positive.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :type consider_gaps: bool
        :param consider_gaps: Should we include gaps in the calculation.

        :type in_columns: bool
        :param in_columns: if True, only columns where both residues are
                           selected are compared

        :rtype: float
        :return: sequence similarity (between 0.0 and 1.0), or None if no
                 reference sequence is given
        """
        if not reference_sequence:
            return None

        def is_similar(res, ref_res):
            score = utils.matrixValue(constants.SIMILARITY_MATRIX, res.code,
                                      ref_res.code)
            return score > 0.0

        return self._matchFraction(reference_sequence, consider_gaps,
                                   in_columns, is_similar)

    def calcHomology(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence homology between self and a specified
        reference sequence, assuming that both sequences are already aligned.

        The homology criterion is based on "side chain chemistry" descriptor
        matching: both codes must belong to the same chemistry group.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :type consider_gaps: bool
        :param consider_gaps: Should we include gaps in the calculation.

        :type in_columns: bool
        :param in_columns: if True, only columns where both residues are
                           selected are compared

        :rtype: float
        :return: sequence homology (between 0.0 and 1.0), or None if no
                 reference sequence is given
        """
        if not reference_sequence:
            return None
        groups = self._HOMOLOGY_GROUPS

        def is_homologous(res, ref_res):
            return (res.code in groups and ref_res.code in groups and
                    groups[res.code] == groups[ref_res.code])

        return self._matchFraction(reference_sequence, consider_gaps,
                                   in_columns, is_homologous)

    def calcScore(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence similarity score between self and
        a specified reference sequence, assuming that both sequences
        are already aligned. The score is the sum of
        constants.SIMILARITY_MATRIX values over the compared columns;
        columns where both residues are gaps are skipped.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :type consider_gaps: bool
        :param consider_gaps: accepted for symmetry with the other calc*
                              methods; not used by this method

        :type in_columns: bool
        :param in_columns: if True, only columns where both residues are
                           selected are scored

        :rtype: float
        :return: sequence similarity score, or None if no reference
                 sequence is given
        """
        if not reference_sequence:
            return None
        score = 0.0
        for res, ref_res in zip(self.residues, reference_sequence.residues):
            if res.is_gap and ref_res.is_gap:
                continue
            if not in_columns or (res.selected and ref_res.selected):
                score += utils.matrixValue(constants.SIMILARITY_MATRIX,
                                           res.code, ref_res.code)
        return score

    def previousUngappedResidue(self, position):
        """
        Returns the closest non-gap residue before position, or None if
        position is out of range or no such residue exists.
        """
        if position < 0 or position >= self.length():
            return None
        for pos in range(position - 1, -1, -1):
            if not self.residues[pos].is_gap:
                return self.residues[pos]
        return None

    def nextUngappedResidue(self, position):
        """
        Returns the closest non-gap residue after position, or None if
        position is out of range or no such residue exists.
        """
        if position < 0 or position >= self.length():
            return None
        for pos in range(position + 1, self.length()):
            if not self.residues[pos].is_gap:
                return self.residues[pos]
        return None

    def ungappedId(self, position, start, end, backwards=False):
        """
        Returns residue ID for the first ungapped position in a specified
        region, starting from position and going forward or backwards. If no
        valid position is found (i.e. all residues in the specified region are
        gaps), returns an empty string.

        :type start: int
        :param start: lower boundary of the search region (inclusive)

        :type end: int
        :param end: upper boundary of the search region (exclusive)

        :type position: int
        :param position: initial position

        :type backwards: bool
        :param backwards: if True, search the sequence backwards

        :rtype: string
        :return: ungapped residue ID, or empty string if no valid residue
                 is found
        """
        length = self.length()
        if (start < 0 or start > length or end < 0 or end > length or
                position < 0 or position >= length):
            return ''

        if backwards:
            candidates = range(position, start - 1, -1)
        else:
            candidates = range(position, end)
        for pos in candidates:
            res = self.residues[pos]
            if not res.is_gap:
                return res.id()
        # Not found, return an empty string.
        return ''

    def hasAnnotationType(self, annotation_type):
        """
        Checks if the sequence already has this annotation type. As a side
        effect, the first matching child is made visible.

        :type annotation_type: int
        :param annotation_type: annotation type

        :rtype: bool
        :return: True if the sequence has this annotation type already,
                 False otherwise
        """
        for child in self.children:
            if child.annotation_type == annotation_type:
                child.visible = True
                return True
        return False

    def sanitize(self):
        """
        Removes residues with illegal codes from self, then refreshes the
        short name. A residue is kept when its code is listed in
        constants.AMINO_ACIDS or is one of '-' and '~'.
        """
        allowed_codes = list(constants.AMINO_ACIDS)
        allowed_codes.append('-')
        allowed_codes.append('~')
        self.residues = [
            res for res in self.residues if res.code in allowed_codes
        ]
        self.makeShortName()

    def makeInactive(self):
        """
        Marks every residue as inactive (amino acid sequences only).
        """
        if self.type == constants.SEQ_AMINO_ACIDS:
            for res in self.residues:
                res.active = False

    def makeActive(self):
        """
        Marks every residue as active (amino acid sequences only).
        """
        if self.type == constants.SEQ_AMINO_ACIDS:
            for res in self.residues:
                res.active = True

    def haveAnchors(self, pos):
        """
        Returns True if there is an inactive residue at or after pos.
        """
        return self.inactivePosition(pos) != -1

    def inactivePosition(self, pos):
        """
        Finds first inactive residue position at or after given position.

        :type pos: int
        :param pos: start position in sequence to begin search

        :rtype: int
        :return: position of first inactive res. If none, returns -1
        """
        while pos < self.length():
            if not self.residues[pos].active:
                return pos
            pos += 1
        return -1

    def makeShortName(self, name=None):
        """
        This method converts a long sequence name into a short name
        that is displayed on a screen.

        If a short name already exists and no name is given, only trailing
        "_X" chain suffixes (X being an upper-case letter) are stripped
        from it. Otherwise the short name (and possibly the chain ID) is
        derived from name, or from self.name when name is empty.

        :type name: str
        :param name: optional name to derive the short name from
        """
        if self.short_name and name is None:
            # remove redundant chain names
            while (len(self.short_name) > 2 and
                   self.short_name[-2] == '_' and
                   'A' <= self.short_name[-1] <= 'Z'):
                self.short_name = self.short_name[:-2]
            return

        if not name:
            name = self.name
        if not name:
            self.name = "Sequence"
            self.short_name = "Sequence"
            return

        # Names shaped like a PDB code ("1abc" or "1abc_...") get the
        # four-character code upper-cased.
        if '0' <= name[0] <= '9' and (len(name) == 4 or
                                      (len(name) > 4 and name[4] == '_')):
            name = name[:4].upper() + name[4:]

        name_split = re.split("[ |,:]", name)
        if len(name_split) > 1:
            # For "gi|...", "pdb|..." and "sp|..." style names the
            # identifier is the second field.
            if name_split[0] in ("gi", "pdb", "sp"):
                self.short_name = name_split[1].upper()
            else:
                self.short_name = name_split[0]
        else:
            self.short_name = name
        if len(name_split) > 2 and len(name_split[2]) == 1:
            self.chain_id = name_split[2]

    def createAnnotationSequence(self):
        """
        Creates an empty annotation, registers it as a child of self and
        aligns its gaps with self.

        :rtype: `Sequence`
        :return: the new annotation sequence
        """
        plot = Sequence()
        plot.parent_sequence = self
        plot.type = constants.SEQ_ANNOTATION
        plot.residues = [
            Residue(sequence=plot) for _ in range(self.gaplessLength())
        ]
        self.children.append(plot)
        self.propagateGapsToChildren(target_child=plot)
        return plot

    def createSecondaryAssignment(self):
        """
        Creates an empty secondary structure assignment annotation.

        :rtype: `Sequence`
        :return: the new sequence; it is not added to self.children
        """
        seq = Sequence()
        seq.chain_id = self.chain_id
        seq.type = constants.SEQ_SECONDARY
        seq.name = "Secondary Structure Assignment : " + self.chain_id
        seq.short_name = "SSA"
        seq.parent_sequence = self
        seq.residues = [
            Residue(sequence=seq) for _ in range(self.gaplessLength())
        ]
        # Because seq is not a child of self, gaps are not propagated to it
        # by this call.
        self.propagateGapsToChildren(target_child=seq)
        return seq

    def createSSBondAssignment(self):
        """
        Creates an empty disulfide bond assignment annotation.

        :rtype: `Sequence`
        :return: the new sequence; it is not added to self.children
        """
        seq = Sequence()
        seq.chain_id = self.chain_id
        seq.type = constants.SEQ_ANNOTATION
        seq.annotation_type = constants.ANNOTATION_SSBOND
        seq.name = "Disulfide Bonds : " + self.chain_id
        seq.short_name = "SSBOND"
        seq._tmp_bond_list = []
        seq.bond_list = []
        seq.parent_sequence = self
        seq.residues = [
            Residue(sequence=seq) for _ in range(self.gaplessLength())
        ]
        # Because seq is not a child of self, gaps are not propagated to it
        # by this call.
        self.propagateGapsToChildren(target_child=seq)
        return seq

    def compare(self, sequence):
        """
        Compares gapless version of self with other sequences and calculates
        identity between both.

        NOTE(review): the result is floor-divided (as in the original
        integer division), so the return value is 1 only when every residue
        code matches and 0 otherwise. Left unchanged because callers may
        test for exactly that; confirm whether a fraction was intended.

        :type sequence: `Sequence`
        :param sequence: sequence to compare with

        :rtype: int
        :return: -1 if the gapless lengths differ or are zero, otherwise
                 1 for identical residue codes and 0 for any mismatch
        """
        res_list1 = self.gaplessResidues()
        res_list2 = sequence.gaplessResidues()
        if len(res_list1) != len(res_list2) or not res_list1:
            return -1
        n_identical = sum(
            1 for res1, res2 in zip(res_list1, res_list2)
            if res1.code == res2.code)
        return n_identical // len(res_list1)

    def getPDBId(self, with_chain=True):
        """
        This function tries to generate a PDB ID based on the sequence name.
        It supports different name formats: 1abcD, pdb|1abc|D, 1ABCD

        The ID is assembled from the short name: a leading digit, three
        alphanumeric characters (lower-cased) and, if present, one more
        alphanumeric character (upper-cased). If the short name does not
        start with a digit nothing is taken from it.

        :type with_chain: bool
        :param with_chain: if True and the chain ID is not blank, the chain
                           ID is appended to the result

        :rtype: str
        :return: the generated ID (possibly empty or partial)
        """
        pdb_id = ""
        short_name = delete_from_str(self.short_name, '.|:,_#!')
        if short_name.startswith("pdb"):
            short_name = short_name[3:]
        if short_name and not short_name[0].isalnum():
            short_name = short_name[1:]

        if short_name and short_name[0].isdigit():
            pdb_id += short_name[0]
            short_name = short_name[1:]
            if len(short_name) >= 3:
                if not short_name[0].isalnum():
                    short_name = short_name[1:]
                code = short_name[0:3].lower()
                if code.isalnum():
                    pdb_id += code
                    short_name = short_name[3:]
                    if short_name:
                        if not short_name[0].isalnum():
                            short_name = short_name[1:]
                        # Slice rather than index: stripping the separator
                        # may have left an empty string, which previously
                        # raised IndexError.
                        if short_name[:1].isalnum():
                            pdb_id += short_name[0].upper()

        if with_chain and self.chain_id > ' ':
            pdb_id += self.chain_id
        return pdb_id

    def isValidTemplate(self, reference=None):
        """
        Returns True if the sequence is visible, is not the reference, has a
        structure or comes from Maestro, and has at least one non-gap
        residue.
        """
        return bool(self.visible and self != reference and
                    (self.has_structure or self.from_maestro) and
                    self.gaplessLength())

    def isValidProtein(self, global_annotation=False):
        """
        Returns True if the sequence is a visible amino acid sequence. If
        global_annotation is True, visible consensus, logo and
        "Sequence Profile" sequences qualify as well.
        """
        if not self.visible:
            return False
        if self.type == constants.SEQ_AMINO_ACIDS:
            return True
        if global_annotation and (self.type == constants.SEQ_CONSENSUS or
                                  self.type == constants.SEQ_LOGO or
                                  self.short_name == "Sequence Profile"):
            return True
        return False

    def isRuler(self):
        """
        Returns True if the sequence is a ruler.
        """
        return self.type == constants.SEQ_RULER

    def isDNA(self):
        """
        Returns True if the sequence is DNA sequence, i.e. its gapless text
        contains only the codes C, T, G and A (also True when empty).
        """
        return all(code in "CTGA" for code in self.gaplessText())

    def _translateNucleotides(self, alphabet, translation_table):
        """
        Replaces the residues with the translation of the gapless text.

        Characters outside alphabet are dropped, the rest is read in
        consecutive triplets (an incomplete trailing triplet is ignored) and
        triplets missing from translation_table become 'X'. On success the
        children are discarded as well.

        :rtype: bool
        :return: True on success, False if the table lookup failed
        """
        allowed = set(alphabet)
        text = ''.join(
            code for code in self.gaplessText() if code in allowed)
        try:
            translated_text = ''.join(
                translation_table.get(text[3 * i:3 * i + 3], 'X')
                for i in range(len(text) // 3))
        except Exception:
            # E.g. a translation table that is not a mapping of strings.
            return False
        self.residues = []
        self.appendResidues(translated_text)
        self.children = []
        return True

    def translateDNA(self, translation_table=constants.TRANSLATION_TABLE_DNA):
        """
        Translates the sequence from nucleotide codes to amino acids.

        :param translation_table: mapping of DNA triplets to amino acid codes

        :rtype: bool
        :return: True if the sequence was translated, False otherwise
        """
        return self._translateNucleotides('CTGA', translation_table)

    def isRNA(self):
        """
        Returns True if the sequence is RNA sequence, i.e. its gapless text
        contains only the codes C, U, G and A (also True when empty).
        """
        return all(code in "CUGA" for code in self.gaplessText())

    def translateRNA(self, translation_table=constants.TRANSLATION_TABLE_RNA):
        """
        Translates the sequence from nucleotide codes to amino acids.

        :param translation_table: mapping of RNA triplets to amino acid codes

        :rtype: bool
        :return: True if the sequence was translated, False otherwise
        """
        return self._translateNucleotides('CUGA', translation_table)

    def renumberResidues(self, start, incr, preserve_ins_codes=False):
        """
        Assigns consecutive numbers to all residues (gaps included).

        :type start: int
        :param start: number assigned to the first residue

        :type incr: int
        :param incr: increment between consecutive residues

        :type preserve_ins_codes: bool
        :param preserve_ins_codes: if False, insertion codes are reset
                                   to ' '
        """
        num = start
        for res in self.residues:
            res.num = num
            num += incr
            if not preserve_ins_codes:
                res.icode = ' '

    def getValues(self, gapless=False):
        """
        Returns a list of residue values.

        :type gapless: bool
        :param gapless: if True, gaps are left out
        """
        residues = self.gaplessResidues() if gapless else self.residues
        return [res.value for res in residues]

    def isSortable(self, reference=None):
        """
        Returns True if the sequence is sortable, False otherwise. Global
        sequences, rulers, separators, history and constraint sequences,
        child sequences and the reference itself are not sortable.
        """
        fixed_types = (constants.SEQ_RULER, constants.SEQ_SEPARATOR,
                       constants.SEQ_HISTORY, constants.SEQ_CONSTRAINTS)
        return not (self.global_sequence or self.type in fixed_types or
                    self.parent_sequence or self == reference)

    def repair(self):
        """
        Repairs the sequence by setting sequence-residue associations
        for all residues. Also, adds missing attributes (using
        default values) to the sequence. Children are repaired recursively.
        """
        for res in self.residues:
            res.repair()
            res.sequence = self

        for child in self.children:
            child.repair()

        # Add missing attributes, taking the defaults from a fresh instance.
        defaults = Sequence()
        for attr, value in list(defaults.__dict__.items()):
            if not hasattr(self, attr):
                setattr(self, attr, value)