Source code for schrodinger.ui.sequencealignment.sequence

"""
Implementation of multiple sequence viewer Sequence class.

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributors: Piotr Rotkiewicz

import copy
import re
from past.utils import old_div

from . import constants
from . import utils
from .residue import Residue


def delete_from_str(inp_str, delete_chars):
    """
    Return a copy of a string with every listed character stripped out.

    Python 3 stand-in for the Python 2 idiom
    ``inp_str.translate(None, delete_chars)``.

    :param inp_str: Text to filter
    :type  inp_str: str

    :param delete_chars: Characters to drop wherever they occur
    :type  delete_chars: str

    :return: `inp_str` without any character found in `delete_chars`
    :rtype: str
    """
    # The third argument of maketrans lists characters that map to None,
    # which translate() interprets as "delete this character".
    return inp_str.translate(str.maketrans('', '', delete_chars))


[docs]class Sequence(object):
    """
    The sequence class represents a single basic sequence object. The Sequence
    object can correspond to amino acid sequence, nucleic acid sequence,
    annotation (such as secondary structure assignment or hydrophobicity plot)
    or helper object (for example, a ruler).
    """

[docs]    def __init__(self):
        self.residues = []  #Actual sequence (list of Residues).
        #Temp list of residues. Used to store them in 'hidden columns' mode.
        self.tmp_residues = None
        self.tmp_children = None
        self.type = constants.SEQ_AMINO_ACIDS  #Possible seq types in constants.py
        self.name = ""  #Long seq name displayed in tooltip.
        self.short_name = ""  #Short seq name displayed in list area.
        self.visible = True

        self.from_maestro = False  #If seq has corresponding entry in Maestro.
        self.maestro_entry_id = None
        self.maestro_chain_name = None

        self.selected = False
        self.collapsed = False
        self.parent_sequence = None
        self.min_avg_value = 0.0
        self.max_avg_value = 1.0

        self.plot_style = constants.PLOT_HISTOGRAM  #For annotation plots.
        self.plot_color = (64, 64, 64)
        self.annotation_type = None  #of sequence

        self.global_sequence = False
        self.chain_id = ' '  #of sequence
        self.children = []  #children sequences

        self.color_scheme = constants.COLOR_SIDECHAIN_CHEMISTRY  #of sequence
        self.height = 1  #in characters
        self.identity = 0.0  #seq identity between self and reference seq.
        self.similarity = 0.0  #seq similarity between self and reference seq.
        self.score = 0.0  #seq score between self and reference seq.
        self.homology = 0.0  #seq homology between self and reference seq.

        self.last_hidden = False  #hidden sequence indicator.
        #If seq has PDB file or Maestro entry struct associated with it.
        self.has_structure = False
        #If sequence corresponds to entry currently included in Maestro wkspce.
        self.maestro_included = False
        self.color = (0, 0, 0)  #RGB for displayed color of sequence name.
        self.parent_sequence = None  #None if this is a parent sequence.
        self.ssb_bond_list = []  #SS bridges list
        self.custom_color = (255, 255, 255)  #for the entire sequence.

[docs]    def appendResidue(self, residue):
        """
        Appends a new residue to self.

        :type residue : sequence alignment Residue object
        """
        if residue:
            self.residues.append(residue)

[docs]    def appendResidues(self, codes, use_numbers=False):
        """
        Create new residues based on a single-code string and append them
        to existing sequence. Converts upper-case characters to lower-case,
        recognize gaps ('.', '-', '~') and ignore other characters.

        :param codes: single-code amino acid
        :type codes: string

        :type use_numbers: boolean
        :param use_numbers: If true, this function will try to recognize
                            residue numbers included in the sequence
                            and assign them to the residues.
        """
        index = 1
        if self.length():
            index = self.residues[-1].num
        res_list = []
        domidx = 1
        nums = ""
        for ch in codes:
            if not ch:
                continue
            ch = str(ch).upper()
            if ch >= '0' and ch <= '9':
                nums += ch
            elif ch > ' ' and \
                ((ch >= 'A' and ch <= 'Z') or
                 ch in ['.', '-', '~']):
                if nums:
                    index = int(nums)
                    nums = ""
                res = Residue()
                res.num = index
                if ch in ['.', '-', '~']:
                    ch = constants.UNLOCKED_GAP_SYMBOL
                    res.is_gap = True
                else:
                    index = index + 1
                res.code = ch
                res.makeName()
                res.sequence = self
                res_list.append(res)
        self.residues += res_list

[docs]    def removeStructureless(self):
        """
        Removes structureless (SEQRES) residues from the sequence
        and its children.
        """
        for child in self.children:
            child.residues = [
                res for parent_res, res in zip(self.residues, child.residues)
                if not parent_res.structureless
            ]
        self.residues = [res for res in self.residues if not res.structureless]

[docs]    def replaceSequence(self, new_sequence):
        """
        This method replaces current sequence with the provided string.

        :type new_sequence: str
        :param new_sequence: Must be same gapless length as old sequence.

        :rtype: bool
        :return: True if successful
        """
        self.removeStructureless()
        gapless = self.gaplessResidues()
        gapless_new = delete_from_str(new_sequence, ' .~-')
        if len(gapless) != len(gapless_new):
            # Something is wrong; gapless lengths don't match.
            return False
        pos = 0
        new_residues = []
        for c in new_sequence:
            if c in ".~-":
                res = Residue()
                res.is_gap = True
                res.code = '~'
                res.sequence = self
            else:
                res = gapless[pos]
                pos += 1
            new_residues.append(res)
        self.residues = new_residues
        self.propagateGapsToChildren()
        return True

[docs]    def toString(self, with_gaps=True):
        """
        Returns a string representation of self.

        :type with_gaps: boolean (default=True)
        :param with_gaps: optional parameter, if True the returned string will
            include gaps, if False - only actual residue codes.
        """
        out_string = ""
        for res in self.residues:
            if not res.is_gap or with_gaps:
                out_string += res.code
        return out_string

[docs]    def text(self):
        """
        Returns self as a string.
        """
        return "".join([res.code for res in self.residues])

[docs]    def gaplessText(self):
        """
        Returns self as a gapless string.
        """
        return "".join([res.code for res in self.residues if not res.is_gap])

[docs]    def copyForUndo(self, deep_copy=True):
        if deep_copy:
            sequence_copy = copy.deepcopy(self)
        else:
            sequence_copy = copy.copy(self)
        return sequence_copy

[docs]    def length(self):
        """
        Returns a length of the sequence.

        :rtype: int
        :return: lengh of the sequence
        """
        return len(self.residues)

[docs]    def unpaddedLength(self):
        """
        Returns a length of the sequence with rightmost gaps stripped out.

        :rtype: int
        :return: length of the stripped sequence
        """
        total = len(self.residues) - 1
        if total <= 0:
            return 0
        while total and self.residues[total].is_gap:
            total -= 1
        return total

[docs]    def gaplessLength(self):
        """
        Returns a length of the sequence excluding gaps.

        :rtype: int
        :return: actual sequence length (number of residues)
        """
        return len(self.gaplessResidues())

[docs]    def gaplessResidues(self):
        """
        Returns a list of gapless residues.
        """
        return [res for res in self.residues if not res.is_gap]

[docs]    def numberOfGaps(self):
        """
        Returns a number of gaps in the sequence.

        :rtype: int
        :return: number of gaps in the sequence
        """
        return sum(1 for res in self.residues if res.is_gap)

[docs]    def countActiveGaps(self, pos):
        count = 0
        while pos < self.length():
            if not self.residues[pos].active:
                break
            if self.residues[pos].is_gap:
                count += 1
            pos += 1
        if pos == self.length():
            count = -1
        return count

[docs]    def getResidue(self, index, ungapped=False, hidden=True):
        """
        Returns a residue at a given sequence position, or None if the
        given position is invalid.

        :type index: int
        :param index: sequence position

        :rtype: `Residue`
        :return: residue for a given position, or None if the position
            is invalid
        """
        if hidden and self.tmp_residues:
            residues = self.tmp_residues
        else:
            residues = self.residues
        if ungapped:
            res_list = [res for res in residues if not res.is_gap]
        else:
            res_list = residues
        if index >= 0 and \
           index < len(res_list):
            return res_list[index]
        return None

[docs]    def getResidueIndex(self, id):
        """
        Returns index of residue with given id

        :type id : string
        :param id : str(res.num) + str(res.icode)

        :rtype : int if valid id, None if not
        :return : index of res if valid id, None if not
        """
        id = id.strip().rstrip()
        for index, res in enumerate(self.residues):
            if res.id().rstrip() == id:
                return index
        return None

[docs]    def getUngappedIndex(self, index):
        """
        Returns a residue index corresponding to ungapped position.

        :type index: int
        :param index: Residue index in gapped sequence

        :rtype: int
        :return: Index in ungapped sequence.
        """
        if index < 0 or index >= self.length():
            return -1
        res = self.residues[index]
        if res.is_gap:
            return -1
        res_list = self.gaplessResidues()
        return res_list.index(res)

[docs]    def insertGaps(self, position, n_gaps, active=True):
        """
        Inserts a specified number of gaps at a specified position.

        :type position: int
        :param position: sequence position where the gaps will be inserted

        :type n_gaps: int
        :param n_gaps: number of gaps to be inserted at the position

        :rtype: int
        :return: number of gaps actually inserted at the position
        """
        n_inserted = 0
        for gap in range(n_gaps):
            res = Residue()
            res.code = constants.UNLOCKED_GAP_SYMBOL
            res.is_gap = True
            res.sequence = self
            res.active = active
            self.residues.insert(position, res)
            n_inserted = n_inserted + 1
        return n_inserted

[docs]    def removeGaps(self, position, n_gaps):
        """
        Removes a specified number of gaps (or less) at a given position,
        starting from position and going to C-terminus. (towards higher index)

        :type position: int
        :param position: sequence position from where the gaps will be removed

        :type n_gaps: int
        :param n_gaps: number of gaps to be removed at the position

        :rtype: int
        :return: number of gaps actually removed at the position
        """
        n_removed = 0
        for pos in range(position, len(self.residues)):
            if pos >= 0 and pos < self.length():
                while pos >= 0 and pos < self.length() and \
                 self.residues[pos].is_gap and \
                 self.residues[pos].code == constants.UNLOCKED_GAP_SYMBOL:
                    if not self.residues[pos].active:
                        break
                    self.residues.pop(pos)
                    n_removed = n_removed + 1
                    n_gaps = n_gaps - 1
                    if n_gaps == 0:
                        break
            if n_gaps == 0:
                break
        return n_removed

[docs]    def removeGapsBackwards(self, position, n_gaps):
        """
        Removes a specified number of gaps (or less) at a given position,
        starting at the position and going to N-terminus. (towards lower index)

        :type position: int
        :param position: sequence position from where the gaps will be removed

        :type n_gaps: int
        :param n_gaps: number of gaps to be removed at the position

        :rtype: int
        :return: number of gaps actually removed at the position
        """
        n_removed = 0
        for pos in range(position, -1, -1):
            if pos < 0 or pos >= len(self.residues):
                continue
            while self.residues[pos].is_gap and \
                    self.residues[pos].code == constants.UNLOCKED_GAP_SYMBOL:
                if not self.residues[pos].active:
                    break
                self.residues.pop(pos)
                n_removed = n_removed + 1
                n_gaps = n_gaps - 1
                if n_gaps == 0:
                    break
            if n_gaps == 0:
                break
        return n_removed

[docs]    def removeAllGaps(self, selected_only=False):
        """
        Removes all gaps from the sequence. If selected_only, only removes gaps
        if gaps are selected.
        """
        any_selected = self.hasSelectedResidues()
        if selected_only and any_selected:
            self.residues = [
                res for res in self.residues
                if not res.is_gap or not res.selected
            ]
        else:
            self.residues = self.gaplessResidues()

[docs]    def unselectResidues(self):
        """
        Unselects all residues in the sequence
        """
        for res in self.residues:
            res.selected = False
        for child in self.children:
            for res in child.residues:
                res.selected = False

[docs]    def selectAllResidues(self):
        for res in self.residues:
            res.selected = True
        for child in self.children:
            for res in child.residues:
                res.selected = True

[docs]    def invertSelection(self):
        for res in self.residues:
            if res.selected:
                res.selected = False
            else:
                res.selected = True
        for child in self.children:
            for res in child.residues:
                if res.selected:
                    res.selected = False
                else:
                    res.selected = True

[docs]    def hasSelectedResidues(self):
        """
        :rtype: bool
        :return: True if any of the residues are selected, False otherwise
        """
        for res in self.residues:
            if res.selected:
                return True
        return False

[docs]    def hasSelectedChildren(self):
        """
        Returns True if any of its children are selected.
        """
        return any([child.selected for child in self.children])

[docs]    def hasAllSelectedResidues(self):
        """
        Checks if all residues in the sequence are selected.

        :rtype: bool
        :return: True if all residues are selected, False otherwise
        """
        return all([res.selected for res in self.residues])

[docs]    def deleteSelectedResidues(self):
        """
        Removes all selected residues from the sequence.
        """
        self.residues = [res for res in self.residues if not res.selected]

[docs]    def hideChildren(self):
        """
        Hides all child sequences (effectively collapsing the sequence).
        """
        self.collapsed = True

[docs]    def showChildren(self):
        """
        Shows all child sequences (effectively expanding the sequence).
        """
        self.collapsed = False

[docs]    def calculatePlotValues(self,
                            half_window_size,
                            min_value=None,
                            max_value=None):
        """
        Calculates window-averaged plot values, and the plot value extrema.

        :type half_window_size: int
        :param half_window_size: half-size of the window (can be 0 if not
            averaging)

        :type min_value: float
        :param min_value: optional minimum value, if None then the minimum
            will be calculated

        :type max_value: float
        :param max_value: optional maximum value, if None then the minimum
            will be calculated
        """
        if self.type == constants.SEQ_ANNOTATION:
            total = 0.0
            for pos in range(self.length()):
                total = 0.0
                n_total = 0
                for win_pos in range(-half_window_size, half_window_size + 1):
                    seq_pos = pos + win_pos
                    if seq_pos >= 0 and seq_pos < self.length():
                        value = self.residues[seq_pos].value
                        n_total += 1
                        if value:
                            total += value
                total /= float(n_total)
                self.residues[pos].avg_value = total
                self.residues[pos].previous_avg_value = total
                self.residues[pos].next_avg_value = total
            min = max = total
            prev_res = None
            for res in self.residues:
                if res.avg_value < min:
                    min = res.avg_value
                if res.avg_value > max:
                    max = res.avg_value
                if prev_res:
                    res.previous_avg_value = prev_res.avg_value
                    prev_res.next_avg_value = res.avg_value
                prev_res = res
            if min_value is None:
                self.min_avg_value = min
            else:
                self.min_avg_value = min_value
            if max_value is None:
                self.max_avg_value = max
            else:
                self.max_avg_value = max_value

[docs]    def propagateGapsToChildren(self, target_child=None):
        """
        Propagates gaps from a parent sequence to all children. This method
        should be called after loading multiple alignment in order to ensure
        gap consistency between parent sequence and its children.

        :type target_child: `Sequence`
        :param target_child: If specified, only this child sequence
            will be used.
        """
        # First, remove all gaps from the children sequences.
        for child in self.children:
            if not target_child or target_child == child:
                child.removeAllGaps()
        # Insert gaps into the child sequences.
        for pos in range(len(self.residues)):
            res = self.residues[pos]
            if res.is_gap:
                for child in self.children:
                    if not target_child or target_child == child:
                        gap_res = Residue()
                        gap_res.code = res.code
                        gap_res.is_gap = True
                        gap_res.sequence = child
                        child.residues.insert(pos, gap_res)

[docs]    def propagateGaps(self, sequence, parent_sequence=None, replace=False):
        """
        Propagates gaps from self to a given sequence. Sequence
        is supposed to be a subset of self.

        :rtype: list of `Residue`
        :return: list of residues including gaps at matching positions
        """
        for index, res in enumerate(self.residues):
            res._index = index
        residue_list = sequence.gaplessResidues()
        seq1 = sequence.gaplessText()
        seq2 = self.gaplessText()
        if seq1 not in seq2:
            return None
        pos = seq2.find(seq1)
        index = self.gaplessResidues()[pos]._index
        new_list = []
        for gap_pos in range(index):
            res = Residue()
            res.code = constants.UNLOCKED_GAP_SYMBOL
            res.is_gap = True
            res.sequence = parent_sequence
            new_list.append(res)
        position = 0
        for res in self.residues[index:]:
            if res.is_gap:
                gap_res = res.copy()
                gap_res.sequence = parent_sequence
                new_list.append(gap_res)
            elif position < len(residue_list):
                res = residue_list[position]
                res.sequence = parent_sequence
                new_list.append(res)
                position += 1
        if replace and new_list:
            sequence.residues = new_list
        return new_list

[docs]    def calcIdentity(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence identity between self and a specified
        reference sequence, assuming that both sequences are already aligned.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :type consider_gaps: bool
        :param consider_gaps: Should we include gaps in the calculation.

        :rtype: float
        :return: sequence identity (between 0.0 and 1.0)
        """
        if not reference_sequence:
            return None
        length = self.length()
        if length > reference_sequence.length():
            length = reference_sequence.length()
        id = 0
        real_length = 0
        for pos in range(length):
            res = self.residues[pos]
            ref_res = reference_sequence.residues[pos]
            if res.is_gap and ref_res.is_gap:
                continue
            if not (ref_res.is_gap or res.is_gap) or consider_gaps:
                if not in_columns or (res.selected and ref_res.selected):
                    if res.code == ref_res.code:
                        id += 1
                    real_length += 1
        if real_length > 0:
            return old_div(float(id), real_length)
        return 0.0

[docs]    def calcSimilarity(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence similarity between self and a specified
        reference sequence, assuming that both sequences are already aligned.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :type consider_gaps: bool
        :param consider_gaps: Should we include gaps in the calculation.

        :rtype: float
        :return: sequence similarity (between 0.0 and 1.0)
        """
        if not reference_sequence:
            return None
        length = self.length()
        if length > reference_sequence.length():
            length = reference_sequence.length()
        id = 0
        real_length = 0
        for pos in range(length):
            res = self.residues[pos]
            ref_res = reference_sequence.residues[pos]
            score = utils.matrixValue(constants.SIMILARITY_MATRIX, res.code,
                                      ref_res.code)
            if res.is_gap and ref_res.is_gap:
                continue
            if not (ref_res.is_gap or res.is_gap) or consider_gaps:
                if not in_columns or (res.selected and ref_res.selected):
                    if score > 0.0:
                        id += 1
                    real_length += 1
        if real_length > 0:
            return old_div(float(id), real_length)
        return 0.0

[docs]    def calcHomology(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence homolgy between self and a specified
        reference sequence, assuming that both sequences are already aligned.
        The homology criterion is based on "side chain chemistry" descriptor
        matching.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :type consider_gaps: bool
        :param consider_gaps: Should we include gaps in the calculation.

        :rtype: float
        :return: sequence similarity (between 0.0 and 1.0)
        """
        if not reference_sequence:
            return None
        hom_dict = {
            "D": 1,
            "E": 1,
            "R": 2,
            "K": 2,
            "H": 2,
            "G": 3,
            "A": 3,
            "V": 3,
            "I": 3,
            "L": 3,
            "M": 3,
            "F": 4,
            "Y": 4,
            "W": 4,
            "S": 5,
            "T": 5,
            "N": 5,
            "Q": 5,
            "C": 6,
            "P": 7
        }
        length = self.length()
        if length > reference_sequence.length():
            length = reference_sequence.length()
        id = 0
        real_length = 0
        for pos in range(length):
            res = self.residues[pos]
            ref_res = reference_sequence.residues[pos]
            if res.is_gap and ref_res.is_gap:
                continue
            if not (ref_res.is_gap or res.is_gap) or consider_gaps:
                if not in_columns or (res.selected and ref_res.selected):
                    if (res.code in hom_dict and ref_res.code in hom_dict and
                            hom_dict[res.code] == hom_dict[ref_res.code]):
                        id += 1
                    real_length += 1
        if real_length > 0:
            return old_div(float(id), real_length)
        return 0.0

[docs]    def calcScore(self, reference_sequence, consider_gaps, in_columns):
        """
        This method calculates sequence similarity score between self and
        a specified reference sequence, assuming that both sequences
        are already aligned.

        :type reference_sequence: `Sequence`
        :param reference_sequence: reference sequence

        :rtype: float
        :return: sequence similarity score
        """
        if not reference_sequence:
            return None
        length = self.length()
        if length > reference_sequence.length():
            length = reference_sequence.length()
        score = 0.0
        for pos in range(length):
            res = self.residues[pos]
            ref_res = reference_sequence.residues[pos]
            if res.is_gap and ref_res.is_gap:
                continue
            if not in_columns or (res.selected and ref_res.selected):
                score += utils.matrixValue(constants.SIMILARITY_MATRIX,
                                           res.code, ref_res.code)
        return score

[docs]    def previousUngappedResidue(self, position):
        if position < 0 or position >= self.length():
            return None
        position -= 1
        while position >= 0:
            if not self.residues[position].is_gap:
                return self.residues[position]
            position -= 1
        return None

[docs]    def nextUngappedResidue(self, position):
        if position < 0 or position >= self.length():
            return None
        position += 1
        while position < self.length():
            if not self.residues[position].is_gap:
                return self.residues[position]
            position += 1
        return None

[docs]    def ungappedId(self, position, start, end, backwards=False):
        """
        Returns residue ID for the first ungapped position in a specified
        region, starting from position and going forward or backwards. If no
        valid position is found (i.e. all residues in the specified region are
        gaps), returns an empty string.

        :type start: int
        :param start: lower boundary of the search region

        :type end: int
        :param end: upper boundary of the search region

        :type position: int
        :param position: initial position

        :type backwards: bool
        :param backwards: if True, search the sequence backwards

        :rtype: string
        :return: ungapped residue ID, or empty string if no valid residue
            is found
        """
        if start < 0 or start > self.length() or \
           end < 0 or end > self.length() or \
           position < 0 or position >= self.length():
            return ''
        if backwards:
            while position >= start:
                res = self.residues[position]
                if not res.is_gap:
                    return res.id()
                position -= 1
        else:
            while position < end:
                res = self.residues[position]
                if not res.is_gap:
                    return res.id()
                position += 1
        # Not found, return an empty string.
        return ''

[docs]    def hasAnnotationType(self, annotation_type):
        """
        Checks if the sequence already has this annotation type.

        :type annotation_type: int
        :param annotation_type: annotation type

        :rtype: bool
        :return: True if the sequence has this annotation type already,
            False otherwise
        """
        for child in self.children:
            if child.annotation_type == annotation_type:
                child.visible = True
                return True
        return False

[docs]    def sanitize(self):
        """
        Removes all gaps and illegal residue codes from self.
        """
        amino_acids = list(constants.AMINO_ACIDS)
        amino_acids.append('-')
        amino_acids.append('~')
        self.residues = [
            res for res in self.residues if res.code in amino_acids
        ]
        self.makeShortName()

[docs]    def makeInactive(self):
        if self.type == constants.SEQ_AMINO_ACIDS:
            for res in self.residues:
                res.active = False

[docs]    def makeActive(self):
        if self.type == constants.SEQ_AMINO_ACIDS:
            for res in self.residues:
                res.active = True

[docs]    def haveAnchors(self, pos):
        while pos < self.length():
            if not self.residues[pos].active:
                return True
            pos += 1
        return False

[docs]    def inactivePosition(self, pos):
        """
        Finds first inactive residue position after given position.

        :type pos: int
        :param pos: start position in sequence to begin search

        :rtype: int
        :return: position of first inactive res. If none, returns -1
        """
        while pos < self.length():
            if not self.residues[pos].active:
                return pos
            pos += 1
        return -1

[docs]    def makeShortName(self, name=None):
        """
        This method converts a long sequence name into a short name
        that is displayed on a screen.
        """
        if self.short_name and name is None:
            # remove redundant chain names
            while len(self.short_name) > 2 and self.short_name[-2] == '_' and \
                    self.short_name[-1] >= 'A' and self.short_name[-1] <= 'Z':
                self.short_name = self.short_name[:-2]
            return
        name_split = ""
        if not name:
            name = self.name
        if name:
            if name[0] >= '0' and name[0] <= '9' and (len(name) == 4 or \
                                (len(name) > 4 and name[4] == '_')):
                name = name[:4].upper() + name[4:]
            name_split = re.split("[ |,:]", name)
            if len(name_split) > 1:
                if name_split[0] == "gi" or name_split[0] == "pdb" or \
                   name_split[0] == "sp":
                    self.short_name = name_split[1].upper()
                else:
                    self.short_name = name_split[0]
            else:
                self.short_name = name
            if len(name_split) > 2 and len(name_split[2]) == 1:
                self.chain_id = name_split[2]
        else:
            self.name = "Sequence"
            self.short_name = "Sequence"

[docs]    def createAnnotationSequence(self):
        """
        Creates an empty annotation.
        """
        plot = Sequence()
        plot.parent_sequence = self
        plot.type = constants.SEQ_ANNOTATION
        length = self.gaplessLength()
        plot.residues = [Residue(sequence=plot) for index in range(length)]
        self.children.append(plot)
        self.propagateGapsToChildren(target_child=plot)
        return plot

[docs]    def createSecondaryAssignment(self):
        """
        Creates an empty secondary structure assignment annotation.
        """
        seq = Sequence()
        seq.chain_id = self.chain_id
        seq.type = constants.SEQ_SECONDARY
        seq.name = "Secondary Structure Assignment :  " + self.chain_id
        seq.short_name = "SSA"
        seq.parent_sequence = self
        seq.residues = [
            Residue(sequence=seq) for index in range(self.gaplessLength())
        ]
        # Because seq is not child of self.sequences, gaps not propagated.
        self.propagateGapsToChildren(target_child=seq)
        return seq

[docs]    def createSSBondAssignment(self):
        """
        Creates an empty disulfide bond assignment annotation.
        """
        seq = Sequence()
        seq.chain_id = self.chain_id
        seq.type = constants.SEQ_ANNOTATION
        seq.annotation_type = constants.ANNOTATION_SSBOND
        seq.name = "Disulfide Bonds :  " + self.chain_id
        seq.short_name = "SSBOND"
        seq._tmp_bond_list = []
        seq.bond_list = []
        seq.parent_sequence = self
        seq.residues = [
            Residue(sequence=seq) for index in range(self.gaplessLength())
        ]
        # Because seq is not a child of self.sequences, gaps not propagated:
        self.propagateGapsToChildren(target_child=seq)
        return seq

[docs]    def compare(self, sequence):
        """
        Compares gapless version of self with other sequences and calculates
        identity between both.
        """
        res_list1 = self.gaplessResidues()
        res_list2 = sequence.gaplessResidues()
        if len(res_list1) != len(res_list2):
            return -1
        if len(res_list1) == 0:
            return -1
        id = 0
        for pos in range(len(res_list1)):
            if res_list1[pos].code == res_list2[pos].code:
                id += 1
        return old_div(id, len(res_list1))

[docs]    def getPDBId(self, with_chain=True):
        """
        This function tries to generate a PDB ID based on the sequence name.

        It supports different name formats: 1abcD, pdb|1abc|D, 1ABCD
        If the conversion fails, it will return an empty string.
        """
        pdb_id = ""
        short_name = self.short_name
        short_name = delete_from_str(short_name, '.|:,_#!')
        if short_name.startswith("pdb"):
            short_name = short_name[3:]
        if len(short_name) and \
           not short_name[0].isalnum():
            short_name = short_name[1:]
        if len(short_name) and \
           short_name[0].isdigit():
            pdb_id += short_name[0]
            short_name = short_name[1:]
            if len(short_name) >= 3:
                if not short_name[0].isalnum():
                    short_name = short_name[1:]
                code = short_name[0:3].lower()
                if code.isalnum():
                    pdb_id += code
                    short_name = short_name[3:]
                    if len(short_name):
                        if not short_name[0].isalnum():
                            short_name = short_name[1:]
                        if short_name[0].isalnum():
                            pdb_id += short_name[0].upper()
        if with_chain and self.chain_id > ' ':
            pdb_id += self.chain_id
        return pdb_id

[docs]    def isValidTemplate(self, reference=None):
        if self.visible and self != reference and \
            (self.has_structure or self.from_maestro) and self.gaplessLength():
            return True
        return False

[docs]    def isValidProtein(self, global_annotation=False):
        if self.visible:
            if self.type == constants.SEQ_AMINO_ACIDS:
                return True
            if global_annotation and (self.type == constants.SEQ_CONSENSUS or \
                                      self.type == constants.SEQ_LOGO or \
                                      self.short_name == "Sequence Profile"):
                return True
        return False

[docs]    def isRuler(self):
        return self.type == constants.SEQ_RULER

[docs]    def isDNA(self):
        """
        Returns True if the sequence is DNA sequence.
        """
        text = self.gaplessText()
        for res in text:
            if res not in "CTGA":
                return False
        return True

[docs]    def translateDNA(self, translation_table=constants.TRANSLATION_TABLE_DNA):
        """
        Translates the sequence from nucleotide codes to amino acids.
        """
        DNA_codes = set('CTGA')
        text = self.gaplessText()
        text = ''.join(res for res in text if res in DNA_codes)
        try:
            translated_text = ''.join(
                    [translation_table.get(text[3 * i:3 * i + 3], 'X') for \
                     i in range(old_div(len(text), 3))])
        except:
            return False
        self.residues = []
        self.appendResidues(translated_text)
        self.children = []
        return True

[docs]    def isRNA(self):
        """
        Returns True if the sequence is RNA sequence.
        """
        text = self.gaplessText()
        for res in text:
            if res not in "CUGA":
                return False
        return True

[docs]    def translateRNA(self, translation_table=constants.TRANSLATION_TABLE_RNA):
        """
        Translates the sequence from nucleotide codes to amino acids.
        """
        RNA_codes = set('CUGA')
        text = self.gaplessText()
        text = ''.join(res for res in text if res in RNA_codes)
        try:
            translated_text = ''.join([translation_table.get(
                                text[3 * i:3 * i + 3], 'X') for i in \
                                range(old_div(len(text), 3))])
        except:
            return False
        self.residues = []
        self.appendResidues(translated_text)
        self.children = []
        return True

[docs]    def renumberResidues(self, start, incr, preserve_ins_codes=False):
        num = start
        for res in self.residues:
            res.num = num
            num += incr
            if preserve_ins_codes:
                continue
            res.icode = ' '

[docs]    def getValues(self, gapless=False):
        """
        Returns a list of residue values.
        """
        if gapless:
            residues = self.gaplessResidues()
        else:
            residues = self.residues
        return [res.value for res in residues]

[docs]    def isSortable(self, reference=None):
        """
        Returns True if the sequence is sortable, False otherwise.
        """
        if (self.global_sequence or self.type == constants.SEQ_RULER or
                self.type == constants.SEQ_SEPARATOR or
                self.type == constants.SEQ_HISTORY or
                self.type == constants.SEQ_CONSTRAINTS or
                self.parent_sequence or self == reference):
            return False
        return True

[docs]    def repair(self):
        """
        Repairs the sequence by setting sequence-residue associations
        for all residues. Also, adds missing attributes (using
        default values) to the sequence.
        """
        for res in self.residues:
            res.repair()
            res.sequence = self
            for child in self.children:
                child.repair()
        # Add missing attributes
        empty_sequence = Sequence()
        for attr in list(empty_sequence.__dict__):
            if not hasattr(self, attr):
                setattr(self, attr, getattr(empty_sequence, attr))