Source code for schrodinger.protein.residue

# -*- coding: utf-8 -*-

import copy
import functools
import itertools
import types
import typing
import weakref
from enum import Enum

import decorator

from schrodinger import structure
from schrodinger.models import json
from schrodinger.protein import annotation
from schrodinger.protein import constants
from schrodinger.protein import nonstandard_residues
from schrodinger.protein import properties

# Display name shared by the "unknown" residue and nucleotide types.
_UNKNOWN_NAME = "Unknown"
# Score reported for a residue pair that is missing from a similarity matrix.
_DEFAULT_SIMILARITY = 0.0
# One-letter amino acid code -> homology group number. Each string below
# lists the members of one group; groups are numbered from 1 in order.
_HOMOLOGY_GROUPS = {
    code: group_number
    for group_number, members in enumerate(
        ("DE", "RKH", "GAVILM", "FYW", "STNQ", "C", "P"), start=1)
    for code in members
}


def check_if_comparable(default_return=0):
    """
    Build a decorator that short-circuits residue comparison methods.

    The decorated method is skipped and `default_return` is returned when:
    - the other residue is a gap
    - the two residue types are instances of different classes
      (e.g. protein/DNA or DNA/RNA)
    - either residue type is named "Unknown"

    :param default_return: Value returned when the residues cannot be
        compared.

    :return: A decorator for methods taking `(self, other, ...)`.
    """

    @decorator.decorator
    def guarded(func, self, other, *args, **kwargs):
        if other.is_gap:
            return default_return
        # Exact class comparison on purpose: DNA and RNA types are sibling
        # subclasses and must not be treated as comparable.
        if type(self.type) is not type(other.type):
            return default_return
        if self.type.name == _UNKNOWN_NAME:
            return default_return
        if other.type.name == _UNKNOWN_NAME:
            return default_return
        return func(self, other, *args, **kwargs)

    return guarded


class ResidueChainKey(typing.NamedTuple):
    """
    Chain-relative part of a structured residue's identity: the residue
    number together with its insertion code.
    """
    # TODO MSV-2379 account for items in WHResidue.d_hash
    resnum: int
    inscode: str


class ResidueKey(typing.NamedTuple):
    """
    Partial identifier for a structured residue: entry, chain, residue
    number and insertion code.

    Order and items based on `MaestroStructureModel._getKey`
    """
    entry_id: int
    chain: str
    resnum: int
    inscode: str

    def chainKey(self):
        """
        Return the chain-relative portion of this key.

        :rtype: ResidueChainKey
        """
        return ResidueChainKey(resnum=self.resnum, inscode=self.inscode)


def get_matrix_value(matrix, first, second):
    """
    Look up the similarity score for a pair of residues.

    The pair is tried in the given order and then reversed, so the matrix
    only needs to store one orientation of each pair. A pair found in
    neither orientation yields the default similarity.
    """
    for pair in ((first, second), (second, first)):
        if pair in matrix:
            return matrix[pair]
    return _DEFAULT_SIMILARITY


def box_letter(letter):
    """
    Map an ASCII letter to the circled Unicode variant

    :param letter: ASCII letter to map
    :type letter: str

    :return: The circled form of the letter (U+24D0-U+24E9 for lowercase,
        U+24B6-U+24CF for uppercase)
    :rtype: str

    :raises ValueError: if the input is not an ASCII letter (including
        empty or multi-character input)
    """
    try:
        codepoint = ord(letter)
    except TypeError:
        # ord() rejects anything that is not exactly one character; report
        # it with the documented exception type instead of a TypeError.
        raise ValueError(f"Could not map {letter}") from None
    if ord("a") <= codepoint <= ord("z"):
        offset = 0x24D0 - ord("a")  # CIRCLED LATIN SMALL LETTER A
    elif ord("A") <= codepoint <= ord("Z"):
        offset = 0x24B6 - ord("A")  # CIRCLED LATIN CAPITAL LETTER A
    else:
        raise ValueError(f"Could not map {letter}")
    return chr(codepoint + offset)


def get_structure_residue_chain_key(structure_residue):
    """
    Build the chain-relative key of a structure residue from its residue
    number and insertion code.

    :return: (resnum, inscode)
    :rtype: tuple(int, str)
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    resnum = structure_residue.resnum
    inscode = structure_residue.inscode
    return resnum, inscode


def get_residue_key(residue, entry_id, chain):
    """
    Build the `ResidueKey` for a residue.

    :param residue: Residue whose `getChainKey()` supplies the residue
        number and insertion code
    :type residue: Residue

    :param entry_id: Entry ID; converted with `int`
    :type entry_id: str or int

    :param chain: Chain name
    :type chain: str

    :return: The residue key containing entry_id, chain, resnum, and inscode
    :rtype: ResidueKey
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    entry = int(entry_id)
    chain_key = residue.getChainKey()
    return ResidueKey(entry, chain, *chain_key)


def get_structure_residue_key(structure_residue, entry_id):
    """
    Build the `ResidueKey` for a structure residue.

    :param structure_residue: Structure residue providing the chain name,
        residue number and insertion code
    :type structure_residue: schrodinger.structure._Residue

    :param entry_id: Entry ID; converted with `int`
    :type entry_id: str or int

    :return: The residue key containing entry_id, chain, resnum, and inscode
    :rtype: ResidueKey
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    entry = int(entry_id)
    chain = structure_residue.chain
    chain_key = get_structure_residue_chain_key(structure_residue)
    return ResidueKey(entry, chain, *chain_key)


def get_formatted_residue(res) -> str:
    """
    Format a residue as its long code, residue number and insertion code
    (with surrounding whitespace removed), concatenated without separators.

    :param res: residue
    :type res: residue.Residue

    :return: formatted string
    """
    inscode = res.inscode.strip() if False else None
    return "{}{}{}".format(res.long_code, res.resnum, res.inscode.strip())


def get_formatted_residue_range(start_res, end_res) -> str:
    """
    Format a residue range as "<start> - <end>", e.g. for a tooltip.

    :param start_res: starting residue
    :type start_res: residue.Residue

    :param end_res: ending residue
    :type end_res: residue.Residue

    :return: a formatted string to be used
    """
    start_text = get_formatted_residue(start_res)
    end_text = get_formatted_residue(end_res)
    return " - ".join((start_text, end_text))


def order_contiguous(residues):
    """
    Check if a collection of residues is contiguous, and put them in
    contiguous order if they are not.

    The input is not modified; the search works on a private copy.

    :type residues: list
    :param residues: `schrodinger.structure._Residue` objects (any iterable)

    :rtype: list or None
    :return: List of `schrodinger.structure._Residue` objects in contiguous
        order, an empty list for empty input, or None if the residues were
        not contiguous.
    """
    # Copy so that popping below never empties the caller's list.
    remaining = list(residues)
    if not remaining:
        return []
    ordered_residues = [remaining.pop(0)]

    # Each pass attaches exactly one residue to either end of the ordered
    # run; a pass that attaches nothing means the rest are unconnected.
    while remaining:
        for index, candidate in enumerate(remaining):
            if ordered_residues[-1].isConnectedToResidue(candidate):
                # Connected to last residue in the ordered list
                ordered_residues.append(remaining.pop(index))
                break
            if candidate.isConnectedToResidue(ordered_residues[0]):
                # Connected to first residue in the ordered list
                ordered_residues.insert(0, remaining.pop(index))
                break
        else:
            # There remain some unconnected residues
            return None
    return ordered_residues


# Residue property categories. Each enum is built with the functional API
# from a whitespace-separated list of member names (values count up from 1).

HELIX_PROPENSITY = Enum(
    'HELIX_PROPENSITY',
    'NoPropensity Likely Weak Ambivalent HelixBreaking')

BETA_STRAND_PROPENSITY = Enum(
    'BETA_STRAND_PROPENSITY',
    'NoPropensity StrandBreaking Ambivalent StrandForming')

TURN_PROPENSITY = Enum(
    'TURN_PROPENSITY',
    'NoPropensity TurnForming Ambivalent TurnBreaking')

HELIX_TERMINATION_TENDENCY = Enum(
    'HELIX_TERMINATION_TENDENCY',
    'NoTendency HelixStarting Ambivalent HelixEnding')

SOLVENT_EXPOSURE_TENDENCY = Enum(
    'SOLVENT_EXPOSURE_TENDENCY',
    'NoTendency Surface Ambivalent Buried')

STERIC_GROUP = Enum(
    'STERIC_GROUP',
    'NoSteric Small Ambivalent Polar Aromatic')

SIDE_CHAIN_CHEM = Enum(
    'SIDE_CHAIN_CHEM',
    'NoSideChainChem AcidicHydrophilic BasicHydrophilic '
    'NeutralHydrophobicAliphatic NeutralHydrophobicAromatic '
    'NeutralHydrophilic PrimaryThiol IminoAcid')

RESIDUE_CHARGE = Enum('RESIDUE_CHARGE', 'Positive Negative Neutral')

# Color Block Tooltip Map.
# Maps every member of the residue property enums to its tooltip text.
# The "no value" member of each enum maps to an empty string.
CB_TT_MAP = {
    HELIX_PROPENSITY.NoPropensity: "",
    HELIX_PROPENSITY.Likely: "helix-forming",
    HELIX_PROPENSITY.Weak: "weak helix-forming",
    HELIX_PROPENSITY.Ambivalent: "ambivalent",
    HELIX_PROPENSITY.HelixBreaking: "helix-breaking",
    BETA_STRAND_PROPENSITY.NoPropensity: "",
    BETA_STRAND_PROPENSITY.StrandBreaking: "strand-breaking",
    BETA_STRAND_PROPENSITY.Ambivalent: "ambivalent",
    BETA_STRAND_PROPENSITY.StrandForming: "strand-forming",
    TURN_PROPENSITY.NoPropensity: "",
    TURN_PROPENSITY.TurnForming: "turn-forming",
    TURN_PROPENSITY.Ambivalent: "ambivalent",
    TURN_PROPENSITY.TurnBreaking: "turn-breaking",
    HELIX_TERMINATION_TENDENCY.NoTendency: "",
    HELIX_TERMINATION_TENDENCY.HelixStarting: "helix-starting",
    HELIX_TERMINATION_TENDENCY.Ambivalent: "ambivalent",
    HELIX_TERMINATION_TENDENCY.HelixEnding: "helix-ending",
    SOLVENT_EXPOSURE_TENDENCY.NoTendency: "",
    SOLVENT_EXPOSURE_TENDENCY.Surface: "surface",
    SOLVENT_EXPOSURE_TENDENCY.Ambivalent: "ambivalent",
    SOLVENT_EXPOSURE_TENDENCY.Buried: "buried",
    # Previously absent, so looking up this member raised KeyError.
    STERIC_GROUP.NoSteric: "",
    STERIC_GROUP.Small: "small, non-interfering",
    STERIC_GROUP.Ambivalent: "ambivalent",
    STERIC_GROUP.Polar: "sticky polar",
    STERIC_GROUP.Aromatic: "aromatic",
    # Previously absent, so looking up this member raised KeyError.
    SIDE_CHAIN_CHEM.NoSideChainChem: "",
    SIDE_CHAIN_CHEM.AcidicHydrophilic: "acidic, hydrophilic",
    SIDE_CHAIN_CHEM.BasicHydrophilic: "basic, hydrophilic",
    SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic: "neutral, hydrophobic, aliphatic",
    SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic: "neutral, hydrophobic, aromatic",
    SIDE_CHAIN_CHEM.NeutralHydrophilic: "neutral, hydrophilic",
    SIDE_CHAIN_CHEM.PrimaryThiol: "primary thiol",
    SIDE_CHAIN_CHEM.IminoAcid: "imino acid",
    RESIDUE_CHARGE.Positive: "positive",
    RESIDUE_CHARGE.Negative: "negative",
    RESIDUE_CHARGE.Neutral: "neutral",
}

# Non-standard Amino Acids Tooltip Map.
# Keyed by the three-letter code of a protonation-state variant.
NON_STD_AA_TT_MAP = dict(
    LYN='deprotonated LYS',
    ARN='deprotonated ARG',
    ASH='protonated ASP',
    GLH='protonated GLU',
    HID='δ-nitrogen protonated HIS',
    HIE='ε-nitrogen protonated HIS',
    HIP='δ and ε-nitrogen protonated HIS',
)

# Secondary structure constant -> tooltip label.
SSA_TT_MAP = dict((
    (structure.SS_NONE, "None"),
    (structure.SS_LOOP, "Loop"),
    (structure.SS_HELIX, "Helix"),
    (structure.SS_STRAND, "Strand"),
    (structure.SS_TURN, "Turn"),
))

# One-letter DSSP code -> human-readable description.
DSSP_CODES = dict(
    G="3-turn helix",
    H="4-turn helix",
    I="5-turn helix",
    T="hydrogen bonded turn",
    E="extended strand in parallel and/or anti-parallel Beta sheet conformation",
    B="residue in isolated Beta-bridge",
    S="bend (non-hydrogen-bond based)",
    C="coil",
)


class ElementType:
    """
    One kind of sequence element, identified by a short code, a long code
    and a display name. New instances are flagged as standard
    (`nonstandard` is False).
    """

    def __init__(self, short_code, long_code, name):
        self.short_code = short_code
        self.long_code = long_code
        self.name = name
        self.nonstandard = False

    def __str__(self):
        return self.short_code

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f"{cls_name}('{self.short_code}', '{self.long_code}', "
                f"'{self.name}')")

    def makeVariant(self, long_code, short_code=None, *, nonstandard=True):
        """
        Return a deep copy of this element type carrying new codes.

        :param long_code: A 2+ character string representing the element type
        :type long_code: str

        :param short_code: A 1 character string representing the element
            type. When None, the copy keeps this element type's short code.
        :type short_code: str

        :param nonstandard: Whether the variant should be considered
            nonstandard. If False, the residue will be generally treated
            identically to its standard (e.g. HIE/HIS)
        :type nonstandard: bool

        :return: The variant element type
        :rtype: ElementType
        """
        variant = copy.deepcopy(self)
        variant.long_code = long_code
        variant.nonstandard = nonstandard
        if short_code is not None:
            variant.short_code = short_code
        return variant


class ResidueType(ElementType):
    """
    Element type carrying per-residue property values (charge,
    hydrophobicity, propensities, etc.). Every property defaults to None.
    """

    def __init__(self,
                 short_code,
                 long_code,
                 name,
                 charge=None,
                 hydrophobicity=None,
                 hydrophilicity=None,
                 helix_propensity=None,
                 beta_strand_propensity=None,
                 turn_propensity=None,
                 helix_termination_tendency=None,
                 exposure_tendency=None,
                 steric_group=None,
                 side_chain_chem=None,
                 isoelectric_point=None):
        super().__init__(short_code, long_code, name)
        self.charge = charge
        self.hydrophobicity = hydrophobicity
        # This argument used to be accepted and then dropped, so the value
        # passed for every standard amino acid was lost.
        self.hydrophilicity = hydrophilicity
        self.helix_propensity = helix_propensity
        self.beta_strand_propensity = beta_strand_propensity
        self.turn_propensity = turn_propensity
        self.helix_termination_tendency = helix_termination_tendency
        self.exposure_tendency = exposure_tendency
        self.steric_group = steric_group
        self.side_chain_chem = side_chain_chem
        self.isoelectric_point = isoelectric_point


# TODO MSV-1504 determine what nucleotides need and change parent to ElementType
class NucleotideType(ResidueType):
    """
    Element type for nucleotides. Currently adds nothing to `ResidueType`.
    """


class DeoxyribonucleotideType(NucleotideType):
    """
    Nucleotide type used for the DNA bases. Kept as its own class so that
    DNA and RNA types can be told apart by class.
    """


class RibonucleotideType(NucleotideType):
    """
    Nucleotide type used for the RNA bases. Kept as its own class so that
    RNA and DNA types can be told apart by class.
    """


# Std Amino Acids
# Keyword arguments are listed in the order of ResidueType.__init__.
ALANINE = ResidueType(
    "A", "ALA", "Alanine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=1.80,
    hydrophilicity=-0.50,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=6.00)
ARGININE = ResidueType(
    "R", "ARG", "Arginine",
    charge=RESIDUE_CHARGE.Positive,
    hydrophobicity=-4.50,
    hydrophilicity=3.00,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=10.76)
ASPARAGINE = ResidueType(
    "N", "ASN", "Asparagine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-3.50,
    hydrophilicity=0.20,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.41)
ASPARTIC_ACID = ResidueType(
    "D",
    "ASP",
    "Aspartic acid",
    hydrophobicity=-3.50,
    hydrophilicity=3.00,
    charge=RESIDUE_CHARGE.Negative,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    # Was the only standard amino acid defined without an exposure
    # tendency (leaving it None). Surface matches the other charged
    # residues in this table (ARG, GLU, LYS).
    # NOTE(review): confirm Surface is the intended category for ASP.
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.AcidicHydrophilic,
    isoelectric_point=2.77)
# Keyword arguments below are listed in the order of ResidueType.__init__.
CYSTEINE = ResidueType(
    "C", "CYS", "Cysteine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=2.50,
    hydrophilicity=-1.00,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.PrimaryThiol,
    isoelectric_point=5.07)
GLUTAMIC_ACID = ResidueType(
    "E", "GLU", "Glutamic acid",
    charge=RESIDUE_CHARGE.Negative,
    hydrophobicity=-3.50,
    hydrophilicity=3.00,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.AcidicHydrophilic,
    isoelectric_point=3.22)
GLUTAMINE = ResidueType(
    "Q", "GLN", "Glutamine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-3.50,
    hydrophilicity=0.20,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.65)
GLYCINE = ResidueType(
    "G", "GLY", "Glycine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-0.40,
    hydrophilicity=0.00,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.97)
HISTIDINE = ResidueType(
    "H", "HIS", "Histidine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-3.20,
    hydrophilicity=-0.50,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=7.59)
ISOLEUCINE = ResidueType(
    "I", "ILE", "Isoleucine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=4.50,
    hydrophilicity=-1.80,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=6.02)
LEUCINE = ResidueType(
    "L", "LEU", "Leucine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=3.80,
    hydrophilicity=-1.80,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.98)
LYSINE = ResidueType(
    "K", "LYS", "Lysine",
    charge=RESIDUE_CHARGE.Positive,
    hydrophobicity=-3.90,
    hydrophilicity=3.00,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=9.74)
METHIONINE = ResidueType(
    "M", "MET", "Methionine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=1.90,
    hydrophilicity=-1.30,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.74)
PHENYLALANINE = ResidueType(
    "F", "PHE", "Phenylalanine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=2.80,
    hydrophilicity=-2.50,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.48)
PROLINE = ResidueType(
    "P", "PRO", "Proline",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-1.60,
    hydrophilicity=0.00,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.IminoAcid,
    isoelectric_point=6.30)
SERINE = ResidueType(
    "S", "SER", "Serine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-0.80,
    hydrophilicity=0.30,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.58)
THREONINE = ResidueType(
    "T", "THR", "Threonine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-0.70,
    hydrophilicity=-0.40,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.60)
TRYPTOPHAN = ResidueType(
    "W", "TRP", "Tryptophan",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-0.90,
    hydrophilicity=-3.40,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.89)
TYROSINE = ResidueType(
    "Y", "TYR", "Tyrosine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=-1.30,
    hydrophilicity=-2.30,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.66)
# Placeholder type: every residue property keeps its None default.
UNKNOWN = ResidueType("X", "UNK", _UNKNOWN_NAME)
VALINE = ResidueType(
    "V", "VAL", "Valine",
    charge=RESIDUE_CHARGE.Neutral,
    hydrophobicity=4.20,
    hydrophilicity=-1.50,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.96)

# Base type for capping groups; variants supply the actual long codes.
CAPPING_GROUP = ResidueType(short_code="X", long_code="", name="Capping Group")

# Placeholder for nucleotides of unknown identity.
UNKNOWN_NA = NucleotideType(
    short_code="N", long_code="UNK", name=_UNKNOWN_NAME)

# RNA
ADENINE = RibonucleotideType(short_code="A", long_code="A", name="Adenine")
CYTOSINE = RibonucleotideType(short_code="C", long_code="C", name="Cytosine")
GUANINE = RibonucleotideType(short_code="G", long_code="G", name="Guanine")
URACIL = RibonucleotideType(short_code="U", long_code="U", name="Uracil")

# DNA
dADENINE = DeoxyribonucleotideType(
    short_code="A", long_code="DA", name="Adenine")
dCYTOSINE = DeoxyribonucleotideType(
    short_code="C", long_code="DC", name="Cytosine")
dGUANINE = DeoxyribonucleotideType(
    short_code="G", long_code="DG", name="Guanine")
dTHYMINE = DeoxyribonucleotideType(
    short_code="T", long_code="DT", name="Thymine")

# yapf: disable
# Standard element types. List order matters: maps built from these lists
# keep the last entry when two entries share a code.
STD_AMINO_ACIDS = [
    ALANINE, CYSTEINE, ASPARTIC_ACID, GLUTAMIC_ACID, PHENYLALANINE, GLYCINE,
    HISTIDINE, ISOLEUCINE, LYSINE, LEUCINE, METHIONINE, ASPARAGINE, PROLINE,
    GLUTAMINE, ARGININE, SERINE, THREONINE, VALINE, TRYPTOPHAN, TYROSINE,
]
STD_RNA_BASES = [ADENINE, CYTOSINE, GUANINE, URACIL]
STD_DNA_BASES = [dADENINE, dCYTOSINE, dGUANINE, dTHYMINE]


# Variant tables consumed by make_variants(). Each row pairs a base type
# with its variants; a plain string is a variant long code that keeps the
# base type's short code, while a (long_code, short_code) tuple overrides
# the short code as well.
AMINO_ACID_VARIANTS = [
    (ALANINE, [
        "AIB", "ALM", "AYA", "BNN", "CHG", "CSD", ("DAL", "X"),
        "DHA", "DNP", "FLA", "HAC", "MAA", "PRR", "TIH", "TPQ",
    ]),
    (ARGININE, ["AGM", ("DAR", "X"), "HAR", "MMO", "ARM", "ARN", "HMR", "ACL"]),
    (ASPARAGINE, ["MEN", ("DSG", "X")]),
    (ASPARTIC_ACID, [
        "DSP", "BHD", "2AS", "ASQ", "ASB", "ASA", "ASK", "ASH",
        "ASL", ("DAS", "X")
    ]),
    (CYSTEINE,
     [("BCS", "X"), "BUC", "C5C", "C6C", "CCS", "CEA", "CME", "CSO", "CSP",
      "CSS", "CSW", "CSX", "CY1", "CY3", "CYG", "CYM", "CYP", "CYQ",
      "CYX", ("DCY", "X"), "EFC", "OCS", "PEC", "PR3", "SCH", "SCS", "SCY",
      "SHC", "SMC", "SOC"]),
    (GLUTAMIC_ACID,
     ["GLH", "GGL", 'PCA', '5HP', ('DGL', "X"), 'CGU', 'GMA']),
    (GLUTAMINE, [("DGN", "X")]),
    (GLYCINE, ["GLZ", "SAR", 'NMC', 'GL3', 'GSC', 'MPQ', 'MSA']),
    (HISTIDINE, [("DHI", "X"), "HID", "HIC", "HIE", "HIP", "HSD", "HSE",
                 "HSP", "MHS", "NEM", "NEP", "3AH"]),
    (ISOLEUCINE, [('DIL', "X"), 'IIL']),
    (LEUCINE,
     ["BUG", ("NLE", "X"), 'NLP', 'NLN', ('DLE', "X"), 'CLE', 'MLE']),
    (LYSINE, [
        'LYM', 'ALY', 'LYZ', 'LYN', 'LLY', 'LLP', 'SHR', 'TRG',
        ('DLY', "X"), 'KCX'
    ]),
    (METHIONINE, ["FME", "CXM", "OMT", "MSE"]),
    (PHENYLALANINE, ["DAH", ("DPN", "X"), "HPQ", "PHI", "PHL"]),
    (PROLINE, [('DPR', "X"), ('HYP', "X")]),
    (SERINE,
     ['OAS', 'MIS', 'SAC', 'SVA', 'SET', ('SEP', "X"), 'SEL', ("DSN",
                                                               "X")]),
    (THREONINE, ["ALO", "BMT", ("DTH", "X"), "THO", ("TPO", "t")]),
    (TRYPTOPHAN, [("DTR", "X"), "HTR", "LTR", "TPL", "TRO"]),
    (TYROSINE, [("DTY", "X"), "IYR", "PAQ", ("PTR", "y"), "STY", "TYB",
                "TYM", "TYO", "TYQ", "TYS", "TYY"]),
    (VALINE, ["DIV", ("DVA", "X"), "MVA"])
]
# Same row format as AMINO_ACID_VARIANTS, for nucleotide types.
NUCLEOBASE_VARIANTS = [
    (ADENINE, ["AMP", "ADP", "ATP", "1MA", "6MA"]),
    (CYTOSINE, ["CMP", "CDP", "CTP", "5MC", "5HC", "5FC", "1CC", "OMC"]),
    (GUANINE, ["GMP", "GDP", "GTP", "1MG", "2MG", "M2G", "7MG", "OMG"]),
    (URACIL, ["UMP", "UDP", "UTP", ("PSU", "Ψ"), "H2U", "5MU", "DU"]),
    (dTHYMINE, ["TMP", "TDP", "TTP"]),
    (UNKNOWN_NA, ["YYG", ("I", "I"), ("DI", "DI")])
]
# Long codes of the capping groups; all are variants of CAPPING_GROUP and
# therefore share its short code.
CAPPING_GROUP_VARIANTS = [
    (CAPPING_GROUP, ['ACE', 'NMA', 'IND', 'NCO', 'ANF', 'TOSG', 'FCO',
                     'MPA', 'NH2'])
]
# yapf: enable


def make_variants(variants):
    """
    Helper function to create modified amino acids and modified nucleotides

    :param variants: A list of modified residues. The modified residue will
        have all the same properties as the standard one (hydrophobicity,
        charge, etc.) but its long code (ie, its PDB residue name) will
        differ, and if a tuple of (string, string) is provided, both its
        long code and short code will differ.
    :type variants: list of (`ResidueType`, list of string or (string,string))

    :returns: a list of residue variants
    :rtype: list of `ResidueType`
    """

    def _codes(base_type, entry):
        # A tuple entry carries (long code, short code); a bare string is
        # only the long code and inherits the base type's short code.
        if isinstance(entry, tuple):
            long_code, short_code = entry
            return long_code, short_code
        return entry, base_type.short_code

    result = []
    for base_type, entries in variants:
        result.extend(
            base_type.makeVariant(*_codes(base_type, entry))
            for entry in entries)
    return result


def merge_dicts(*dict_args):
    """
    Combine any number of dictionaries into one new dictionary.

    Note that repeated keys will be silently overwritten with the last value.
    """
    merged = {}
    for mapping in dict_args:
        merged.update(mapping.items())
    return merged


def make_one_letter_map(res_list):
    """
    Index element types by their short code.

    When several entries share a short code, the last one in `res_list` is
    kept.

    :rtype: dict
    """
    by_short_code = {}
    for res in res_list:
        by_short_code[res.short_code] = res
    return by_short_code


def make_three_letter_map(res_list):
    """
    Index element types by their long code.

    When several entries share a long code, the last one in `res_list` is
    kept.

    :rtype: dict
    """
    by_long_code = {}
    for res in res_list:
        by_long_code[res.long_code] = res
    return by_long_code


# Charge variants have their charge set correctly below.
NON_STD_AMINO_ACIDS = make_variants(AMINO_ACID_VARIANTS)
AMINO_ACIDS = STD_AMINO_ACIDS + NON_STD_AMINO_ACIDS + [UNKNOWN]

STD_AMINO_ACIDS_ONE_LETTER = make_one_letter_map(STD_AMINO_ACIDS)
STD_AMINO_ACIDS_THREE_LETTER = make_three_letter_map(STD_AMINO_ACIDS)
AMINO_ACIDS_THREE_LETTER = make_three_letter_map(AMINO_ACIDS)
# Variants are deep copies made by makeVariant(), so overriding the charge
# here does not touch the standard residue they were copied from.
AMINO_ACIDS_THREE_LETTER['HIP'].charge = RESIDUE_CHARGE.Positive
for aa in ['ASH', 'ARN', 'GLH', 'HID', 'HIE', 'LYN']:
    AMINO_ACIDS_THREE_LETTER[aa].charge = RESIDUE_CHARGE.Neutral

CAPPING_GROUPS = make_variants(CAPPING_GROUP_VARIANTS)
CAPPING_GROUP_ALPHABET = make_three_letter_map(CAPPING_GROUPS)

# Combined lookup tables accepting both one-letter and long codes. On a
# key collision merge_dicts() keeps the value from the later argument.
AMINO_ACIDS_AND_CAPPING_GROUPS = merge_dicts(STD_AMINO_ACIDS_ONE_LETTER,
                                             AMINO_ACIDS_THREE_LETTER,
                                             CAPPING_GROUP_ALPHABET)
STD_AMINO_ACIDS_AND_CAPPING_GROUPS = merge_dicts(STD_AMINO_ACIDS_ONE_LETTER,
                                                 STD_AMINO_ACIDS_THREE_LETTER,
                                                 CAPPING_GROUP_ALPHABET)
NON_STD_NUCLEOBASES = make_variants(NUCLEOBASE_VARIANTS)
NUCLEOBASES = STD_RNA_BASES + STD_DNA_BASES + NON_STD_NUCLEOBASES
# These two names alias the standard lists (same list objects, not copies).
DNA_NUCLEOBASES = STD_DNA_BASES
RNA_NUCLEOBASES = STD_RNA_BASES
DNA_THREE_LETTER = make_three_letter_map(DNA_NUCLEOBASES)
RNA_THREE_LETTER = make_three_letter_map(RNA_NUCLEOBASES)
NA_THREE_LETTER = make_three_letter_map(NUCLEOBASES)
DNA_ONE_LETTER = make_one_letter_map(DNA_NUCLEOBASES)
RNA_ONE_LETTER = make_one_letter_map(RNA_NUCLEOBASES)
DNA_ALPHABET = merge_dicts(DNA_THREE_LETTER, DNA_ONE_LETTER)
RNA_ALPHABET = merge_dicts(RNA_THREE_LETTER, RNA_ONE_LETTER)

# Registry of element types keyed by (short_code, long_code, name).
ALL_ELEMENT_TYPES = {}
for ele_type in STD_AMINO_ACIDS + CAPPING_GROUPS + STD_RNA_BASES + STD_DNA_BASES + [
        UNKNOWN, UNKNOWN_NA
]:
    ele_type_key = ele_type.short_code, ele_type.long_code, ele_type.name
    ALL_ELEMENT_TYPES[ele_type_key] = ele_type

# Lazily-initialized state used by get_protein_alphabet(): the nonstandard
# residue database and the cached alphabet built from it.
_nonstandard_residue_db = None
_protein_alphabet = None


def get_protein_alphabet():
    """
    Return a cached, read-only map of amino acid element types.

    Includes definitions from the nonstandard residues database. The cache
    is dropped when the database emits `residuesChanged` and rebuilt on the
    next call.

    :rtype: types.MappingProxyType
    """
    global _protein_alphabet, _nonstandard_residue_db

    # Hook up cache invalidation the first time the database is needed.
    if _nonstandard_residue_db is None:
        _nonstandard_residue_db = nonstandard_residues.get_residue_database()
        _nonstandard_residue_db.residuesChanged.connect(
            _invalidate_protein_alphabet)

    if _protein_alphabet is not None:
        return _protein_alphabet

    # Later updates win on key collisions: database entries first, then the
    # standard residues and capping groups, then the built-in variants.
    mapping = _get_nonstandard_residues()
    mapping.update(STD_AMINO_ACIDS_AND_CAPPING_GROUPS)
    # Add standard nonstandards (e.g. HID/HIE/HIP)
    mapping.update(_get_residue_variants())

    for code in (UNKNOWN.short_code, UNKNOWN.long_code):
        mapping[code] = UNKNOWN

    _protein_alphabet = types.MappingProxyType(mapping)
    return _protein_alphabet


def _invalidate_protein_alphabet():
    """
    Drop the cached protein alphabet so that the next call to
    `get_protein_alphabet` rebuilds it.  Connected to the nonstandard residue
    database's `residuesChanged` signal.
    """
    global _protein_alphabet
    _protein_alphabet = None


@functools.lru_cache()
def _get_residue_variants():
    """
    Build the variants of standard amino acids that are listed in
    `structure.RESIDUE_MAP_3_TO_1_LETTER` but are not themselves standard
    three-letter codes.

    An entry is only created when the upper-cased one-letter code maps to a
    standard amino acid.  The variant is flagged as nonstandard when its
    one-letter code differs from the standard type's short code.

    The result is cached, so callers must treat the returned dict as
    read-only.

    :return: Variant element types keyed by three-letter code
    :rtype: dict
    """
    variants = {}
    for three_letter, one_letter in structure.RESIDUE_MAP_3_TO_1_LETTER.items():
        if three_letter in STD_AMINO_ACIDS_THREE_LETTER:
            # Already covered by the standard amino acid maps
            continue
        base_type = STD_AMINO_ACIDS_ONE_LETTER.get(one_letter.upper())
        if base_type is None:
            continue
        is_nonstandard = one_letter != base_type.short_code
        variants[three_letter] = base_type.makeVariant(
            three_letter, one_letter, nonstandard=is_nonstandard)
    return variants


def _get_nonstandard_residues():
    """
    Build element types for every non-standard amino acid in the nonstandard
    residues database.

    An amino acid that declares `aligns_with` becomes a variant of that
    standard amino acid; any other becomes a variant of the unknown residue
    type carrying its own short code.

    :return: A new dict of element types keyed by long code
    :rtype: dict
    """
    # The database object is cached but this may need to read it from disk
    residue_db = nonstandard_residues.get_residue_database()
    element_types = {}
    for amino_acid in residue_db.amino_acids:
        if amino_acid.standard:
            continue
        short_code, long_code = amino_acid.code, amino_acid.name
        aligns_with = amino_acid.aligns_with
        if not aligns_with:
            variant = UNKNOWN.makeVariant(long_code, short_code)
        else:
            variant = STD_AMINO_ACIDS_ONE_LETTER[aligns_with].makeVariant(
                long_code)
        element_types[long_code] = variant
    return element_types


def any_structured_residues(residues):
    """
    Report whether at least one of the given residues is structured.

    Iteration stops at the first structured residue.

    :param residues: The iterable of residues to check
    :type residues: iterable(residue.Residue)

    :return: True if any residue's `hasStructure()` is truthy, False
        otherwise (including for an empty iterable).
    :rtype: bool
    """
    for res in residues:
        if res.hasStructure():
            return True
    return False


class AbstractSequenceElement(json.JsonableClassMixin):
    """
    Shared behavior for the elements of a sequence (residues and gaps).

    Subclasses supply an `is_gap` attribute.  `hasStructure` additionally
    reads `seqres_only` on elements that are not gaps.
    """

    # This class intentionally doesn't have an __init__ method since
    # instantiating Gap and Residue objects needs to be as fast as possible and
    # calling super().__init__ would slow down their __init__s.

    def _getNewInstance(self):
        """
        Helper for copying: create an empty instance of the same class.
        """
        return self.__class__()

    def __copy__(self):
        duplicate = self._getNewInstance()
        duplicate.is_gap = self.is_gap
        duplicate.sequence = self.sequence
        return duplicate

    def __deepcopy__(self, memo):
        # Same as __copy__ (none of the public attrs should be deepcopied)
        return copy.copy(self)

    @property
    def is_res(self):
        """
        Utility function to check whether a residue is not a gap
        """
        return not self.is_gap

    @property
    def sequence(self):
        """
        The sequence that this element is part of.  Will be `None` if this
        residue is not part of a sequence.  Stored as a weakref to avoid
        circular references.
        :rtype: sequence.AbstractSingleChainSequence
        """
        seq_ref = self._sequence
        return None if seq_ref is None else seq_ref()

    @sequence.setter
    def sequence(self, value):
        self._sequence = None if value is None else weakref.ref(value)

    def hasStructure(self):
        """
        :return: Whether this element has a structure i.e. whether it has
            corresponding XYZ coordinates in Maestro.
        :rtype: bool
        """
        seq = self.sequence
        seq_has_structure = seq is not None and seq.hasStructure()
        # Gaps short-circuit on is_res, so seqres_only is only read on
        # non-gap elements.
        return self.is_res and not self.seqres_only and seq_has_structure

    @property
    def idx_in_seq(self):
        """
        Return the index of this element within its sequence.

        :return: Index of this element in its sequence or None if it is not
            part of a sequence.
        :rtype: int or None
        """
        # Dereference the weakref once so the sequence can't disappear
        # between the None check and the lookup.
        seq = self.sequence
        if seq is None:
            return None
        return seq.index(self)

    @property
    def gapless_idx_in_seq(self):
        """
        Return the index of this residue within its sequence ignoring gaps

        :return: Index of this residue in its sequence or None if it
            is not part of a sequence.
        :rtype: int or None
        """
        # Dereference the weakref once (see idx_in_seq)
        seq = self.sequence
        if seq is None:
            return None
        return seq.index(self, ignore_gaps=True)


class Gap(AbstractSequenceElement):
    """
    A gap in a sequence.  Rendered as "~".
    """

    # The trailing comma matters: ("_sequence") is just a parenthesized string
    __slots__ = ("_sequence",)
    is_gap = True

    def __init__(self):
        self.sequence = None

    def __str__(self):
        return "~"

    def toJsonImplementation(self):
        """
        :return: A dict that only records that this element is a gap
        :rtype: dict
        """
        return {'is_gap': self.is_gap}

    @classmethod
    def fromJsonImplementation(cls, json_obj):
        """
        :param json_obj: The dict produced by `toJsonImplementation`
        :type json_obj: dict

        :raise ValueError: If `json_obj` is not flagged as a gap

        :rtype: Gap
        """
        if json_obj.get('is_gap') is not True:
            raise ValueError('Attempting to deserialize a non-gap object')
        return cls()


class Residue(AbstractSequenceElement):
    """
    An amino acid residue.

    Optional annotations are declared in `__slots__` and populated lazily:
    reading a slot that has not been set goes through `__getattr__`, which
    stores and returns the default from `_DEFAULT_ATTR_VALS` (None unless
    listed there).
    """

    __slots__ = (
        "_sequence",
        "type",
        "_inscode",
        "_resnum",
        "seqres_only",
        "disulfide_bond",
        "pred_disulfide_bond",
        "b_factor",
        "molecule_number",
        "pfam",  # Either None or 1-char string
        "pred_secondary_structure",  # structure.SS_HELIX/SS_STRAND/etc
        "secondary_structure",  # structure.SS_HELIX/SS_STRAND/etc
        "pred_accessibility",  # predictors.SolventAccessibility
        "pred_disordered",  # predictors.Disordered
        "pred_domain_arr",  # predictors.DomainArrangement
        "area",  # Solvent accessible area
        "composition",  # Amino acid composition in profile residues.
        "domains",  # name of the domain(s) to which the residue belongs
        "kinase_features",  # kinase feature label
        "_kinase_conservation",  # kinase conservation category
        "_descriptors",  # dict mapping descriptor name to val
    )
    # Default values for any attributes that shouldn't default to None;
    # must not be mutable
    _DEFAULT_ATTR_VALS = {"area": 0.0}
    is_gap = False

    @property
    def descriptors(self):
        """
        The descriptor dict (name -> value), created on first access.

        :rtype: dict
        """
        if self._descriptors is None:
            self._descriptors = dict()
        return self._descriptors

    @property
    def kinase_conservation(self):
        """
        The kinase conservation dict, created on first access.

        :rtype: dict
        """
        if self._kinase_conservation is None:
            self._kinase_conservation = dict()
        return self._kinase_conservation

    def __init__(self,
                 element_type,
                 inscode=None,
                 resnum=None,
                 seqres_only=False):
        """
        :param element_type: The kind of the residue
        :type element_type: ResidueType

        :param inscode: The insertion code
        :type inscode: str

        :param resnum: PDB residue number
        :type resnum: int

        :param seqres_only: Whether this residue only appears in the SEQRES
            record of a structure.  Only applies to sequences associated with a
            structure.
        :type seqres_only: bool
        """
        self.type = element_type
        self._inscode = inscode
        self._resnum = resnum
        self.seqres_only = seqres_only
        # Do *not* add new instance attributes here, as it will slow down
        # Residue instantiation.  Instead, add the attribute name to __slots__
        # and add the initial value to `_DEFAULT_ATTR_VALS` unless the initial
        # value should be None.

    def __getattr__(self, attr):
        """
        Lazily supply the default value of a slot that hasn't been set yet.

        :raise AttributeError: If `attr` is not one of the slots
        """
        if attr not in self.__slots__:
            raise AttributeError(
                f"'{self.__class__.__name__}' has no attribute '{attr}'")
        val = self._DEFAULT_ATTR_VALS.get(attr)
        # set the value of the attribute so that repeatedly accessing it doesn't
        # incur time penalties from repeated __getattr__ calls
        setattr(self, attr, val)
        return val

    def toJsonImplementation(self):
        """
        Serialize the residue.  The required keys are always written; the
        optional annotations are only written when they are not None.

        :rtype: dict
        """
        res_type = self.type
        json_dict = {
            'inscode': self.inscode,
            'resnum': self.resnum,
            'element_type': [
                res_type.short_code, res_type.long_code, res_type.name
            ],
            'seqres_only': self.seqres_only,
        }
        # Each optional annotation is stored under its attribute name
        optional_attrs = (
            'b_factor',
            'secondary_structure',
            'pred_secondary_structure',
            'pred_accessibility',
            'pred_disordered',
            'pred_domain_arr',
            'area',
            'composition',
            'kinase_features',
            '_descriptors',
        )
        for attr_name in optional_attrs:
            val = getattr(self, attr_name)
            if val is not None:
                json_dict[attr_name] = val
        if self._kinase_conservation is not None:
            # JSON requires string keys, so cast to str
            json_dict['_kinase_conservation'] = {
                str(k): v for k, v in self._kinase_conservation.items()
            }
        return json_dict

    @classmethod
    def fromJsonImplementation(cls, json_obj):
        """
        Rebuild a residue from the dict written by `toJsonImplementation`.

        The element type is looked up in `ALL_ELEMENT_TYPES` first, then by
        long code in the protein alphabet; if neither knows it, a variant of
        the unknown (nucleic acid or protein) type is created.

        Note that the 'element_type' entry is popped from `json_obj`.

        :param json_obj: The serialized residue
        :type json_obj: dict

        :rtype: Residue
        """
        # TODO MSV-1504: separate logic for different sequence types
        elem_type = tuple(json_obj.pop('element_type'))
        try:
            res_type = ALL_ELEMENT_TYPES[elem_type]
        except KeyError:
            short_code, long_code = elem_type[0], elem_type[1]
            res_type = get_protein_alphabet().get(long_code)
            if res_type is None:
                if short_code == UNKNOWN_NA.short_code:
                    res_type = UNKNOWN_NA.makeVariant(long_code)
                else:
                    res_type = UNKNOWN.makeVariant(long_code)
        res = cls(element_type=res_type,
                  inscode=json_obj['inscode'],
                  seqres_only=json_obj['seqres_only'],
                  resnum=json_obj['resnum'])

        # Plain values are restored whenever the key is present
        for attr_name in ('area', 'composition', 'b_factor',
                          'secondary_structure', 'pred_secondary_structure'):
            if attr_name in json_obj:
                setattr(res, attr_name, json_obj[attr_name])

        # Predictor values are only restored when the stored value is truthy
        from schrodinger.protein import predictors
        predictor_classes = (
            ('pred_accessibility', predictors.SolventAccessibility),
            ('pred_disordered', predictors.Disordered),
            ('pred_domain_arr', predictors.DomainArrangement),
        )
        for attr_name, predictor_cls in predictor_classes:
            if json_obj.get(attr_name):
                setattr(res, attr_name,
                        predictor_cls.fromJson(json_obj[attr_name]))

        kinase_feature = json_obj.get('kinase_features')
        if kinase_feature is not None:
            res.kinase_features = annotation.KinaseFeatureLabel.fromJson(
                kinase_feature)
        res._descriptors = json_obj.get('_descriptors')
        kinase_conservation = json_obj.get('_kinase_conservation')
        if kinase_conservation is not None:
            # Keys were cast to str for JSON; restore the int keys
            kinase_conservation = {
                int(k): annotation.KinaseConservation.fromJson(v)
                for k, v in kinase_conservation.items()
            }
        res._kinase_conservation = kinase_conservation
        return res

    def __str__(self):
        """
        Returns the short code for the residue
        """
        return str(self.type)

    def __repr__(self):
        parts = [repr(self.type)]
        parts.extend(f"{kwarg_name}={kwarg_val}"
                     for kwarg_name, kwarg_val in self._getReprKwargList())
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def _getNewInstance(self):
        return self.__class__(self.type)

    def __copy__(self):
        new_res = super().__copy__()
        skipped = self._getCopyBlackList()
        for name in type(self).__slots__:
            if name not in skipped:
                setattr(new_res, name, getattr(self, name))
        return new_res

    def __deepcopy__(self, memo):
        new_res = copy.copy(self)
        skipped = self._getCopyBlackList()
        for name in type(self).__slots__:
            if name not in skipped:
                value = getattr(self, name)
                setattr(new_res, name, copy.deepcopy(value, memo))
        return new_res

    def _getCopyBlackList(self):
        """
        A list of attributes to not copy or deepcopy.

        :return: Attribute names to not copy or deepcopy
        :rtype: set(str)
        """
        # Don't copy type because it's intended to be a singleton
        # Don't copy sequence because it's set in the superclass
        # disulfide_bond is not copied due to the invariant that a maximum of 2
        # residues may contain the same disulfide bond
        return {"type", "sequence", "disulfide_bond", "pred_disulfide_bond"}

    def _getReprKwargList(self):
        """
        Return a list of kwargs that should be specified in the repr.

        :return: list of (kwarg_key, kwarg_value) pairs
        :rtype: list[tuple(str, str)]
        """
        resnum = self.resnum
        resnum_str = "None" if resnum is None else str(resnum)
        kwarg_list = [('inscode', f"'{self.inscode}'"), ('resnum', resnum_str)]
        if self.molecule_number is not None:
            kwarg_list.append(('molnum', str(self.molecule_number)))
        if self.seqres_only:
            kwarg_list.append(('seqres_only', 'True'))
        return kwarg_list

    def getChainKey(self):
        """
        :return: Key that partially identifies the residue within its chain
        :rtype: ResidueChainKey
        """
        # TODO MSV-2379 account for all items in WHResidue.d_hash
        return ResidueChainKey(self.resnum, self.inscode)

    def getKey(self):
        """
        Get a key that identifies the residue relative to structures.

        :return: Residue key built by `get_residue_key` from this residue and
            its sequence's entry id and structure chain (`ResidueKey` holds
            entry_id, chain, resnum and inscode), or None if the residue is
            unparented, has no structure, or its sequence has no entry id
        :rtype: ResidueKey or NoneType
        """
        seq = self.sequence
        if seq is None or not self.hasStructure() or seq.entry_id is None:
            return None
        return get_residue_key(self, seq.entry_id, seq.structure_chain)

    def hasSetResNum(self) -> bool:
        """
        :return: Whether a specific resnum has been set
        """
        return self._resnum is not None

    @property
    def resnum(self):
        """
        If resnum is set to None, resnum will be auto-generated from column
        number (1-based index in the sequence).  If the residue is also not
        part of a sequence, this is None.
        """
        if not self.hasSetResNum() and self.sequence is not None:
            return self.idx_in_seq + 1
        return self._resnum

    @resnum.setter
    def resnum(self, value):
        self._resnum = value

    @property
    def inscode(self):
        """
        If inscode and resnum are both set to None, the inscode will be '+'.
        If only inscode is None, the inscode will be ' '.
        """
        if self._inscode is not None:
            return self._inscode
        return ' ' if self.hasSetResNum() else '+'

    @inscode.setter
    def inscode(self, value):
        self._inscode = value

    @property
    def rescode(self):
        """
        :return: The residue number followed by the insertion code, or None
            if the residue has no residue number.
        :rtype: str or None
        """
        if self.resnum is not None:
            return str(self.resnum) + self.inscode

    @property
    def short_code(self):
        """
        :return: The short code of the residue's type
        """
        return self.type.short_code

    @property
    def long_code(self):
        """
        :return: The long code of the residue's type
        """
        return self.type.long_code

    @property
    def chain(self):
        """
        The name of the sequence chain that this residue belongs to.
        :rtype: str
        """
        return "" if self.sequence is None else self.sequence.chain

    @property
    def structure_chain(self):
        """
        The name of chain for the structure that this sequence is associated
        with.  This is normally the same as `chain`, but it can be different if
        the user manually links this sequence to a structure with differing
        chain names.

        :rtype: str
        """
        return "" if self.sequence is None else self.sequence.structure_chain

    @property
    def hydrophobicity(self):
        """
        :return: Hydrophobicity for the ResidueType on the Kyte-Doolittle scale,
            if available; otherwise None.
        :rtype: float
        """
        return self.type.hydrophobicity

    @property
    def hydrophilicity(self):
        """
        :return: Hydrophilicity for the ResidueType on the Hopp-Woods scale,
            if available; otherwise None
        :rtype: float
        """
        return self.type.hydrophilicity

    @property
    def charge(self):
        """
        :return: charge of the ResidueType of the residue
        :rtype: RESIDUE_CHARGE
        """
        return self.type.charge

    @property
    def helix_propensity(self):
        """
        :return: Helix propensity for the ResidueType of the residue
        :rtype: `HELIX_PROPENSITY`
        """
        return self.type.helix_propensity

    @property
    def beta_strand_propensity(self):
        """
        :return: Beta-strand propensity for the ResidueType of the residue
        :rtype: `BETA_STRAND_PROPENSITY`
        """
        return self.type.beta_strand_propensity

    @property
    def turn_propensity(self):
        """
        :return: Turn propensity for the ResidueType of the residue
        :rtype: `TURN_PROPENSITY`
        """
        return self.type.turn_propensity

    @property
    def helix_termination_tendency(self):
        """
        :return: Helix termination tendency for the ResidueType of the residue
        :rtype: `HELIX_TERMINATION_TENDENCY`
        """
        return self.type.helix_termination_tendency

    @property
    def exposure_tendency(self):
        """
        :return: Solvent exposure tendency for the ResidueType of the residue
        :rtype: `SOLVENT_EXPOSURE_TENDENCY`
        """
        return self.type.exposure_tendency

    @property
    def steric_group(self):
        """
        :return: Steric group for the ResidueType of the residue
        :rtype: `STERIC_GROUP`
        """
        return self.type.steric_group

    @property
    def side_chain_chem(self):
        """
        :return: Side chain chemistry for the ResidueType of the residue
        :rtype: `SIDE_CHAIN_CHEM`
        """
        return self.type.side_chain_chem

    @property
    def ss_prediction_sspro(self):
        """
        Returns a DSSP code matching the secondary structure prediction for the
        residue or None. Value is calculated from the SSpro backend.
        """
        # The backing attribute is not one of the slots, so it has no lazy
        # default; fall back to None explicitly when it was never set.
        return getattr(self, "_ss_prediction_sspro", None)

    @ss_prediction_sspro.setter
    def ss_prediction_sspro(self, value):
        # list() keeps the membership test a plain equality scan
        if value not in list(DSSP_CODES):
            raise ValueError("%s is not a valid DSSP code" % value)
        self._ss_prediction_sspro = value

    @property
    def ss_prediction_psipred(self):
        """
        Returns a DSSP code matching the secondary structure prediction for the
        residue or None. Value is calculated from the PsiPred backend.
        """
        # See ss_prediction_sspro for why getattr with a default is used
        return getattr(self, "_ss_prediction_psipred", None)

    @ss_prediction_psipred.setter
    def ss_prediction_psipred(self, value):
        # list() keeps the membership test a plain equality scan
        if value not in list(DSSP_CODES):
            raise ValueError("%s is not a valid DSSP code" % value)
        self._ss_prediction_psipred = value

    @property
    def isoelectric_point(self):
        """
        :return: A float representing the isoelectric point value for the
            ResidueType of the residue
        """
        return self.type.isoelectric_point

    @check_if_comparable(default_return=_DEFAULT_SIMILARITY)
    def getSimilarity(self, ref_res, similarity_matrix=constants.BLOSUM62):
        """
        Returns the similarity between the residue and a reference residue

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :param similarity_matrix: The scoring matrix to use

        :return: similarity score based on the similarity matrix
        :rtype: float
        """
        return get_matrix_value(similarity_matrix, self.short_code,
                                ref_res.short_code)

    @check_if_comparable()
    def getBinarySimilarity(self,
                            ref_res,
                            similarity_matrix=constants.BLOSUM62):
        """
        Returns if the residue and a reference residue are similar

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :param similarity_matrix: The scoring matrix to use

        :return: 1 if the similarity score is positive, otherwise 0.
        :rtype: int
        """
        score = self.getSimilarity(ref_res, similarity_matrix)
        return 1 if score > 0.0 else 0

    @check_if_comparable()
    def getIdentity(self, ref_res):
        """
        Return the identity between the residue and a reference residue

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :return: 1 if same as the reference residue (same short code and
            type name), 0 otherwise.
        :rtype: int
        """
        res_type = self.type
        ref_res_type = ref_res.type
        same_code = res_type.short_code == ref_res_type.short_code
        if same_code and res_type.name == ref_res_type.name:
            return 1
        return 0

    @check_if_comparable()
    def getIdentityStrict(self, ref_res):
        """
        Return the identity between the residue and a reference residue without
        considering nonstandard amino acids identical to their related standard
        amino acid.

        See getIdentity for additional documentation.
        """
        maybe_identical = self.getIdentity(ref_res)
        if not maybe_identical:
            return 0
        res_type = self.type
        ref_res_type = ref_res.type
        if not res_type.nonstandard and not ref_res_type.nonstandard:
            return maybe_identical
        if res_type.nonstandard and ref_res_type.nonstandard:
            # Two nonstandard residues must be the very same variant
            return int(res_type.long_code == ref_res_type.long_code)
        # One standard and one nonstandard residue are never strictly equal
        return 0

    @check_if_comparable()
    def getConservation(self, ref_res):
        """
        Return whether the residue and a reference residue have similar
        side-chain chemistry.

        The similarity criterion is based on "side chain chemistry"
        descriptor matching.

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :return: 1 if the residue and reference residue have similar side
            chain chemistry, 0 otherwise.
        :rtype: int
        """
        res_group = _HOMOLOGY_GROUPS.get(self.short_code)
        ref_group = _HOMOLOGY_GROUPS.get(ref_res.short_code)
        if res_group and ref_group and (res_group == ref_group):
            return 1

        return 0

    def getStructureResProperties(self):
        """
        Return all properties for the corresponding structure residue's alpha
        carbon.  Properties that apply to the whole residue are stored as atom
        properties on this atom.  An empty dictionary will be returned if this
        residue doesn't have a corresponding alpha carbon.

        :return: A dictionary-like object containing the properties.
        :rtype: structure._StructureAtomProperty or dict
        """
        seq = self.sequence
        if seq is None or self.seqres_only or not seq.hasStructure():
            return {}
        struc_res = seq.getStructureResForRes(self)
        if struc_res is None:
            return {}
        calpha = struc_res.getAlphaCarbon()
        if calpha is None:
            return {}
        return calpha.property

    def updateDescriptors(self, descriptors):
        """
        Updates the descriptor dicts with new descriptor values

        :param descriptors: A dict mapping descriptor names to their values
        :type descriptors: dict[str, float]
        """
        self.descriptors.update(descriptors)

    def getDescriptorValue(self, descriptor_name):
        """
        :return: The value stored for the given descriptor name, or None if
            there is none.
        """
        return self.descriptors.get(descriptor_name)

    def getDescriptorKeys(self):
        """
        :return: A view of the names of the stored descriptors
        """
        return self.descriptors.keys()

    def getProperty(self, seq_prop):
        """
        Get the residue's value corresponding to the given SequenceProperty
        object

        :param seq_prop: The object describing the residue property
        :type seq_prop: schrodinger.protein.properties.SequenceProperty

        :return: The value of the sequence property
        :rtype: str, int, float or None
        """
        if seq_prop.property_type == properties.PropertyType.StructureProperty:
            struc_props = self.getStructureResProperties()
            return struc_props.get(seq_prop.property_name)
        return self.getDescriptorValue(seq_prop.property_name)


class CombinedChainResidueWrapper:
    """
    A wrapper for a residue or gap so that res.sequence points to the
    combined-chain sequence and res.idx_in_seq gives the index within the
    combined-chain sequence.

    Any attribute that the wrapper doesn't define itself is forwarded to the
    wrapped split-chain element.

    Note that these wrappers are generated as needed and the combined-chain
    sequence does not store any reference to the generated instances.  As such,
    these residues should not be stored using weakrefs and should not be
    compared using identity.  Also note that these residues will not compare
    equal to the split-chain residues that they wrap.
    """

    def __init__(self, res, combined_chain_seq):
        """
        :param res: The residue or gap to wrap
        :type res: AbstractSequenceElement

        :param combined_chain_seq: The combined-chain sequence that the residue
            is part of.
        :type combined_chain_seq: sequence.CombinedChainProteinSequence
        """
        self._res = res
        self._seq = combined_chain_seq

    def __getattr__(self, attr_name):
        # __getattr__ only runs when normal lookup fails.  If the wrapper's
        # own state is what's missing (e.g. the instance was created without
        # __init__), delegating through self._res would recurse forever.
        if attr_name in ("_res", "_seq"):
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute "
                f"'{attr_name}'")
        return getattr(self._res, attr_name)

    def __dir__(self):
        # Union of the wrapped element's names and the wrapper's own names
        attr_names = set(dir(self._res))
        attr_names.update(self.__dict__.keys())
        attr_names.update(self.__class__.__dict__.keys())
        return attr_names

    def __eq__(self, other):
        if not isinstance(other, CombinedChainResidueWrapper):
            return False
        return self._res == other._res

    def __hash__(self):
        # Include the class so a wrapper doesn't hash like the element it wraps
        return hash((self.__class__, self._res))

    def __str__(self):
        return str(self._res)

    def __repr__(self):
        return f"{self.__class__.__name__}({repr(self._res)})"

    def __copy__(self):
        raise RuntimeError(f"Cannot copy {self.__class__.__name__} instances")

    def __deepcopy__(self, memo):
        raise RuntimeError(f"Cannot copy {self.__class__.__name__} instances")

    def _inSeq(self):
        """
        Does this residue still exist in the sequence it was created from?  This
        will return False if the residue has been removed from its split-chain
        sequence or if the chain has been removed from the combined-chain
        sequence.

        :rtype: bool
        """
        return self._res.sequence in self._seq.chains

    @property
    def sequence(self):
        """
        The combined-chain sequence that the residue is part of, or None if the
        residue has been removed from the sequence.

        :rtype: sequence.CombinedChainProteinSequence or None
        """
        return self._seq if self._inSeq() else None

    @property
    def idx_in_seq(self):
        """
        This residue's index in the combined-chain sequence, or None if the
        residue has been removed from the sequence.

        :rtype: int or None
        """
        return self._seq.index(self) if self._inSeq() else None

    @property
    def split_res(self):
        """
        The split-chain residue or gap that this residue is wrapping.

        :rtype: AbstractSequenceElement
        """
        return self._res

    @property
    def split_sequence(self):
        """
        The split-chain sequence that this residue is part of, or None if the
        residue has been removed from the sequence.

        :rtype: sequence.ProteinSequence or None
        """
        return self._res.sequence

    @property
    def disulfide_bond(self):
        """
        The current disulfide bond, if any, that this residue is involved in.
        :rtype: CombinedChainDisulfideBond or None
        """
        if not self._inSeq():
            return None
        split_bond = self._res.disulfide_bond
        if split_bond is None or not split_bond.isValid():
            return None
        combined_bond = CombinedChainDisulfideBond(split_bond, self._seq)
        # Make sure that neither residue is part of a chain that's been
        # removed from the combined sequence
        if all(res.sequence is not None for res in combined_bond):
            return combined_bond
        return None


class DisulfideBond:
    """
    Representation of a disulfide bond.

    The two residues are held in a `weakref.WeakSet`, so the bond does not
    keep them alive.  Iterating the bond yields the residues that are still
    alive, sorted by their index in their sequence.
    """

    def __init__(self, res1, res2):
        """
        :param res1: A residue in the bond
        :type res1: Residue

        :param res2: The other residue in the bond
        :type res2: Residue
        """
        self._res_pair = weakref.WeakSet((res1, res2))

    def __iter__(self):

        def sort_key(res):
            # Residues that are no longer in a sequence sort as index 0
            return res.idx_in_seq if res.sequence is not None else 0

        return iter(sorted(self._res_pair, key=sort_key))

    @property
    def res_pair(self):
        """
        The bonded residues as a tuple, in iteration order.

        :rtype: tuple
        """
        return tuple(self)

    def isValid(self):
        """
        Check whether the disulfide bond is valid, i.e. both residues still
        exist and both are still part of a sequence.

        :return: Whether the bond is valid.
        :rtype: bool
        """
        # Build the sorted pair once rather than once per check
        pair = self.res_pair
        if len(pair) < 2:
            return False
        return all(res.sequence is not None for res in pair)

    def _getSequencePair(self):
        """
        Return the sequences of the two bonded residues.

        :rtype: list

        :raise ValueError: If the bond is not valid
        """
        if not self.isValid():
            raise ValueError("Bond is not valid")
        return [res.sequence for res in self.res_pair]

    @property
    def is_intra_sequence(self):
        """
        Check whether the bond is valid and intra-sequence.

        :return: Whether the bond is a valid, intra-sequence bond.
        :rtype: bool

        :raise ValueError: If the bond is not valid
        """
        seq1, seq2 = self._getSequencePair()
        return seq1 is seq2

    @property
    def is_inter_sequence(self):
        """
        Check whether the bond is valid and inter-sequence.

        :return: Whether the bond is a valid, inter-sequence bond.
        :rtype: bool

        :raise ValueError: If the bond is not valid
        """
        seq1, seq2 = self._getSequencePair()
        return seq1 is not seq2


class CombinedChainDisulfideBond(DisulfideBond):
    """
    A disulfide bond in a `sequence.CombinedChainProteinSequence`.

    Wraps a split-chain bond.  Equality and hashing are delegated to the
    wrapped bond, and the bond always reports itself as intra-sequence.
    """

    def __init__(self, bond, seq):
        """
        :param bond: The split-chain disulfide bond.
        :type bond: DisulfideBond

        :param seq: The combined-chain sequence that this bond is in.
        :type seq: sequence.CombinedChainProteinSequence
        """
        # A regular set (not a WeakSet) is used here, so the bond itself
        # keeps the wrappers created below alive.
        wrapped = set()
        for res in bond:
            wrapped.add(CombinedChainResidueWrapper(res, seq))
        self._res_pair = wrapped
        self._split_seq_bond = bond

    def __eq__(self, other):
        if not isinstance(other, DisulfideBond):
            return super().__eq__(other)
        if isinstance(other, CombinedChainDisulfideBond):
            # Compare the underlying split-chain bonds
            other = other._split_seq_bond
        return self._split_seq_bond == other

    def __hash__(self):
        return hash(self._split_seq_bond)

    @property
    def is_intra_sequence(self):
        """
        Always True for a combined-chain bond.

        :rtype: bool
        """
        return True

    @property
    def is_inter_sequence(self):
        """
        Always False for a combined-chain bond.

        :rtype: bool
        """
        return False


def add_disulfide_bond(res1, res2, known=True):
    """
    Link two residues with a new disulfide bond and clear the disulfide bond
    cache of each residue's sequence.

    :param res1: A residue to link with a disulfide bond
    :type  res1: residue.Residue

    :param res2: Another residue to link with a disulfide bond
    :type  res2: residue.Residue

    :param known: Whether the bond is a known bond or a predicted bond.
        Known bonds are stored on `disulfide_bond`, predicted bonds on
        `pred_disulfide_bond`.
    :type  known: bool

    :raise ValueError: If either residue already has a bond of the same kind
    """
    if known:
        bond_attr_name = 'disulfide_bond'
    else:
        bond_attr_name = 'pred_disulfide_bond'
    residues = (res1, res2)
    # Validate both residues before modifying either one
    for res in residues:
        if getattr(res, bond_attr_name) is not None:
            raise ValueError(f'"{res}" is already a part of a bond')
    new_bond = DisulfideBond(res1, res2)
    for res in residues:
        setattr(res, bond_attr_name, new_bond)
    for res in residues:
        res.sequence.clearDisulfideBondsCache()


def remove_disulfide_bond(bond):
    """
    Remove a disulfide bond between two residues.

    The disulfide bond cache of each residue's sequence is cleared first.
    The bond is then detached from whichever attribute (`disulfide_bond` or
    `pred_disulfide_bond`) holds it on both residues.

    :param bond: The bond to disconnect
    :type  bond: residue.DisulfideBond

    :raise AssertionError: If the bond is not the known or the predicted bond
        of both of its residues
    """
    res1, res2 = bond.res_pair
    res1.sequence.clearDisulfideBondsCache()
    res2.sequence.clearDisulfideBondsCache()
    if bond == res1.disulfide_bond == res2.disulfide_bond:
        bond_attr_name = 'disulfide_bond'
    elif bond == res1.pred_disulfide_bond == res2.pred_disulfide_bond:
        bond_attr_name = 'pred_disulfide_bond'
    else:
        # Raised explicitly (rather than `assert False`) so that the check
        # still fires when Python runs with assertions disabled (-O).
        raise AssertionError('Undefined behavior')
    for res in bond.res_pair:
        setattr(res, bond_attr_name, None)


class Nucleotide(Residue):
    """
    A nucleic acid residue.  Currently behaves exactly like `Residue`.
    """
    # TODO (MSV-1504): Create proper nucleic acid domain objects