# -*- coding: utf-8 -*-
import copy
import functools
import itertools
import types
import typing
import weakref
from enum import Enum
import decorator
from schrodinger import structure
from schrodinger.models import json
from schrodinger.protein import annotation
from schrodinger.protein import constants
from schrodinger.protein import nonstandard_residues
from schrodinger.protein import properties
# Display name shared by the "unknown" element types; residue comparison
# helpers test against it to detect unknown residues.
_UNKNOWN_NAME = "Unknown"
# Value reported for a residue pair that a similarity matrix does not contain.
_DEFAULT_SIMILARITY = 0.0
# Maps a one-letter amino acid code to the integer id of its group.  Each
# string below lists the codes of one group; groups are numbered from 1 in the
# order given, so the resulting dict has the same keys, values and insertion
# order as a hand-written literal.
_HOMOLOGY_GROUPS = {
    code: group_id
    for group_id, codes in enumerate(
        ("DE", "RKH", "GAVILM", "FYW", "STNQ", "C", "P"), start=1)
    for code in codes
}
def check_if_comparable(default_return=0):
    """
    Build a decorator that short-circuits residue comparison methods.

    The decorated method is skipped and `default_return` is returned when:

    - the other element is a gap
    - the two residues have element types of different classes
      (e.g. protein/DNA or DNA/RNA)
    - either residue's type carries the "unknown" name

    :param default_return: Value returned when the residues are not
        comparable
    :return: A decorator for methods with signature
        `(self, other, *args, **kwargs)`
    """

    @decorator.decorator
    def dec(func, self, other, *args, **kwargs):
        # Gaps are checked first: the remaining checks read `other.type`.
        if other.is_gap:
            return default_return
        if type(self.type) is not type(other.type):
            return default_return
        if (self.type.name == _UNKNOWN_NAME or
                other.type.name == _UNKNOWN_NAME):
            return default_return
        return func(self, other, *args, **kwargs)

    return dec
class ResidueChainKey(typing.NamedTuple):
    """
    Key to partially identify a structured residue relative to its chain.

    Only the residue number and insertion code are stored, so the key is
    meaningful only in the context of a known entry and chain.
    """
    # TODO MSV-2379 account for items in WHResidue.d_hash
    resnum: int  # residue number
    inscode: str  # insertion code
class ResidueKey(typing.NamedTuple):
    """
    Key to partially identify a structured residue.

    Field order and contents are based on `MaestroStructureModel._getKey`.
    """
    entry_id: int
    chain: str
    resnum: int
    inscode: str

    def chainKey(self):
        """
        Return the chain-relative part of this key.

        :return: Key holding only this key's resnum and inscode
        :rtype: ResidueChainKey
        """
        return ResidueChainKey(resnum=self.resnum, inscode=self.inscode)
def get_matrix_value(matrix, first, second):
    """
    Look up the similarity matrix value for a pair of residues.

    The matrix is keyed by 2-tuples. The `(first, second)` ordering is tried
    before `(second, first)`.

    :param matrix: Mapping from residue pairs to similarity values
    :param first: First residue of the pair
    :param second: Second residue of the pair
    :return: The stored value, or `_DEFAULT_SIMILARITY` when neither ordering
        of the pair is present
    """
    for pair in ((first, second), (second, first)):
        if pair in matrix:
            return matrix[pair]
    return _DEFAULT_SIMILARITY
def box_letter(letter):
    """
    Map an ASCII letter to the circled Unicode variant.

    :param letter: ASCII letter to map
    :type letter: str
    :return: The circled form of the letter (lower case maps to U+24D0
        onwards, upper case to U+24B6 onwards)
    :rtype: str
    :raises ValueError: if the input is not an ASCII letter
    """
    codepoint = ord(letter)
    if ord("a") <= codepoint <= ord("z"):
        ascii_base, circled_base = ord("a"), 0x24D0
    elif ord("A") <= codepoint <= ord("Z"):
        ascii_base, circled_base = ord("A"), 0x24B6
    else:
        raise ValueError(f"Could not map {letter}")
    # Keep the letter's position in the alphabet, move it to the circled run.
    return chr(circled_base + (codepoint - ascii_base))
def get_structure_residue_chain_key(structure_residue):
    """
    Create the chain-relative key (no entry or chain information) for a
    structure residue.

    :param structure_residue: Object exposing `resnum` and `inscode`
    :return: (resnum, inscode)
    :rtype: tuple(int, str)
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    resnum = structure_residue.resnum
    inscode = structure_residue.inscode
    return (resnum, inscode)
def get_residue_key(residue, entry_id, chain):
    """
    Build the full residue key for a residue.

    :param residue: Residue; its `getChainKey()` supplies the trailing key
        fields
    :type residue: Residue
    :param entry_id: Entry ID (converted with `int`)
    :type entry_id: str or int
    :param chain: Chain name
    :type chain: str
    :return: The residue key containing entry_id, chain, resnum, and inscode
    :rtype: ResidueKey
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    numeric_entry_id = int(entry_id)
    chain_key = residue.getChainKey()
    return ResidueKey(numeric_entry_id, chain, *chain_key)
def get_structure_residue_key(structure_residue, entry_id):
    """
    Build the full residue key for a structure residue.

    :param structure_residue: Structure residue; its `chain`, `resnum` and
        `inscode` are read
    :type structure_residue: schrodinger.structure._Residue
    :param entry_id: Entry ID (converted with `int`)
    :type entry_id: str or int
    :return: The residue key containing entry_id, chain, resnum, and inscode
    :rtype: ResidueKey
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    numeric_entry_id = int(entry_id)
    chain_name = structure_residue.chain
    chain_key = get_structure_residue_chain_key(structure_residue)
    return ResidueKey(numeric_entry_id, chain_name, *chain_key)
def order_contiguous(residues):
    """
    Check if a list of residues is contiguous, and put them in contiguous
    order if they are not.

    The input is not modified: the search runs on a private copy, so the
    caller's list is still intact afterwards (including when None is
    returned).

    :type residues: list
    :param residues: List of `schrodinger.structure._Residue` objects (any
        iterable is accepted)

    :rtype: list
    :return: List of `schrodinger.structure._Residue` objects in contiguous
        order, an empty list for empty input, or None if the residues were
        not contiguous.
    """
    # Work on a copy so the caller's list is never consumed.
    remaining = list(residues)
    if not remaining:
        return []
    ordered_residues = [remaining.pop(0)]
    # Each pass attaches at most one residue to either end of the ordered
    # run; stop as soon as a full pass attaches nothing.
    attached = True
    while remaining and attached:
        attached = False
        for index, candidate in enumerate(remaining):
            if ordered_residues[-1].isConnectedToResidue(candidate):
                # Connected to last residue in the ordered list
                ordered_residues.append(remaining.pop(index))
                attached = True
                break
            if candidate.isConnectedToResidue(ordered_residues[0]):
                # Connected to first residue in the ordered list
                ordered_residues.insert(0, remaining.pop(index))
                attached = True
                break
    # Anything left over could not be connected to the run.
    return None if remaining else ordered_residues
# Residue property categories.  Each enum is created with the functional
# Enum API from a space-separated list of member names; members are numbered
# from 1 in the order listed.
HELIX_PROPENSITY = Enum(
    'HELIX_PROPENSITY',
    'NoPropensity Likely Weak Ambivalent HelixBreaking')
BETA_STRAND_PROPENSITY = Enum(
    'BETA_STRAND_PROPENSITY',
    'NoPropensity StrandBreaking Ambivalent StrandForming')
TURN_PROPENSITY = Enum(
    'TURN_PROPENSITY',
    'NoPropensity TurnForming Ambivalent TurnBreaking')
HELIX_TERMINATION_TENDENCY = Enum(
    'HELIX_TERMINATION_TENDENCY',
    'NoTendency HelixStarting Ambivalent HelixEnding')
SOLVENT_EXPOSURE_TENDENCY = Enum(
    'SOLVENT_EXPOSURE_TENDENCY',
    'NoTendency Surface Ambivalent Buried')
STERIC_GROUP = Enum(
    'STERIC_GROUP',
    'NoSteric Small Ambivalent Polar Aromatic')
SIDE_CHAIN_CHEM = Enum(
    'SIDE_CHAIN_CHEM',
    'NoSideChainChem AcidicHydrophilic BasicHydrophilic '
    'NeutralHydrophobicAliphatic NeutralHydrophobicAromatic '
    'NeutralHydrophilic PrimaryThiol IminoAcid')
RESIDUE_CHARGE = Enum('RESIDUE_CHARGE', 'Positive Negative Neutral')
# Color Block Tooltip Map: tooltip text for each residue property enum member.
# The "No..." members of the propensity/tendency enums map to an empty string.
# NOTE(review): STERIC_GROUP.NoSteric and SIDE_CHAIN_CHEM.NoSideChainChem have
# no entry, so looking either of them up raises KeyError - confirm that this
# is intended.
CB_TT_MAP = {
    HELIX_PROPENSITY.NoPropensity: "",
    HELIX_PROPENSITY.Likely: "helix-forming",
    HELIX_PROPENSITY.Weak: "weak helix-forming",
    HELIX_PROPENSITY.Ambivalent: "ambivalent",
    HELIX_PROPENSITY.HelixBreaking: "helix-breaking",
    BETA_STRAND_PROPENSITY.NoPropensity: "",
    BETA_STRAND_PROPENSITY.StrandBreaking: "strand-breaking",
    BETA_STRAND_PROPENSITY.Ambivalent: "ambivalent",
    BETA_STRAND_PROPENSITY.StrandForming: "strand-forming",
    TURN_PROPENSITY.NoPropensity: "",
    TURN_PROPENSITY.TurnForming: "turn-forming",
    TURN_PROPENSITY.Ambivalent: "ambivalent",
    TURN_PROPENSITY.TurnBreaking: "turn-breaking",
    HELIX_TERMINATION_TENDENCY.NoTendency: "",
    HELIX_TERMINATION_TENDENCY.HelixStarting: "helix-starting",
    HELIX_TERMINATION_TENDENCY.Ambivalent: "ambivalent",
    HELIX_TERMINATION_TENDENCY.HelixEnding: "helix-ending",
    SOLVENT_EXPOSURE_TENDENCY.NoTendency: "",
    SOLVENT_EXPOSURE_TENDENCY.Surface: "surface",
    SOLVENT_EXPOSURE_TENDENCY.Ambivalent: "ambivalent",
    SOLVENT_EXPOSURE_TENDENCY.Buried: "buried",
    STERIC_GROUP.Small: "small, non-interfering",
    STERIC_GROUP.Ambivalent: "ambivalent",
    STERIC_GROUP.Polar: "sticky polar",
    STERIC_GROUP.Aromatic: "aromatic",
    SIDE_CHAIN_CHEM.AcidicHydrophilic: "acidic, hydrophilic",
    SIDE_CHAIN_CHEM.BasicHydrophilic: "basic, hydrophilic",
    SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic: "neutral, hydrophobic, aliphatic",
    SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic: "neutral, hydrophobic, aromatic",
    SIDE_CHAIN_CHEM.NeutralHydrophilic: "neutral, hydrophilic",
    SIDE_CHAIN_CHEM.PrimaryThiol: "primary thiol",
    SIDE_CHAIN_CHEM.IminoAcid: "imino acid",
    RESIDUE_CHARGE.Positive: "positive",
    RESIDUE_CHARGE.Negative: "negative",
    RESIDUE_CHARGE.Neutral: "neutral",
}
# Non-standard Amino Acids Tooltip Map: three-letter code of a protonation
# variant -> description relative to its standard residue.
NON_STD_AA_TT_MAP = {
    'LYN': 'deprotonated LYS',
    'ARN': 'deprotonated ARG',
    'ASH': 'protonated ASP',
    'GLH': 'protonated GLU',
    'HID': 'δ-nitrogen protonated HIS',
    'HIE': 'ε-nitrogen protonated HIS',
    'HIP': 'δ and ε-nitrogen protonated HIS'
}
# Secondary structure tooltip map: `structure.SS_*` constant -> display label.
SSA_TT_MAP = {
    structure.SS_NONE: "None",
    structure.SS_LOOP: "Loop",
    structure.SS_HELIX: "Helix",
    structure.SS_STRAND: "Strand",
    structure.SS_TURN: "Turn",
}
# One-letter DSSP secondary structure code -> description.
DSSP_CODES = {
    "G": "3-turn helix",
    "H": "4-turn helix",
    "I": "5-turn helix",
    "T": "hydrogen bonded turn",
    "E": "extended strand in parallel and/or anti-parallel Beta sheet conformation",
    "B": "residue in isolated Beta-bridge",
    "S": "bend (non-hydrogen-bond based)",
    "C": "coil",
}
class ElementType:
    """
    Description of one kind of sequence element: a short code, a long code
    and a display name, plus a flag marking nonstandard variants.
    """

    def __init__(self, short_code, long_code, name):
        """
        :param short_code: Short (typically one character) code
        :type short_code: str
        :param long_code: Long code (e.g. the PDB residue name)
        :type long_code: str
        :param name: Human-readable name
        :type name: str
        """
        self.short_code = short_code
        self.long_code = long_code
        self.name = name
        # Only variants created through makeVariant may flip this to True.
        self.nonstandard = False

    def __str__(self):
        return self.short_code

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f"{cls_name}('{self.short_code}', '{self.long_code}', "
                f"'{self.name}')")

    def makeVariant(self, long_code, short_code=None, *, nonstandard=True):
        """
        Create a variant of an element type with a new long and short code.

        The variant is a deep copy, so later changes to it do not affect
        this element type.

        :param long_code: A 2+ character string representing the element type
        :type long_code: str
        :param short_code: A 1 character string representing the element
            type. When None, the short code of this element type is kept.
        :type short_code: str
        :param nonstandard: Whether the variant should be considered
            nonstandard. If False, the residue will be generally treated
            identically to its standard (e.g. HIE/HIS)
        :type nonstandard: bool
        :return: The variant element type
        :rtype: ElementType
        """
        variant = copy.deepcopy(self)
        variant.long_code = long_code
        if short_code is not None:
            variant.short_code = short_code
        variant.nonstandard = nonstandard
        return variant
class ResidueType(ElementType):
    """
    Element type for residues, adding physico-chemical and structural
    property attributes to the codes and name held by `ElementType`.
    """

    def __init__(self,
                 short_code,
                 long_code,
                 name,
                 charge=None,
                 hydrophobicity=None,
                 hydrophilicity=None,
                 helix_propensity=None,
                 beta_strand_propensity=None,
                 turn_propensity=None,
                 helix_termination_tendency=None,
                 exposure_tendency=None,
                 steric_group=None,
                 side_chain_chem=None,
                 isoelectric_point=None):
        """
        Every property argument is optional and is stored unchanged on the
        attribute of the same name; properties left out are None.

        :param short_code: Short (one character) residue code
        :type short_code: str
        :param long_code: Long residue code
        :type long_code: str
        :param name: Human-readable residue name
        :type name: str
        :param charge: Residue charge category
        :type charge: RESIDUE_CHARGE or None
        :param hydrophobicity: Hydrophobicity value
        :type hydrophobicity: float or None
        :param hydrophilicity: Hydrophilicity value
        :type hydrophilicity: float or None
        :param helix_propensity: Helix propensity category
        :type helix_propensity: HELIX_PROPENSITY or None
        :param beta_strand_propensity: Beta strand propensity category
        :type beta_strand_propensity: BETA_STRAND_PROPENSITY or None
        :param turn_propensity: Turn propensity category
        :type turn_propensity: TURN_PROPENSITY or None
        :param helix_termination_tendency: Helix termination tendency
        :type helix_termination_tendency: HELIX_TERMINATION_TENDENCY or None
        :param exposure_tendency: Solvent exposure tendency
        :type exposure_tendency: SOLVENT_EXPOSURE_TENDENCY or None
        :param steric_group: Steric group
        :type steric_group: STERIC_GROUP or None
        :param side_chain_chem: Side chain chemistry
        :type side_chain_chem: SIDE_CHAIN_CHEM or None
        :param isoelectric_point: Isoelectric point
        :type isoelectric_point: float or None
        """
        super().__init__(short_code, long_code, name)
        self.charge = charge
        self.hydrophobicity = hydrophobicity
        # Previously the argument was accepted but never stored, so the
        # attribute did not exist on any residue type.
        self.hydrophilicity = hydrophilicity
        self.helix_propensity = helix_propensity
        self.beta_strand_propensity = beta_strand_propensity
        self.turn_propensity = turn_propensity
        self.helix_termination_tendency = helix_termination_tendency
        self.exposure_tendency = exposure_tendency
        self.steric_group = steric_group
        self.side_chain_chem = side_chain_chem
        self.isoelectric_point = isoelectric_point
# TODO MSV-1504 determine what nucleotides need and change parent to ElementType
class NucleotideType(ResidueType):
    """
    Element type for nucleotides. Currently adds nothing to `ResidueType`;
    the subclass exists so nucleotides can be told apart by type.
    """
    pass


class DeoxyribonucleotideType(NucleotideType):
    """
    Nucleotide type used for the DNA base constants.
    """
    pass


class RibonucleotideType(NucleotideType):
    """
    Nucleotide type used for the RNA base constants.
    """
    pass
# Std Amino Acids
# One module-level ResidueType constant per standard amino acid.
# NOTE(review): the hydrophobicity values appear to follow the Kyte-Doolittle
# hydropathy scale and the hydrophilicity values the Hopp-Woods scale -
# confirm against the intended source.
ALANINE = ResidueType(
    "A",
    "ALA",
    "Alanine",
    hydrophobicity=1.80,
    hydrophilicity=-0.50,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=6.00)
ARGININE = ResidueType(
    "R",
    "ARG",
    "Arginine",
    hydrophobicity=-4.50,
    hydrophilicity=3.00,
    charge=RESIDUE_CHARGE.Positive,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=10.76)
ASPARAGINE = ResidueType(
    "N",
    "ASN",
    "Asparagine",
    hydrophobicity=-3.50,
    hydrophilicity=0.20,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.41)
# Aspartic acid.  `exposure_tendency` was previously omitted, which left it as
# None for this residue alone among the standard amino acids.
# NOTE(review): Surface is chosen to match the other charged/hydrophilic
# residues defined in this module (ARG, GLU, LYS, ...) - confirm against the
# source of the color scheme.
ASPARTIC_ACID = ResidueType(
    "D",
    "ASP",
    "Aspartic acid",
    hydrophobicity=-3.50,
    hydrophilicity=3.00,
    charge=RESIDUE_CHARGE.Negative,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.AcidicHydrophilic,
    isoelectric_point=2.77)
# Standard amino acid definitions, continued (cysteine through lysine).
# Arguments: short code, long code, name, then the property keywords accepted
# by ResidueType.
CYSTEINE = ResidueType(
    "C",
    "CYS",
    "Cysteine",
    hydrophobicity=2.50,
    hydrophilicity=-1.00,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.PrimaryThiol,
    isoelectric_point=5.07)
GLUTAMIC_ACID = ResidueType(
    "E",
    "GLU",
    "Glutamic acid",
    hydrophobicity=-3.50,
    hydrophilicity=3.00,
    charge=RESIDUE_CHARGE.Negative,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.AcidicHydrophilic,
    isoelectric_point=3.22)
GLUTAMINE = ResidueType(
    "Q",
    "GLN",
    "Glutamine",
    hydrophobicity=-3.50,
    hydrophilicity=0.20,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.65)
GLYCINE = ResidueType(
    "G",
    "GLY",
    "Glycine",
    hydrophobicity=-0.40,
    hydrophilicity=0.00,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.97)
HISTIDINE = ResidueType(
    "H",
    "HIS",
    "Histidine",
    hydrophobicity=-3.20,
    hydrophilicity=-0.50,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=7.59)
ISOLEUCINE = ResidueType(
    "I",
    "ILE",
    "Isoleucine",
    hydrophobicity=4.50,
    hydrophilicity=-1.80,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=6.02)
LEUCINE = ResidueType(
    "L",
    "LEU",
    "Leucine",
    hydrophobicity=3.80,
    hydrophilicity=-1.80,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.98)
LYSINE = ResidueType(
    "K",
    "LYS",
    "Lysine",
    hydrophobicity=-3.90,
    hydrophilicity=3.00,
    charge=RESIDUE_CHARGE.Positive,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=9.74)
# Standard amino acid definitions, continued (methionine through valine),
# followed by the unknown-residue and capping-group placeholder types.
METHIONINE = ResidueType(
    "M",
    "MET",
    "Methionine",
    hydrophobicity=1.90,
    hydrophilicity=-1.30,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.74)
PHENYLALANINE = ResidueType(
    "F",
    "PHE",
    "Phenylalanine",
    hydrophobicity=2.80,
    hydrophilicity=-2.50,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.48)
PROLINE = ResidueType(
    "P",
    "PRO",
    "Proline",
    hydrophobicity=-1.60,
    hydrophilicity=0.00,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.IminoAcid,
    isoelectric_point=6.30)
SERINE = ResidueType(
    "S",
    "SER",
    "Serine",
    hydrophobicity=-0.80,
    hydrophilicity=0.30,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.58)
THREONINE = ResidueType(
    "T",
    "THR",
    "Threonine",
    hydrophobicity=-0.70,
    hydrophilicity=-0.40,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    steric_group=STERIC_GROUP.Ambivalent,
    isoelectric_point=5.60)
TRYPTOPHAN = ResidueType(
    "W",
    "TRP",
    "Tryptophan",
    hydrophobicity=-0.90,
    hydrophilicity=-3.40,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.89)
TYROSINE = ResidueType(
    "Y",
    "TYR",
    "Tyrosine",
    hydrophobicity=-1.30,
    hydrophilicity=-2.30,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.66)
# Unknown amino acid: no property keywords are given, so every property
# attribute takes the ResidueType default of None.
UNKNOWN = ResidueType("X", "UNK", _UNKNOWN_NAME)
VALINE = ResidueType(
    "V",
    "VAL",
    "Valine",
    hydrophobicity=4.20,
    hydrophilicity=-1.50,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.96)
# Template for capping groups: empty long code and no properties.  The
# concrete capping groups are created from it as variants with real long
# codes.
CAPPING_GROUP = ResidueType("X", "", "Capping Group")
# Unknown nucleic acid element.  It shares the long code "UNK" and the name
# with the unknown amino acid type but has short code "N".
UNKNOWN_NA = NucleotideType("N", "UNK", _UNKNOWN_NAME)
#RNA
# Standard RNA bases: the long code equals the one-letter short code.
ADENINE = RibonucleotideType("A", "A", "Adenine")
CYTOSINE = RibonucleotideType("C", "C", "Cytosine")
GUANINE = RibonucleotideType("G", "G", "Guanine")
URACIL = RibonucleotideType("U", "U", "Uracil")
#DNA
# Standard DNA bases: the long code is the short code prefixed with "D".
# The short codes A, C and G are shared with the RNA bases above.
dADENINE = DeoxyribonucleotideType("A", "DA", "Adenine")
dCYTOSINE = DeoxyribonucleotideType("C", "DC", "Cytosine")
dGUANINE = DeoxyribonucleotideType("G", "DG", "Guanine")
dTHYMINE = DeoxyribonucleotideType("T", "DT", "Thymine")
# yapf: disable
# The 20 standard amino acids, ordered alphabetically by one-letter code.
STD_AMINO_ACIDS = [
    ALANINE, CYSTEINE, ASPARTIC_ACID, GLUTAMIC_ACID, PHENYLALANINE, GLYCINE,
    HISTIDINE, ISOLEUCINE, LYSINE, LEUCINE, METHIONINE, ASPARAGINE, PROLINE,
    GLUTAMINE, ARGININE, SERINE, THREONINE, VALINE, TRYPTOPHAN, TYROSINE,
]
STD_RNA_BASES = [ADENINE, CYTOSINE, GUANINE, URACIL]
STD_DNA_BASES = [dADENINE, dCYTOSINE, dGUANINE, dTHYMINE]
# Variant tables consumed by `make_variants`.  Each entry pairs a base element
# type with its variants; a variant is either a long code (the base type's
# short code is kept) or a (long code, short code) tuple.
AMINO_ACID_VARIANTS = [
    (ALANINE, [
        "AIB", "ALM", "AYA", "BNN", "CHG", "CSD", ("DAL", "X"),
        "DHA", "DNP", "FLA", "HAC", "MAA", "PRR", "TIH", "TPQ",
    ]),
    (ARGININE, ["AGM", ("DAR", "X"), "HAR", "MMO", "ARM", "ARN", "HMR", "ACL"]),
    (ASPARAGINE, ["MEN", ("DSG", "X")]),
    (ASPARTIC_ACID, [
        "DSP", "BHD", "2AS", "ASQ", "ASB", "ASA", "ASK", "ASH",
        "ASL", ("DAS", "X")
    ]),
    (CYSTEINE,
     [("BCS", "X"), "BUC", "C5C", "C6C", "CCS", "CEA", "CME", "CSO", "CSP",
      "CSS", "CSW", "CSX", "CY1", "CY3", "CYG", "CYM", "CYP", "CYQ",
      "CYX", ("DCY", "X"), "EFC", "OCS", "PEC", "PR3", "SCH", "SCS", "SCY",
      "SHC", "SMC", "SOC"]),
    (GLUTAMIC_ACID,
     ["GLH", "GGL", 'PCA', '5HP', ('DGL', "X"), 'CGU', 'GMA']),
    (GLUTAMINE, [("DGN", "X")]),
    (GLYCINE, ["GLZ", "SAR", 'NMC', 'GL3', 'GSC', 'MPQ', 'MSA']),
    (HISTIDINE, [("DHI", "X"), "HID", "HIC", "HIE", "HIP", "HSD", "HSE",
                 "HSP", "MHS", "NEM", "NEP", "3AH"]),
    (ISOLEUCINE, [('DIL', "X"), 'IIL']),
    (LEUCINE,
     ["BUG", ("NLE", "X"), 'NLP', 'NLN', ('DLE', "X"), 'CLE', 'MLE']),
    (LYSINE, [
        'LYM', 'ALY', 'LYZ', 'LYN', 'LLY', 'LLP', 'SHR', 'TRG',
        ('DLY', "X"), 'KCX'
    ]),
    (METHIONINE, ["FME", "CXM", "OMT", "MSE"]),
    (PHENYLALANINE, ["DAH", ("DPN", "X"), "HPQ", "PHI", "PHL"]),
    (PROLINE, [('DPR', "X"), ('HYP', "X")]),
    (SERINE,
     ['OAS', 'MIS', 'SAC', 'SVA', 'SET', ('SEP', "X"), 'SEL', ("DSN",
                                                                "X")]),
    (THREONINE, ["ALO", "BMT", ("DTH", "X"), "THO", ("TPO", "t")]),
    (TRYPTOPHAN, [("DTR", "X"), "HTR", "LTR", "TPL", "TRO"]),
    (TYROSINE, [("DTY", "X"), "IYR", "PAQ", ("PTR", "y"), "STY", "TYB",
                "TYM", "TYO", "TYQ", "TYS", "TYY"]),
    (VALINE, ["DIV", ("DVA", "X"), "MVA"])
]
# Nucleotide variants, in the same entry format as AMINO_ACID_VARIANTS.
NUCLEOBASE_VARIANTS = [
    (ADENINE, ["AMP", "ADP", "ATP", "1MA", "6MA"]),
    (CYTOSINE, ["CMP", "CDP", "CTP", "5MC", "5HC", "5FC", "1CC", "OMC"]),
    (GUANINE, ["GMP", "GDP", "GTP", "1MG", "2MG", "M2G", "7MG", "OMG"]),
    (URACIL, ["UMP", "UDP", "UTP", ("PSU", "Ψ"), "H2U", "5MU", "DU"]),
    (dTHYMINE, ["TMP", "TDP", "TTP"]),
    (UNKNOWN_NA, ["YYG", ("I", "I"), ("DI", "DI")])
]
# Long codes of the capping groups; all are variants of CAPPING_GROUP and so
# keep its short code.
CAPPING_GROUP_VARIANTS = [
    (CAPPING_GROUP, ['ACE', 'NMA', 'IND', 'NCO', 'ANF', 'TOSG', 'FCO',
                     'MPA', 'NH2'])
]
# yapf: enable
def make_variants(variants):
    """
    Helper function to create modified amino acids and modified nucleotides.

    Each modified residue has all the same properties as the standard one
    (hydrophobicity, charge, etc.) but its long code (ie, its PDB residue
    name) differs. If a tuple of (string, string) is provided, both its long
    code and short code differ.

    :param variants: Pairs of a base residue type and its list of variants
    :type variants: list of (`ResidueType`, list of string or (string,string))
    :returns: a list of residue variants, in the order they are listed
    :rtype: list of `ResidueType`
    """
    non_std_residues = []
    for base_type, codes in variants:
        for code in codes:
            if isinstance(code, tuple):
                long_code, short_code = code
            else:
                # A bare string only renames the long code.
                long_code, short_code = code, base_type.short_code
            non_std_residues.append(
                base_type.makeVariant(long_code, short_code))
    return non_std_residues
def merge_dicts(*dict_args):
    """
    Combine any number of dictionaries into one new dictionary.

    Note that repeated keys will be silently overwritten with the last value.

    :param dict_args: Dictionaries to merge, in order of increasing priority
    :return: A new dictionary; the inputs are not modified
    :rtype: dict
    """
    merged = {}
    for mapping in dict_args:
        merged.update(mapping.items())
    return merged
def make_one_letter_map(res_list):
    """
    Index element types by their short code.

    :param res_list: Iterable of objects with a `short_code` attribute
    :return: Mapping of short code to element type; for repeated short codes
        the last element wins
    :rtype: dict
    """
    by_short_code = {}
    for res in res_list:
        by_short_code[res.short_code] = res
    return by_short_code
def make_three_letter_map(res_list):
    """
    Index element types by their long code.

    :param res_list: Iterable of objects with a `long_code` attribute
    :return: Mapping of long code to element type; for repeated long codes
        the last element wins
    :rtype: dict
    """
    by_long_code = {}
    for res in res_list:
        by_long_code[res.long_code] = res
    return by_long_code
# Derived lookup tables.  Statement order matters: each table is built from
# the ones defined before it.
# Charge variants have their charge set correctly below.
NON_STD_AMINO_ACIDS = make_variants(AMINO_ACID_VARIANTS)
AMINO_ACIDS = STD_AMINO_ACIDS + NON_STD_AMINO_ACIDS + [UNKNOWN]
STD_AMINO_ACIDS_ONE_LETTER = make_one_letter_map(STD_AMINO_ACIDS)
STD_AMINO_ACIDS_THREE_LETTER = make_three_letter_map(STD_AMINO_ACIDS)
AMINO_ACIDS_THREE_LETTER = make_three_letter_map(AMINO_ACIDS)
# Variants are deep copies of their standard residue (see
# ElementType.makeVariant), so overriding the charge here leaves the standard
# residue types untouched.
AMINO_ACIDS_THREE_LETTER['HIP'].charge = RESIDUE_CHARGE.Positive
for aa in ['ASH', 'ARN', 'GLH', 'HID', 'HIE', 'LYN']:
    AMINO_ACIDS_THREE_LETTER[aa].charge = RESIDUE_CHARGE.Neutral
CAPPING_GROUPS = make_variants(CAPPING_GROUP_VARIANTS)
CAPPING_GROUP_ALPHABET = make_three_letter_map(CAPPING_GROUPS)
# Combined alphabets keyed by both one-letter and long codes.  On key
# collisions the later argument of merge_dicts wins.
AMINO_ACIDS_AND_CAPPING_GROUPS = merge_dicts(STD_AMINO_ACIDS_ONE_LETTER,
                                             AMINO_ACIDS_THREE_LETTER,
                                             CAPPING_GROUP_ALPHABET)
STD_AMINO_ACIDS_AND_CAPPING_GROUPS = merge_dicts(STD_AMINO_ACIDS_ONE_LETTER,
                                                 STD_AMINO_ACIDS_THREE_LETTER,
                                                 CAPPING_GROUP_ALPHABET)
NON_STD_NUCLEOBASES = make_variants(NUCLEOBASE_VARIANTS)
# UNKNOWN_NA itself is not part of NUCLEOBASES; only its variants are.
NUCLEOBASES = STD_RNA_BASES + STD_DNA_BASES + NON_STD_NUCLEOBASES
DNA_NUCLEOBASES = STD_DNA_BASES
RNA_NUCLEOBASES = STD_RNA_BASES
DNA_THREE_LETTER = make_three_letter_map(DNA_NUCLEOBASES)
RNA_THREE_LETTER = make_three_letter_map(RNA_NUCLEOBASES)
NA_THREE_LETTER = make_three_letter_map(NUCLEOBASES)
DNA_ONE_LETTER = make_one_letter_map(DNA_NUCLEOBASES)
RNA_ONE_LETTER = make_one_letter_map(RNA_NUCLEOBASES)
DNA_ALPHABET = merge_dicts(DNA_THREE_LETTER, DNA_ONE_LETTER)
RNA_ALPHABET = merge_dicts(RNA_THREE_LETTER, RNA_ONE_LETTER)
# Registry of the built-in element types keyed by the
# (short_code, long_code, name) triple.
# NOTE(review): the key has the same three fields that
# Residue.toJsonImplementation writes under 'element_type'; presumably this is
# the lookup used when deserializing - confirm against the reader.
ALL_ELEMENT_TYPES = {}
for ele_type in STD_AMINO_ACIDS + CAPPING_GROUPS + STD_RNA_BASES + STD_DNA_BASES + [
        UNKNOWN, UNKNOWN_NA
]:
    ele_type_key = ele_type.short_code, ele_type.long_code, ele_type.name
    ALL_ELEMENT_TYPES[ele_type_key] = ele_type
# Module-level caches, filled in lazily by get_protein_alphabet().
_nonstandard_residue_db = None
_protein_alphabet = None


def get_protein_alphabet():
    """
    Return a cached map of amino acid element types.

    Includes definitions from the nonstandard residues database. The cache
    is dropped whenever that database emits `residuesChanged`, and rebuilt on
    the next call.

    :return: Read-only mapping from residue code to element type
    :rtype: types.MappingProxyType
    """
    global _nonstandard_residue_db, _protein_alphabet
    if _nonstandard_residue_db is None:
        # First call: remember the database and subscribe to its changes.
        _nonstandard_residue_db = nonstandard_residues.get_residue_database()
        _nonstandard_residue_db.residuesChanged.connect(
            _invalidate_protein_alphabet)
    if _protein_alphabet is not None:
        return _protein_alphabet
    # Sources are listed in increasing priority: a later source overrides an
    # earlier one for any code they share.
    combined = {
        **_get_nonstandard_residues(),
        **STD_AMINO_ACIDS_AND_CAPPING_GROUPS,
        # Add standard nonstandards (e.g. HID/HIE/HIP)
        **_get_residue_variants(),
        UNKNOWN.short_code: UNKNOWN,
        UNKNOWN.long_code: UNKNOWN,
    }
    _protein_alphabet = types.MappingProxyType(combined)
    return _protein_alphabet
def _invalidate_protein_alphabet():
    """
    Discard the cached protein alphabet so that the next call to
    `get_protein_alphabet` rebuilds it.
    """
    global _protein_alphabet
    _protein_alphabet = None
@functools.lru_cache()
def _get_residue_variants():
    """
    Get built-in variants of standard amino acids.

    Walks `structure.RESIDUE_MAP_3_TO_1_LETTER` and, for every long code that
    is not itself a standard amino acid but whose short code (compared in
    upper case) is one, creates a variant of that standard type. A variant
    is marked nonstandard only when its short code differs from the standard
    type's short code.

    The result is cached, so every call returns the same dict object.

    :return: Mapping from long code to variant element type
    :rtype: dict
    """
    variants = {}
    for long_code, short in structure.RESIDUE_MAP_3_TO_1_LETTER.items():
        if long_code in STD_AMINO_ACIDS_THREE_LETTER:
            continue
        std_type = STD_AMINO_ACIDS_ONE_LETTER.get(short.upper())
        if std_type is None:
            # Not related to any standard amino acid
            continue
        differs_from_std = short != std_type.short_code
        variants[long_code] = std_type.makeVariant(
            long_code, short, nonstandard=differs_from_std)
    return variants
def _get_nonstandard_residues():
    """
    Generate a dictionary of nonstandard amino acid element types from the
    nonstandard residues database.

    Entries flagged as standard are skipped. An entry that aligns with a
    standard amino acid becomes a variant of that amino acid; any other entry
    becomes a variant of `UNKNOWN` carrying the entry's own short code.

    :return: Mapping from long code to a newly created element type
    :rtype: dict
    """
    nonstandard = {}
    # The database object is cached but this may need to read it from disk
    database = nonstandard_residues.get_residue_database()
    for entry in database.amino_acids:
        if entry.standard:
            continue
        short_code = entry.code
        long_code = entry.name
        aligned_code = entry.aligns_with
        if aligned_code:
            # NOTE(review): the entry's own short code is not passed here, so
            # the variant keeps the standard residue's short code - confirm
            # that this is intended.
            parent_type = STD_AMINO_ACIDS_ONE_LETTER[aligned_code]
            nonstandard[long_code] = parent_type.makeVariant(long_code)
        else:
            nonstandard[long_code] = UNKNOWN.makeVariant(long_code,
                                                         short_code)
    return nonstandard
def any_structured_residues(residues):
    """
    Report whether at least one of the given residues is structured.

    Iteration stops at the first structured residue.

    :param residues: The iterable of residues to check
    :type residues: iterable(residue.Residue)
    :return: True if the given residues contain one that is structured.
    :rtype: bool
    """
    for res in residues:
        if res.hasStructure():
            return True
    return False
class AbstractSequenceElement(json.JsonableClassMixin):
    """
    Behaviour shared by the elements of a sequence (gaps and residues):
    copying, the weak link to the owning sequence, and index lookups.
    """

    # This class intentionally doesn't have an __init__ method since
    # instantiating Gap and Residue objects needs to be as fast as possible and
    # calling super().__init__ would slow down their __init__s.

    def _getNewInstance(self):
        """
        Helper for copying: create a fresh instance of this element's class
        using a no-argument constructor call.
        """
        element_cls = self.__class__
        return element_cls()

    def __copy__(self):
        duplicate = self._getNewInstance()
        duplicate.is_gap = self.is_gap
        duplicate.sequence = self.sequence
        return duplicate

    def __deepcopy__(self, memo):
        # Same as __copy__ (none of the public attrs should be deepcopied)
        return copy.copy(self)

    @property
    def is_res(self):
        """
        Utility function to check whether a residue is not a gap
        """
        return not self.is_gap

    @property
    def sequence(self):
        """
        The sequence that this element is part of. Will be `None` if this
        residue is not part of a sequence. Stored as a weakref to avoid
        circular references.

        :rtype: sequence.AbstractSingleChainSequence
        """
        seq_ref = self._sequence
        return None if seq_ref is None else seq_ref()

    @sequence.setter
    def sequence(self, value):
        self._sequence = None if value is None else weakref.ref(value)

    def hasStructure(self):
        """
        :return: Whether this element has a structure i.e. whether it has
            corresponding XYZ coordinates in Maestro.
        :rtype: bool
        """
        owner = self.sequence
        owner_has_structure = owner is not None and owner.hasStructure()
        # Gaps never have a structure; `seqres_only` is only read for
        # residues.
        if not self.is_res:
            return False
        if self.seqres_only:
            return False
        return owner_has_structure

    @property
    def idx_in_seq(self):
        """
        Return the index of this element within its sequence.

        :return: Index of this element in its sequence or None if it
            is not part of a sequence.
        :rtype: int or None
        """
        owner = self.sequence
        if owner is None:
            return None
        return owner.index(self)

    @property
    def gapless_idx_in_seq(self):
        """
        Return the index of this residue within its sequence ignoring gaps

        :return: Index of this residue in its sequence or None if it
            is not part of a sequence.
        :rtype: int or None
        """
        owner = self.sequence
        if owner is None:
            return None
        return owner.index(self, ignore_gaps=True)
class Gap(AbstractSequenceElement):
    """
    A gap in a sequence. Its string form is "~" and it serializes to a dict
    holding only the `is_gap` marker.
    """
    # A one-element tuple: without the trailing comma this is just a
    # parenthesised string rather than a sequence of slot names.
    __slots__ = ("_sequence",)
    is_gap = True

    def __init__(self):
        # Goes through the `sequence` property setter, which stores None.
        self.sequence = None

    def __str__(self):
        return "~"

    def toJsonImplementation(self):
        """
        :return: JSON-serializable representation of the gap
        :rtype: dict
        """
        return {'is_gap': self.is_gap}

    @classmethod
    def fromJsonImplementation(cls, json_obj):
        """
        :param json_obj: Dict produced by `toJsonImplementation`
        :type json_obj: dict
        :return: A new gap
        :rtype: Gap
        :raises ValueError: if `json_obj['is_gap']` is missing or not True
        """
        if json_obj.get('is_gap', None) is not True:
            raise ValueError('Attempting to deserialize a non-gap object')
        return cls()
class Residue(AbstractSequenceElement):
    """
    An amino acid residue.

    Only a handful of attributes are assigned in `__init__`; every other slot
    is filled with its default value by `__getattr__` the first time it is
    read.
    """
    # Names listed here are also the only attributes `__getattr__` will
    # supply a default for.
    __slots__ = (
        "_sequence",
        "type",
        "_inscode",
        "_resnum",
        "seqres_only",
        "disulfide_bond",
        "pred_disulfide_bond",
        "b_factor",
        "molecule_number",
        "pfam",  # Either None or 1-char string
        "pred_secondary_structure",  # structure.SS_HELIX/SS_STRAND/etc
        "secondary_structure",  # structure.SS_HELIX/SS_STRAND/etc
        "pred_accessibility",  # predictors.SolventAccessibility
        "pred_disordered",  # predictors.Disordered
        "pred_domain_arr",  # predictors.DomainArrangement
        "area",  # Solvent accessible area
        "composition",  # Amino acid composition in profile residues.
        "domains",  # name of the domain(s) to which the residue belongs
        "kinase_features",  # kinase feature label
        "_kinase_conservation",  # kinase conservation category
        "_descriptors",  # dict mapping descriptor name to val
        "gpcr_segment",
        "gpcr_generic_number",
    )
    # Default values for any attributes that shouldn't default to None;
    # must not be mutable
    _DEFAULT_ATTR_VALS = {"area": 0.0}
    # Class-level flag distinguishing residues from gaps.
    is_gap = False
@property
def descriptors(self):
if self._descriptors is None:
self._descriptors = dict()
return self._descriptors
@property
def kinase_conservation(self):
if self._kinase_conservation is None:
self._kinase_conservation = dict()
return self._kinase_conservation
[docs] def __init__(self,
element_type,
inscode=None,
resnum=None,
seqres_only=False):
"""
:param element_type: The kind of the residue
:type element_type: ResidueType
:param inscode: The insertion code
:type inscode: str
:param resnum: PDB residue number
:type resnum: int
:param seqres_only: Whether this residue only appears in the SEQRES
record of a structure. Only applies to sequences associated with a
structure.
:type seqres_only: bool
"""
self.type = element_type
self._inscode = inscode
self._resnum = resnum
self.seqres_only = seqres_only
# Do *not* add new instance attributes here, as it will slow down
# Residue instantiation. Instead, add the attribute name to __slots__
# and add the initial value to `_DEFAULT_ATTR_VALS` unless the initial
# value should be None.
def __getattr__(self, attr):
if attr not in self.__slots__:
raise AttributeError(
f"'{self.__class__.__name__}' has no attribute '{attr}'")
val = self._DEFAULT_ATTR_VALS.get(attr)
# set the value of the attribute so that repeatedly accessing it doesn't
# incur time penalties from repeated __getattr__ calls
setattr(self, attr, val)
return val
[docs] def toJsonImplementation(self):
json_dict = {
'inscode': self.inscode,
'resnum': self.resnum,
'element_type': [
self.type.short_code, self.type.long_code, self.type.name
],
'seqres_only': self.seqres_only,
}
for key, val in (
('b_factor', self.b_factor),
('secondary_structure', self.secondary_structure),
('pred_secondary_structure', self.pred_secondary_structure),
('pred_accessibility', self.pred_accessibility),
('pred_disordered', self.pred_disordered),
('pred_domain_arr', self.pred_domain_arr),
('area', self.area),
('composition', self.composition),
('kinase_features', self.kinase_features),
('_descriptors', self._descriptors),
('gpcr_segment', self.gpcr_segment),
('gpcr_generic_number', self.gpcr_generic_number),
):
if val is not None:
json_dict[key] = val
if self._kinase_conservation is not None:
# JSON requires string keys, so cast to str
json_dict['_kinase_conservation'] = {
str(k): v for k, v in self._kinase_conservation.items()
}
return json_dict
[docs] @classmethod
def fromJsonImplementation(cls, json_obj):
# TODO MSV-1504: separate logic for different sequence types
elem_type = tuple(json_obj.pop('element_type'))
try:
res_type = ALL_ELEMENT_TYPES[elem_type]
except KeyError:
long_code = elem_type[1]
res_type = get_protein_alphabet().get(long_code)
if res_type is None:
short_code = elem_type[0]
if short_code == UNKNOWN_NA.short_code:
res_type = UNKNOWN_NA.makeVariant(long_code)
else:
res_type = UNKNOWN.makeVariant(long_code)
res = cls(element_type=res_type,
inscode=json_obj['inscode'],
seqres_only=json_obj['seqres_only'],
resnum=json_obj['resnum'])
if 'area' in json_obj:
res.area = json_obj['area']
if 'composition' in json_obj:
res.composition = json_obj['composition']
if 'b_factor' in json_obj:
res.b_factor = json_obj['b_factor']
if 'secondary_structure' in json_obj:
res.secondary_structure = json_obj['secondary_structure']
if 'pred_secondary_structure' in json_obj:
res.pred_secondary_structure = json_obj['pred_secondary_structure']
from schrodinger.protein import predictors
if json_obj.get('pred_accessibility'):
res.pred_accessibility = predictors.SolventAccessibility.fromJson(
json_obj['pred_accessibility'])
if json_obj.get('pred_disordered'):
res.pred_disordered = predictors.Disordered.fromJson(
json_obj['pred_disordered'])
if json_obj.get('pred_domain_arr'):
res.pred_domain_arr = predictors.DomainArrangement.fromJson(
json_obj['pred_domain_arr'])
kinase_feature = json_obj.get('kinase_features')
if kinase_feature is not None:
res.kinase_features = annotation.KinaseFeatureLabel.fromJson(
kinase_feature)
res._descriptors = json_obj.get('_descriptors')
kinase_conservation = json_obj.get('_kinase_conservation')
if kinase_conservation is not None:
kinase_conservation = {
int(k): annotation.KinaseConservation.fromJson(v)
for k, v in kinase_conservation.items()
}
res._kinase_conservation = kinase_conservation
gpcr_segment = json_obj.get('gpcr_segment')
gpcr_number = json_obj.get('gpcr_generic_number')
res.gpcr_segment = gpcr_segment
res.gpcr_generic_number = gpcr_number
return res
def __str__(self):
"""
Returns the short code for the residue
"""
return str(self.type)
def __repr__(self):
class_fmt = "%s({parts})" % self.__class__.__name__
parts = [repr(self.type)]
kwarg_list = self._getReprKwargList()
for kwarg_name, kwarg_val in kwarg_list:
parts.append("{0}={1}".format(kwarg_name, kwarg_val))
return class_fmt.format(parts=", ".join(parts))
def _getNewInstance(self):
return self.__class__(self.type)
def __copy__(self):
new_res = super().__copy__()
blacklist = self._getCopyBlackList()
for name in type(self).__slots__:
if name in blacklist:
continue
setattr(new_res, name, getattr(self, name))
return new_res
def __deepcopy__(self, memo):
new_res = copy.copy(self)
blacklist = self._getCopyBlackList()
for name in type(self).__slots__:
if name in blacklist:
continue
value = getattr(self, name)
setattr(new_res, name, copy.deepcopy(value, memo))
return new_res
def _getCopyBlackList(self):
"""
A list of attributes to not copy or deepcopy.
:return: Attribute names to not copy or deepcopy
:rtype: set(str)
"""
# Don't copy type because it's intended to be a singleton
# Don't copy sequence because it's set in the superclass
# disulfide_bond is not copied due to the invariant that a maximum of 2
# residues may contain the same disulfide bond
return {"type", "sequence", "disulfide_bond", "pred_disulfide_bond"}
def _getReprKwargList(self):
"""
Return a list of kwargs that should be specified in the repr.
:return: list of (kwarg_key, kwarg_value) pairs
:rtype: list[tuple(str, str)]
"""
resnum = "None" if self.resnum is None else "%s" % self.resnum
kwarg_list = [('inscode', "'%s'" % self.inscode), ('resnum', resnum)]
if self.molecule_number is not None:
kwarg_list.append(('molnum', str(self.molecule_number)))
if self.seqres_only:
kwarg_list.append(('seqres_only', 'True'))
return kwarg_list
[docs] def getChainKey(self):
# TODO MSV-2379 account for all items in WHResidue.d_hash
return ResidueChainKey(self.resnum, self.inscode)
[docs] def getKey(self):
"""
Get a key that uniquely identifies the residue relative to structures.
:return: Residue key as (entry_id, chain, inscode, resnum, molnum,
pdbname), or None if residue is unparented or has no structure
:rtype: ResidueKey or NoneType
"""
seq = self.sequence
if seq is None or not self.hasStructure() or seq.entry_id is None:
return None
return get_residue_key(self, seq.entry_id, seq.structure_chain)
[docs] def hasSetResNum(self) -> bool:
"""
:return: Whether a specific resnum has been set
"""
return self._resnum is not None
@property
def resnum(self):
"""
If resnum is set to None, resnum will be auto-generated from column
number.
"""
if not self.hasSetResNum() and self.sequence is not None:
return self.idx_in_seq + 1
return self._resnum
@resnum.setter
def resnum(self, value):
self._resnum = value
@property
def inscode(self):
"""
If inscode and rescode are both set to None, the inscode will be '+'.
"""
if self._inscode is None:
if self.hasSetResNum():
return ' '
return '+'
return self._inscode
@inscode.setter
def inscode(self, value):
self._inscode = value
@property
def rescode(self):
if self.resnum is not None:
return str(self.resnum) + self.inscode
@property
def short_code(self):
return self.type.short_code
@property
def long_code(self):
return self.type.long_code
@property
def chain(self):
"""
The name of the sequence chain that this residue belongs to.
:rtype: str
"""
return "" if self.sequence is None else self.sequence.chain
@property
def structure_chain(self):
"""
The name of chain for the structure that this sequence is associated
with. This is normally the same as `chain`, but it can be different if
the user manually links this sequence to a structure with differing
chain names.
:rtype: str
"""
return "" if self.sequence is None else self.sequence.structure_chain
@property
def hydrophobicity(self):
"""
:return: Hydrophobicity for the ResidueType on the Kyte-Doolittle scale,
if available; otherwise None.
:rtype: float
"""
return self.type.hydrophobicity
@property
def hydrophilicity(self):
"""
:return: Hydrophilicity for the ResidueType on the Hopp-Woods scale,
if available; otherwise None
:rtype: float
"""
return self.type.hydrophilicity
@property
def charge(self):
"""
:return: charge of the ResidueType of the residue
:rtype: RESIDUE_CHARGE
"""
return self.type.charge
@property
def helix_propensity(self):
"""
:return: Helix propensity for the ResidueType of the residue
:rtype: `HELIX_PROPENSITY`
"""
return self.type.helix_propensity
@property
def beta_strand_propensity(self):
"""
:return: Beta-strand propensity for the ResidueType of the residue
:rtype: `BETA_STRAND_PROPENSITY`
"""
return self.type.beta_strand_propensity
@property
def turn_propensity(self):
"""
:return: Turn propensity for the ResidueType of the residue
:rtype: `TURN_PROPENSITY`
"""
return self.type.turn_propensity
@property
def helix_termination_tendency(self):
"""
:return: Helix termination tendency for the ResidueType of the residue
:rtype: `HELIX_TERMINATION_TENDENCY`
"""
return self.type.helix_termination_tendency
@property
def exposure_tendency(self):
"""
:return: Solvent exposure tendency for the ResidueType of the residue
:rtype: `SOLVENT_EXPOSURE_TENDENCY`
"""
return self.type.exposure_tendency
@property
def steric_group(self):
"""
:return: Steric group for the ResidueType of the residue
:rtype: `STERIC_GROUP`
"""
return self.type.steric_group
@property
def side_chain_chem(self):
"""
:return: Side chain chemistry for the ResidueType of the residue
:rtype: `SIDE_CHAIN_CHEM`
"""
return self.type.side_chain_chem
@property
def ss_prediction_sspro(self):
"""
Returns a DSSP code matching the secondary structure prediction for the
residue or None. Value is calculated from the SSpro backend.
"""
return self._ss_prediction_sspro
@property
def ss_prediction_psipred(self):
"""
Returns a DSSP code matching the secondary structure prediction for the
residue or None. Value is calculated from thePsiPred backend.
"""
return self._ss_prediction_psipred
@ss_prediction_psipred.setter
def ss_prediction_psipred(self, value):
if value not in list(DSSP_CODES):
raise ValueError("%s is not a valid DSSP code" % value)
self._ss_prediction_psipred = value
@ss_prediction_sspro.setter
def ss_prediction_sspro(self, value):
if value not in list(DSSP_CODES):
raise ValueError("%s is not a valid DSSP code" % value)
self._ss_prediction_sspro = value
@property
def isoelectric_point(self):
"""
:return: A float representing the isoelectric point value for the
ResidueType of the residue
"""
return self.type.isoelectric_point
[docs] @check_if_comparable(default_return=_DEFAULT_SIMILARITY)
def getSimilarity(self, ref_res, similarity_matrix=constants.BLOSUM62):
"""
Returns the similarity between the residue and a reference residue
:param ref_res: The reference residue
:type ref_res: `schrodinger.protein.residue.Residue`
:param similarity_matrix: The scoring matrix to use
:return: similarity score based on the similarity matrix
:rtype: float
"""
return get_matrix_value(similarity_matrix, self.short_code,
ref_res.short_code)
[docs] @check_if_comparable()
def getBinarySimilarity(self,
ref_res,
similarity_matrix=constants.BLOSUM62):
"""
Returns if the residue and a reference residue are similar
:param ref_res: The reference residue
:type ref_res: `schrodinger.protein.residue.Residue`
:param similarity_matrix: The scoring matrix to use
:return: 1 if the similarity score is positive, otherwise 0.
:rtype: int
"""
return 1 if self.getSimilarity(ref_res, similarity_matrix) > 0.0 else 0
[docs] @check_if_comparable()
def getIdentity(self, ref_res):
"""
Return the identity between the residue and a reference residue
:param ref_res: The reference residue
:type ref_res: `schrodinger.protein.residue.Residue`
:return: 1 if same as the reference residue, 0 otherwise.
:rtype: int
"""
res_type = self.type
ref_res_type = ref_res.type
if (res_type.short_code == ref_res_type.short_code and
res_type.name == ref_res_type.name):
return 1
return 0
[docs] @check_if_comparable()
def getIdentityStrict(self, ref_res):
"""
Return the identity between the residue and a reference residue without
considering nonstandard amino acids identical to their related standard
amino acid.
See getIdentity for additional documentation.
"""
res_type = self.type
ref_res_type = ref_res.type
maybe_identical = self.getIdentity(ref_res)
if maybe_identical:
if not res_type.nonstandard and not ref_res_type.nonstandard:
return maybe_identical
elif res_type.nonstandard and ref_res_type.nonstandard:
return int(res_type.long_code == ref_res_type.long_code)
return 0
[docs] @check_if_comparable()
def getConservation(self, ref_res):
"""
Return whether the residue and a reference residue have similar
side-chain chemistry.
The similarity criterion is based on "side chain chemistry"
descriptor matching.
:param ref_res: The reference residue
:type ref_res: `schrodinger.protein.residue.Residue`
:return: 1 if the residue and reference residue are have similar side
chain chemistry, 0 otherwise.
:rtype: int
"""
res_group = _HOMOLOGY_GROUPS.get(self.short_code)
ref_group = _HOMOLOGY_GROUPS.get(ref_res.short_code)
if res_group and ref_group and (res_group == ref_group):
return 1
return 0
[docs] def getStructureResProperties(self):
"""
Return all properties for the corresponding structure residue's alpha
carbon. Properties that apply to the whole residue are stored as atom
properties on this atom. An empty dictionary will be returned if this
residue doesn't have a corresponding alpha carbon.
:return: A dictionary-like object containing the properties.
:rtype: structure._StructureAtomProperty or dict
"""
seq = self.sequence
if seq is None or self.seqres_only or not seq.hasStructure():
return {}
struc_res = seq.getStructureResForRes(self)
if struc_res is None:
return {}
calpha = struc_res.getAlphaCarbon()
if calpha is None:
return {}
return calpha.property
[docs] def updateDescriptors(self, descriptors):
"""
Updates the descriptor dicts with new descriptor values
:param descriptors: A dict mapping descriptor names to their values
:type descriptors: dict[str, float]
"""
self.descriptors.update(descriptors)
[docs] def getDescriptorValue(self, descriptor_name):
return self.descriptors.get(descriptor_name)
[docs] def getDescriptorKeys(self):
return self.descriptors.keys()
[docs] def getProperty(self, seq_prop):
"""
Get the residue's value corresponding to the given SequenceProperty
object
:param seq_prop: The object describing the residue property
:type seq_prop: schrodinger.protein.properties.SequenceProperty
:return: The value of the sequence property
:rtype: str, int, float or None
"""
if seq_prop.property_type == properties.PropertyType.StructureProperty:
struc_props = self.getStructureResProperties()
prop_val = struc_props.get(seq_prop.property_name)
else:
prop_val = self.getDescriptorValue(seq_prop.property_name)
return prop_val
class CombinedChainResidueWrapper:
    """
    A wrapper for a residue or gap so that res.sequence points to the
    combined-chain sequence and res.idx_in_seq gives the index within the
    combined-chain sequence.

    Note that these wrappers are generated as needed and the combined-chain
    sequence does not store any reference to the generated instances. As such,
    these residues should not be stored using weakrefs and should not be
    compared using identity. Also note that these residues will not compare
    equal to the split-chain residues that they wrap.
    """

    def __init__(self, res, combined_chain_seq):
        """
        :param res: The residue or gap to wrap
        :type res: AbstractSequenceElement
        :param combined_chain_seq: The combined-chain sequence that the residue
            is part of.
        :type combined_chain_seq: sequence.CombinedChainProteinSequence
        """
        self._res = res
        self._seq = combined_chain_seq

    def __getattr__(self, attr_name):
        """
        Forward any attribute that the wrapper doesn't define to the wrapped
        residue.
        """
        # `__getattr__` only runs when normal lookup fails. If the wrapper's
        # own attributes are missing (e.g. an instance created without
        # `__init__`), delegating would look up `self._res` again and recurse
        # without bound, so fail with a regular AttributeError instead.
        if attr_name in ("_res", "_seq"):
            raise AttributeError(
                f"'{self.__class__.__name__}' has no attribute '{attr_name}'")
        return getattr(self._res, attr_name)

    def __dir__(self):
        # Advertise the wrapped residue's attributes alongside the wrapper's
        attr_names = set(dir(self._res))
        attr_names.update(self.__dict__.keys())
        attr_names.update(self.__class__.__dict__.keys())
        return attr_names

    def __eq__(self, other):
        # Wrappers are only ever equal to other wrappers of an equal residue
        if isinstance(other, CombinedChainResidueWrapper):
            return self._res == other._res
        return False

    def __hash__(self):
        return hash((self.__class__, self._res))

    def __str__(self):
        return str(self._res)

    def __repr__(self):
        return f"{self.__class__.__name__}({repr(self._res)})"

    def __copy__(self):
        raise RuntimeError(f"Cannot copy {self.__class__.__name__} instances")

    def __deepcopy__(self, memo):
        raise RuntimeError(f"Cannot copy {self.__class__.__name__} instances")

    def _inSeq(self):
        """
        Does this residue still exist in the sequence it was created from? This
        will return False if the residue has been removed from its split-chain
        sequence or if the chain has been removed from the combined-chain
        sequence.

        :rtype: bool
        """
        return self._res.sequence in self._seq.chains

    @property
    def sequence(self):
        """
        The combined-chain sequence that the residue is part of, or None if the
        residue has been removed from the sequence.

        :rtype: sequence.CombinedChainProteinSequence or None
        """
        return self._seq if self._inSeq() else None

    @property
    def idx_in_seq(self):
        """
        This residue's index in the combined-chain sequence, or None if the
        residue has been removed from the sequence.

        :rtype: int or None
        """
        return self._seq.index(self) if self._inSeq() else None

    @property
    def split_res(self):
        """
        The split-chain residue or gap that this residue is wrapping.

        :rtype: AbstractSequenceElement
        """
        return self._res

    @property
    def split_sequence(self):
        """
        The split-chain sequence that this residue is part of, or None if the
        residue has been removed from the sequence.

        :rtype: sequence.ProteinSequence or None
        """
        return self._res.sequence

    @property
    def disulfide_bond(self):
        """
        The current disulfide bond, if any, that this residue is involved in.

        :rtype: CombinedChainDisulfideBond or None
        """
        if not self._inSeq():
            return None
        split_bond = self._res.disulfide_bond
        if split_bond is None or not split_bond.isValid():
            return None
        combined_bond = CombinedChainDisulfideBond(split_bond, self._seq)
        # Make sure that neither residue is part of a chain that's been
        # removed from the combined sequence
        if all(res.sequence is not None for res in combined_bond):
            return combined_bond
        return None
class DisulfideBond:
    """
    Representation of a disulfide bond.

    The two residues are held in a `weakref.WeakSet`, so the bond itself does
    not keep them alive.
    """

    def __init__(self, res1, res2):
        """
        :param res1: A residue in the bond
        :type res1: Residue
        :param res2: The other residue in the bond
        :type res2: Residue
        """
        self._res_pair = weakref.WeakSet((res1, res2))

    def __iter__(self):
        """
        Iterate over the bonded residues ordered by `idx_in_seq`. A residue
        that has no sequence is ordered as if its index were 0.
        """

        def sort_key(res):
            return res.idx_in_seq if res.sequence is not None else 0

        return iter(sorted(self._res_pair, key=sort_key))

    @property
    def res_pair(self):
        """
        :return: The bonded residues that still exist, in iteration order
        :rtype: tuple
        """
        return tuple(self)

    def isValid(self):
        """
        Check whether the disulfide bond is valid.

        :return: True if the bond still has at least two residues and every
            one of them is part of a sequence, False otherwise.
        :rtype: bool
        """
        # Build the pair once: every `res_pair` access re-sorts the residues
        # and looks up their indices.
        pair = self.res_pair
        too_short = len(pair) < 2
        deleted_res = any(res.sequence is None for res in pair)
        return not (too_short or deleted_res)

    def _getBondedSequences(self):
        """
        Return the sequences of the two bonded residues.

        :raise ValueError: If the bond is not valid
        """
        if not self.isValid():
            raise ValueError("Bond is not valid")
        seq1, seq2 = [res.sequence for res in self.res_pair]
        return seq1, seq2

    @property
    def is_intra_sequence(self):
        """
        Check whether the bond is valid and intra-sequence.

        :return: Whether the bond is a valid, intra-sequence bond.
        :rtype: bool
        :raise ValueError: If the bond is not valid
        """
        seq1, seq2 = self._getBondedSequences()
        return seq1 is seq2

    @property
    def is_inter_sequence(self):
        """
        Check whether the bond is valid and inter-sequence.

        :return: Whether the bond is a valid, inter-sequence bond.
        :rtype: bool
        :raise ValueError: If the bond is not valid
        """
        seq1, seq2 = self._getBondedSequences()
        return seq1 is not seq2
class CombinedChainDisulfideBond(DisulfideBond):
    """
    A disulfide bond in a `sequence.CombinedChainProteinSequence`.

    Wraps a split-chain bond; equality and hashing are delegated to that
    wrapped bond.
    """

    def __init__(self, bond, seq):
        """
        :param bond: The split-chain disulfide bond.
        :type bond: DisulfideBond
        :param seq: The combined-chain sequence that this bond is in.
        :type seq: sequence.CombinedChainProteinSequence
        """
        wrapped_residues = set()
        for split_res in bond:
            wrapped_residues.add(CombinedChainResidueWrapper(split_res, seq))
        self._res_pair = wrapped_residues
        self._split_seq_bond = bond

    def __eq__(self, other):
        if not isinstance(other, DisulfideBond):
            return super().__eq__(other)
        # Compare split-chain bond against split-chain bond
        if isinstance(other, CombinedChainDisulfideBond):
            other = other._split_seq_bond
        return self._split_seq_bond == other

    def __hash__(self):
        return hash(self._split_seq_bond)

    @property
    def is_intra_sequence(self):
        """
        Always True for a combined-chain bond.
        """
        return True

    @property
    def is_inter_sequence(self):
        """
        Always False for a combined-chain bond.
        """
        return False
def add_disulfide_bond(res1, res2, known=True):
    """
    Add a disulfide bond between two residues.

    The new bond is stored on both residues (as `disulfide_bond` for a known
    bond, `pred_disulfide_bond` for a predicted one) and the disulfide bond
    cache of each residue's sequence is cleared.

    :param res1: A residue to link with a disulfide bond
    :type res1: residue.Residue
    :param res2: Another residue to link with a disulfide bond
    :type res2: residue.Residue
    :param known: Whether the bond is a known bond or a predicted bond.
    :type known: bool
    :raise ValueError: If either residue already has a bond of this kind
    """
    if known:
        bond_attr_name = 'disulfide_bond'
    else:
        bond_attr_name = 'pred_disulfide_bond'
    residues = (res1, res2)
    # Validate both residues before touching either of them
    for candidate in residues:
        if getattr(candidate, bond_attr_name) is not None:
            raise ValueError(f'"{candidate}" is already a part of a bond')
    new_bond = DisulfideBond(res1, res2)
    for bonded_res in residues:
        setattr(bonded_res, bond_attr_name, new_bond)
    for bonded_res in residues:
        bonded_res.sequence.clearDisulfideBondsCache()
def remove_disulfide_bond(bond):
    """
    Remove a disulfide bond between two residues.

    The disulfide bond cache of each residue's sequence is cleared, then the
    bond is detached from whichever attribute (`disulfide_bond` or
    `pred_disulfide_bond`) holds it on both residues.

    :param bond: The bond to disconnect
    :type bond: residue.DisulfideBond
    :raise AssertionError: If the bond is not the known or the predicted bond
        of both of its residues
    """
    res1, res2 = bond.res_pair
    res1.sequence.clearDisulfideBondsCache()
    res2.sequence.clearDisulfideBondsCache()
    if bond == res1.disulfide_bond == res2.disulfide_bond:
        for res in bond.res_pair:
            res.disulfide_bond = None
    elif bond == res1.pred_disulfide_bond == res2.pred_disulfide_bond:
        for res in bond.res_pair:
            res.pred_disulfide_bond = None
    else:
        # Raised explicitly instead of `assert False` so that the check
        # survives `python -O`, which strips assert statements.
        raise AssertionError('Undefined behavior')
class Nucleotide(Residue):
    """
    A nucleic acid residue. It currently adds nothing to `Residue`.
    """
    # TODO (MSV-1504): Create proper nucleic acid domain objects