"""
Module for capping uncapped terminal residues in a protein structure by
adding NME or ACE caps as appropriate. Also has functionality for adding a
C-terminal oxygen in places where this is missing.
Usage for capping: capped_st = cap_termini(input_st)
Usage for adding C-terminal oxygens: capped_st = add_terminal_oxygens(input_st)
Copyright Schrodinger, LLC. All rights reserved.
"""
from schrodinger import structure
from schrodinger.protein import sequence
from schrodinger.structutils import build
from schrodinger.structutils.interactions import hbond
INSCODES = [
' ', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
]
DONTCAP, CAPC, CAPN, CAPBOTH = list(range(4))
[docs]def count_atom_hbonds(st, atom):
"""
Return the number of hydrogens bonds that the given atom is involved in.
"""
return len(hbond.get_hydrogen_bonds(st, [atom]))
[docs]def res_can_be_capped(res):
"""
Determines whether a fragment is able to be capped.
Returns CAPN if specified residue can be N-capped, CAPC if C-capped;
DONTCAP if it can't be capped.
"""
# A PDB residue has PDB atom names of:
# " CA "
# " N "
# " C "
# " O "
# Find it if this residue has a free N or C terminal
if not res.getAlphaCarbon() or not res.getBackboneOxygen():
# Not a standard residue
return DONTCAP
nitrogen = res.getBackboneNitrogen()
carbonyl_c = res.getCarbonylCarbon()
if not nitrogen or not carbonyl_c:
# Not a standard residue
return DONTCAP
n_bonded = False
for neighbor in nitrogen.bonded_atoms:
if neighbor.pdbname == " C ":
# N is bonded to another residue
n_bonded = True
c_bonded = False
for neighbor in carbonyl_c.bonded_atoms:
if neighbor.pdbname in [" N ", " N1 "]:
# C is bonded to another residue
c_bonded = True
# Valid residue, contains alpha-carbon, nitrogen, and C=O:
if not c_bonded and not n_bonded:
# Both terminals are un-capped
return CAPBOTH
elif not c_bonded: # C terminal
return CAPC # doesn't need to be capped
elif not n_bonded: # N terminal
return CAPN
else:
return DONTCAP # residue does not need to be capped
def _find_hydrogen_to_replace(st, atom):
"""
Return atom number of a hydrogen bound to atom. If no hydrogens
are present, perform htreat, and return one of the new hydrogens
while deleting all other added hydrogens (if more than 1 was added).
Return None if can't find any after adding hydrogens either.
"""
# Ev:88825 If the nitrogen has a positive charge, neutralize it, so
# that we don't end up with N+ and 3 Hydrogens
if atom.atomic_number == 7 and atom.formal_charge == 1:
atom.formal_charge = 0
if len(atom.bond) > 3:
# Delete the extra hydrogen (if this atom has hydrogens):
for n in atom.bonded_atoms:
if n.atomic_number == 1:
st.deleteAtoms([n.index])
# NOTE: The <nitrogen> atom object will automatically
# renumber itself in the case that the deleted atom
# index is lower than its own.
break
# Add hydrogens to the atom (in case they are missing), otherwise we
# won't have a hydrogen to replace:
build.add_hydrogens(st, atom_list=[int(atom)])
# Search again and return the highest numbered hydrogen PYTHON-2797
hydrogens = [at2 for at2 in atom.bonded_atoms if at2.atomic_number == 1]
if hydrogens:
# In practice, seems like the the last atom in the "hydrogens" list
# is always highest-numbered, but just in case, we do the sort:
highest_h = max(hydrogens, key=lambda at: at.index)
return highest_h
# For some reasons still no hydrogens. Most often this means that this
# residue already has some kind of a cap (e.g. " OXT").
return None
def _increment_inscode(inscode):
"""
Returns next inscode above <inscode>
"""
# Ev:64830
try:
return INSCODES[INSCODES.index(inscode) + 1]
except:
raise Exception("Could not determine good INS code for cap residue")
###############################################################################
##### Class for adding NME and ACE caps to uncapped protein termini #######
###############################################################################
[docs]class CapTermini:
[docs] def __init__(self, st, verbose=False, frag_min_atoms=150):
"""
Add caps to uncapped terminal residues in the input structure 'st'.
Returns a list of residues capped
frag_min_atoms - peptide fragments with less than this number of atoms
will not be capped (default 150). Set to 0 to cap all fragments.
"""
self.verbose = verbose
self._frag_min_atoms = frag_min_atoms
self._capped_residues = [] # for self.cappedResidues()
self._cap_residues = [] # for self.capResidues()
self._analyzed_residues = []
self._output_st = None
# Save the original atom numbers (for debugging):
for atom in st.atom:
atom.property['i_ppw_original_index'] = atom.index
# Make a list of residue names in use in the structure:
# list of [chain, resname, inscode]
self.used_residues = []
for res in st.residue:
resid = [res.chain, res.resnum, res.inscode]
self.used_residues.append(resid)
# Go through all sequences, capping each one.
any_capped = self.capSequences(st)
if any_capped:
# If any sequence was capped, reorder the CT by connectivity
# (Originally needed for Prime, Ev:58331)
st = build.reorder_protein_atoms_by_sequence(st)
self._output_st = st
[docs] def capSequences(self, st):
"""
Cap both ends of each sequence in the given structure.
"""
c_ca_atoms = []
n_ca_atoms = []
for seqi, seq in enumerate(sequence.get_structure_sequences(st)):
seq_atoms = seq.getAtomIndices()
if len(seq_atoms) < self._frag_min_atoms:
# Skip small fragments (ligands)
continue
# Find the termini to cap:
for res in seq.residue:
res_type = res_can_be_capped(res)
if res_type == DONTCAP:
continue
# Save an atom object from the residue, as it will be
# automatically re-numbered when other caps are added.
res_ca = res.getAlphaCarbon()
if res_type == CAPC or res_type == CAPBOTH:
c_ca_atoms.append(res_ca)
if res_type == CAPN or res_type == CAPBOTH:
n_ca_atoms.append(res_ca)
# Ev:64830 In order for resname & inscodes for the caps
# to be correct, must cap C termini of all fragments before
# capping any N termini.
any_capped = False
for res_ca in c_ca_atoms:
res = res_ca.getResidue()
if self.verbose:
print('CapTermini: Attempting to C cap sequence %i (%i atoms)' % \
(seqi, len(seq_atoms)))
self._addCCap(res)
any_capped = True
for res_ca in n_ca_atoms:
res = res_ca.getResidue()
if self.verbose:
print('CapTermini: Attempting to N cap sequence %i (%i atoms)' % \
(seqi, len(seq_atoms)))
self._addNCap(res)
any_capped = True
return any_capped
[docs] def outputStructure(self):
return self._output_st
[docs] def cappedResidues(self):
""" Returns residue strings for residues that were capped. """
return self._capped_residues
[docs] def capResidues(self):
""" Returns residue strings for the added cap residues. """
return self._cap_residues
[docs] def findOxygenToReplace(self, st, c_atom):
"""
Check whether c_atom is bound to 2 terminal oxygens. If so, determine
which one to replace with a cap, and return its atom index.
Return None if can't find 2 terminal oxygens.
"""
# Sometimes there is an oxygen (often " OXT") that is already capping
# the residues. If this is the case replace it. Since in this case
# the " OXT" and the " O " oxygens are equivalent (hydoxylate),
# replace the one which is trans (or the one that make no h-bonds).
ca_atom = None
terminal_oxygens = []
for n in c_atom.bonded_atoms:
# if an oxygen is protonated, remove that hydrogen
if n.element == "O" and len(n.bond) == 2:
h_atom = [a for a in n.bonded_atoms if a.element == "H"]
if len(h_atom) > 0:
st.deleteAtoms(h_atom)
# terminal oxygens
if n.element == "O" and len(n.bond) == 1:
terminal_oxygens.append(n.index)
# backbone C-alpha
if n.pdbname == " CA ":
ca_atom = n
# Ev:69692, If there is an OXT (O-) terminal atom, replace it:
# Ev:120504 Sometimes the " OXT" hydrgen is double-bonded,
# and sometimes the names " OXT" and " O " were flipped.
if len(terminal_oxygens) == 2:
ox1 = terminal_oxygens[0]
ox2 = terminal_oxygens[1]
ox1_hbonds = count_atom_hbonds(st, ox1)
ox2_hbonds = count_atom_hbonds(st, ox2)
replace_ox = None
other_ox = None
if ox2_hbonds > ox1_hbonds:
replace_ox = ox1
other_ox = ox2
print(
"Replacing oxygen %i because it makes makes fewer H-bonds" %
replace_ox)
elif ox1_hbonds > ox2_hbonds:
replace_ox = ox2
other_ox = ox1
print(
"Replacing oxygen %i because it makes makes fewer H-bonds" %
replace_ox)
else:
# Replace the oxygen that is trans:
n_atom = None
for n in ca_atom.bonded_atoms:
if n.pdbname == " N ":
n_atom = n
break
# Figure out which oxygen has the fewest angle:
torsion1 = abs(st.measure(n_atom, ca_atom, c_atom, ox1))
torsion2 = abs(st.measure(n_atom, ca_atom, c_atom, ox2))
if torsion1 > torsion2:
replace_ox = ox1
other_ox = ox2
else:
replace_ox = ox2
other_ox = ox1
print("Replacing oxygen %i because it is trans" % replace_ox)
# Make sure this bond is single-order, and the other is double:
st.getBond(c_atom, replace_ox).order = 1
st.getBond(c_atom, other_ox).order = 2
st.atom[other_ox].formal_charge = 0 # Fix for Ev:124473
st.atom[other_ox].retype() # Fix for Ev:124473
st.atom[other_ox].pdbname = " O " # make sure correct name
return st.atom[replace_ox]
return None
def _getLowerName(self, residue):
"""
Return resnum & inscode corresponding with the residue that would be
lower in sequence than the specified residue.
"""
# Ev:64830
chain = residue.chain
resnum = residue.resnum
# Name cap residue as <resnum-1:' '> if possible
# If not, name it <resnum-1:A>, <resnum-1:B>, etc.
new_inscode = ' '
# Keep incrementing inscode until OK:
while True:
newresid = [chain, resnum - 1, new_inscode]
if newresid not in self.used_residues:
return newresid
# new_inscode is already in use
new_inscode = _increment_inscode(new_inscode)
def _getHigherName(self, residue):
"""
Return resnum & inscode corresponding to a residue which would be
one higher in sequence than the input residue.
"""
# Ev:64830
chain = residue.chain
resnum = residue.resnum
# Name the cap residue <resnum:A> or <resnum:B>, etc.
new_inscode = _increment_inscode(residue.inscode)
# Keep incrementing inscode until OK:
while True:
newresid = [chain, resnum, new_inscode]
if newresid not in self.used_residues:
return newresid
# new_inscode is already in use
new_inscode = _increment_inscode(new_inscode)
def _addCCap(self, residue):
"""
Add a NMA residue to the ' C ' atom of the specified residue
"""
st = residue._st
# Find the N atom of the terminal:
carbon = residue.getCarbonylCarbon()
if not carbon:
raise Exception("No ' C ' in terminal residue!")
if self.verbose:
i = carbon.property['i_ppw_original_index']
print("CapTermini: Capping C termini: %s, atom %s" %
(str(residue), i))
# Figure out which hydrogen to replace:
replace_atom = self.findOxygenToReplace(st, carbon)
if not replace_atom:
replace_atom = _find_hydrogen_to_replace(st, carbon)
if not replace_atom:
print(
"WARNING captermini: No hydrogen to replace with NMA fragment on C atom %s"
% int(carbon))
print(" Skipping the group")
return
# Figure out what resnum, inscode to give to the new cap:
chain, new_resnum, new_inscode = self._getHigherName(residue)
capres, renumber_map = self.attachCap(residue, carbon, replace_atom,
'NMA')
capres.chain = chain
capres.resnum = new_resnum
capres.inscode = new_inscode
# NOTE: The <carbon> atom object will automatically renumber itself
# in the case that the deleted atom index is lower than its own.
# adjust the omega torsion (deviant definition of omega)
self.adjustDihedral(st, residue.getBackboneOxygen(), carbon,
capres.getBackboneNitrogen(),
capres.getAlphaCarbon())
self.resCapped(residue, capres)
[docs] def adjustDihedral(self, st, atom1, atom2, atom3, atom4):
"""
Adjust dihedral between the original residue and the cap to 0 degrees.
"""
if atom1 and atom2 and atom3 and atom4:
st.adjust(0, atom1, atom2, atom3, atom4)
else:
# Should never happen, unless there are other bugs
print('ERROR captermini: Could not adjust dihedral for bond:',
(atom1, atom2, atom3, atom4))
[docs] def resCapped(self, residue, capres):
"""
Record this capping in the internal lists.
"""
self._capped_residues.append(str(residue))
self._cap_residues.append(str(capres))
print("CapTermini: Added %s Cap: %s" %
(capres.pdbres.strip(), str(capres)))
capresid = [capres.chain, capres.resnum, capres.inscode]
self.used_residues.append(capresid)
[docs] def attachCap(self, residue, fromatom, replace_atom, fragname):
"""
Attaches the specified fragment and returns the new Residue object
"""
st = residue._st
# We must determine what atoms were added in this step.
# We will do this by marking each existing atom with a property
for atom in st.atom:
atom.property['b_capres_existed'] = True
# Save the residue info for the residue (atom numbers may change):
chain = residue.chain
resnum = residue.resnum
inscode = residue.inscode
# Break any zero-order bonds to replace_atom (Ev:87195):
delete_bonds_to = [
bond.atom2.index for bond in replace_atom.bond if bond.order == 0
]
for atom2 in delete_bonds_to:
st.deleteBond(replace_atom, atom2)
renumber_map = build.attach_fragment(st, fromatom.index,
replace_atom.index, 'peptide_caps',
fragname)
# The new fragment will have the same residue name and number but
# different resname:
capres_atoms = []
for atom in st.atom:
if 'b_capres_existed' not in atom.property:
# Make sure we don't include atoms from <residue>
capres_atoms.append(atom.index)
else:
del atom.property['b_capres_existed']
cap_res = structure._Residue(st, resnum, inscode, chain, capres_atoms)
return cap_res, renumber_map
def _addNCap(self, residue):
"""
Add an ACE residue to " N " atom of the specified residue
"""
st = residue._st
# Find the N atom of the terminal:
nitrogen = residue.getBackboneNitrogen()
if not nitrogen:
raise Exception("No ' N ' in terminal residue!")
if self.verbose:
i = nitrogen.property['i_ppw_original_index']
print("CapTermini: Capping N termini: %s, atom %s" %
(str(residue), i))
# Figure out which hydrogen to replace:
replace_atom = _find_hydrogen_to_replace(st, nitrogen)
if not replace_atom:
print(
"WARNING captermini: No hydrogen to replace with ACE fragment on N atom %s"
% int(nitrogen))
print(" Skipping the group")
return
# Figure out what resnum, inscode to give to the new cap:
chain, new_resnum, new_inscode = self._getLowerName(residue)
capres, renumber_map = self.attachCap(residue, nitrogen, replace_atom,
'ACE')
capres.chain = chain
capres.resnum = new_resnum
capres.inscode = new_inscode
# NOTE: The <nitrogen> atom object will automatically renumber itself
# in the case that the deleted atom index is lower than its own.
# Adjust the charges of the nitrogen to which the cap is attached,
# in case it was set to something else previously:
nitrogen.partial_charge = 0
nitrogen.solvation_charge = 0
st.retype() # Make sure all MacroModel types are correct
# adjust the omega torsion (deviant definition of omega)
self.adjustDihedral(st, residue.getAlphaCarbon(), nitrogen,
capres.getCarbonylCarbon(),
capres.getBackboneOxygen())
self.resCapped(residue, capres)
[docs]def cap_termini(st):
"""
Cap the termini on the specified st
Function interface for CapTermini class
"""
return CapTermini(st).outputStructure()
[docs]def add_terminal_oxygens(st, frag_min_atoms=150):
"""
Add OXT oxygen to the C-terminal of each poly-peptide chain.
A hydrogen will first be added to the residue and converted to an oxygen.
The bond length is not adjusted, and a minimization would be in order to fix this.
:param st: Structure to add oxygens to
:type st: Structure
:param frag_min_atoms: Minimal atoms required for fragment to be considered
a poly-peptide chain
:type frag_min_atoms: int
:return: List of capped residues
:rtype: List[_Residue]
"""
capped_residues = []
for seq in sequence.get_structure_sequences(st):
seq_atoms = seq.getAtomIndices()
if len(seq_atoms) < frag_min_atoms:
# Skip small fragments (ligands)
continue
for residue in seq.residue:
res_type = res_can_be_capped(residue)
if res_type not in {CAPC, CAPBOTH}:
continue
carbon = residue.getCarbonylCarbon()
if not carbon:
continue
# Find hydrogen to transform to oxygen
hxt_atom = _find_hydrogen_to_replace(st, carbon)
if hxt_atom is None:
continue
hxt_atom.pdbname = ' OXT'
hxt_atom.atomic_number = 8
hxt_atom.formal_charge = -1
oxygen = residue.getBackboneOxygen()
if oxygen is not None:
color = oxygen.color.rgb_float
else:
# Red-ish default color
color = (0xff, 0x2e, 0x2e)
hxt_atom.setColorRGB(*color)
hxt_atom.retype()
capped_residues.append(residue)
return capped_residues
# For debugging purposes:
if __name__ == '__main__':
import sys
verbose = ('-v' in sys.argv)
st = structure.Structure.read(sys.argv[1])
capter = CapTermini(st, verbose=verbose, frag_min_atoms=0)
st = capter.outputStructure()
capped_residues = capter.cappedResidues()
print('Capped residues:', capped_residues)
print('Capped file:', sys.argv[2])
st.write(sys.argv[2])
# EOF