Source code for schrodinger.protein.seqres
"""
Functions for getting and setting SEQRES data on a structure.
"""
import decorator
from schrodinger.infra import mm
from schrodinger.infra.util import OneIndexedList
# Name of the m2io data block that holds the SEQRES records.
_SEQRES_BLOCK = "m_PDB_SEQRES"
# Properties stored for each row of the SEQRES block, in the order they are
# read and written: the chain name, then that chain's residue names packed
# into a single string.
_SEQRES_PROPS = ["s_pdb_chain_id", "s_pdb_SEQRES"]
@decorator.decorator
def _mminit(func, *args, **kwargs):
    """
    A decorator for initializing and terminating the mmlibs required for reading
    and writing SEQRES data.

    The libraries are initialized in the order mmerr, m2io, mmct and are
    terminated in the reverse order once the wrapped function returns or
    raises. Each library is only terminated if its own initialization
    succeeded, so a failure part-way through start-up never triggers a
    terminate call for a library that was not initialized.

    :param func: The function being decorated.
    :param args: Positional arguments forwarded to `func`.
    :param kwargs: Keyword arguments forwarded to `func`.
    :return: Whatever `func` returns.
    """
    mm.mmerr_initialize()
    try:
        mm.m2io_initialize(mm.error_handler)
        try:
            mm.mmct_initialize(mm.error_handler)
            try:
                return func(*args, **kwargs)
            finally:
                mm.mmct_terminate()
        finally:
            # Runs even if mmct_terminate raised, so m2io is never left
            # initialized by this decorator.
            mm.m2io_terminate()
    finally:
        mm.mmerr_terminate()
def _split_seqres(seqres):
    """
    Split a packed SEQRES string into 3-character residue names.

    The string is read as consecutive 4-character fields, each holding a
    3-character residue name followed by a space. A final field that is
    shorter than 4 characters (i.e. its trailing padding is missing) is padded
    back out with spaces before it is checked.

    :param seqres: The packed residue names for one chain.
    :type seqres: str
    :return: The residue names, 3 characters each.
    :rtype: list[str]
    :raise ValueError: If a field's fourth character is not a space.
    """
    res_names = []
    for start in range(0, len(seqres), 4):
        # Only the final field can be short; ljust leaves full fields as-is.
        field = seqres[start:start + 4].ljust(4)
        if field[-1] != ' ':
            raise ValueError(
                f'4-character residue names not supported: "{field}"')
        res_names.append(field[:-1])
    return res_names


@_mminit
def get_seqres(st):
    """
    Read in all SEQRES data from the structure.

    :param st: The structure to read SEQRES data from
    :type st: schrodinger.structure.Structure
    :return: A dictionary of {chain name: list of residue names} (ordered
        identically to the SEQRES data in the `Structure`) or None if there's no
        SEQRES information present in the structure. Note that the lists of
        residue names uses a OneIndexedList since the i_pdb_seqres_index residue
        property values assume a starting index of one.
    :rtype: dict(str, OneIndexedList[str]) or NoneType
    :raise ValueError: If the stored data contains a 4-character residue name.

    NOTE: Residue names in output lists contain 3 characters each.
    """
    try:
        data_handle = mm.mmct_ct_m2io_get_unrequested_handle(st)
    except mm.MmException:
        # Fall back to the additional-data handle when the unrequested-data
        # handle can't be obtained.
        data_handle = mm.mmct_ct_get_or_open_additional_data(st, True)
    if not mm.m2io_get_number_blocks(data_handle, _SEQRES_BLOCK):
        # There's no seqres information present
        return None
    mm.m2io_goto_block(data_handle, _SEQRES_BLOCK, 1)
    try:
        seqres_by_chain = {}
        num_rows = mm.m2io_get_index_dimension(data_handle)
        # Rows are addressed starting from one.
        for row in range(1, num_rows + 1):
            chain, seqres = mm.m2io_get_string_indexed(data_handle, row,
                                                       _SEQRES_PROPS)
            seqres_by_chain[chain] = OneIndexedList(_split_seqres(seqres))
    finally:
        # Always step back out of the SEQRES block, even if parsing failed.
        mm.m2io_leave_block(data_handle)
    return seqres_by_chain
def _to_seqres_field(resname):
    """
    Convert a residue name into the 4-character field stored in SEQRES data.

    Generally residue names are 3-characters long, but APIs allow for up to 4
    characters. 3-character names are converted to 4 characters by adding a
    trailing space. NOTE that for most structures this will produce a string
    that ends with a space (fixing PPREP-1802)

    :param resname: The residue name.
    :type resname: str
    :return: The 4-character field for the residue.
    :rtype: str
    :raise ValueError: If the name is not 3 or 4 characters long.
    """
    if len(resname) == 3:
        return f'{resname} '
    if len(resname) == 4:
        return resname
    raise ValueError(
        f'Invalid residue name: "{resname}" (expecting 3 or 4 characters)')


@_mminit
def set_seqres(st, seqres):
    """
    Add SEQRES data to a structure. Any SEQRES data that was previously present
    in the structure will be overwritten.

    All residue names are validated before the structure is modified, so an
    invalid name leaves any existing SEQRES data untouched.

    :param st: The structure to add SEQRES data to.
    :type st: schrodinger.structure.Structure
    :param seqres: A dictionary of {chain name: list of residue names}. May be
        None or an empty dictionary to clear SEQRES data. Each residue name
        must be either 3-characters or 4-characters long.
    :type seqres: dict(str, list[str]) or None
    :raise ValueError: If a residue name is not 3 or 4 characters long.
    """
    # Build (and thereby validate) every row up front, before anything is
    # deleted from or written to the structure.
    rows = []
    if seqres:
        rows = [[chain_name, ''.join(map(_to_seqres_field, res_names))]
                for chain_name, res_names in seqres.items()]
    data_handle = mm.mmct_ct_get_or_open_additional_data(st, True)
    # if there's any SEQRES data present, delete it (otherwise the new data will
    # be ignored)
    if mm.m2io_get_number_blocks(data_handle, _SEQRES_BLOCK):
        mm.m2io_delete_named_block(data_handle, _SEQRES_BLOCK)
    if not rows:
        # There's no new data to write
        return
    mm.m2io_open_block(data_handle, _SEQRES_BLOCK)
    try:
        mm.m2io_set_index_dimension(data_handle, len(rows))
        # Rows are addressed starting from one.
        for idx, row in enumerate(rows, start=1):
            mm.m2io_put_string_indexed(data_handle, idx, _SEQRES_PROPS, row)
    finally:
        mm.m2io_close_block(data_handle)
@_mminit
def has_seqres(st):
    """
    Report whether a structure carries any SEQRES data.

    :param st: The structure to check for SEQRES data.
    :type st: schrodinger.structure.Structure
    :return: True if the structure has at least one SEQRES block, False
        otherwise.
    :rtype: bool
    """
    try:
        handle = mm.mmct_ct_m2io_get_unrequested_handle(st)
    except mm.MmException:
        # Fall back to the additional-data handle when the unrequested-data
        # handle can't be obtained.
        handle = mm.mmct_ct_get_or_open_additional_data(st, True)
    return mm.m2io_get_number_blocks(handle, _SEQRES_BLOCK) > 0