Source code for schrodinger.protein.tasks.structure_alignment
# Runs SKA structure alignment jobs
import logging
from collections import namedtuple
import schrodinger
from schrodinger.application import ska
from schrodinger.utils import log
TMP_ORIG_ATOM_IDX_PROP = "i_msv_orig_atom_idx"
SEPARATOR = "_"
logger = log.get_output_logger(__file__)
if schrodinger.in_dev_env():
logger.setLevel(logging.DEBUG)
StructAlignResult = namedtuple(
"StructAlignResult",
["ref_string", "other_string", "ref_sse", "other_sse", "psd", "rmsd"])
[docs]def get_unique_seq_names(sequences):
"""
Create unique names for the given sequences for use with structure alignment
The original sequence name can be recreated using `get_orig_seq_name`.
:param sequences: Protein sequences
:type sequences: list[sequence.ProteinSequence]
:return: Generator of tuples of unique sequence names and sequences
:rtype: collections.abc.Iterable[tuple(str, sequence.ProteinSequence)]]
"""
for index, seq in enumerate(sequences):
name = f'{index}{SEPARATOR}{seq.name}'
yield name, seq
[docs]def get_orig_seq_name(prefixed_name):
"""
Given a name output by `get_unique_seq_names`, return the original sequence name
"""
return prefixed_name.split(SEPARATOR, 1)[1]
[docs]def runStructureAlignment(
ref,
others,
keywords=None,
ref_asl=None,
other_asl=None,
):
"""
Runs protein pairwise structure alignment using "ska" backend.
:param ref: Reference name and structure
:type ref: tuple(str, structure.Structure)
:param others: Non-reference (mobile) names and structures
:type others: list[tuple(str, structure.Structure)]
:param keywords: Keywords to pass to ska
:type keywords: dict
:param ref_asl: ASL for reference structure
:type ref_asl: str
:param other_asl: ASL for other structures
:type other_asl: str
:return: Alignment results keyed by non-reference structure names
:rtype: dict(str, StructAlignResult)
"""
if not others:
return {}
if keywords is None:
keywords = {}
try:
all_results = ska.pairwise_align_ct(ref,
others,
keywords=keywords,
log=logger,
save_props=True,
std_res=True,
asl=ref_asl,
asl_mobile=other_asl)
except RuntimeError:
# See log for error info
return {}
ref_name, _ = ref
seq_results = dict()
for result in all_results:
if not result.align:
# No results to process
continue
ref_string = result.align.pop(ref_name)
if len(result.align) != 1:
raise ValueError("Expected only one non-ref item in result align "
f"dict, got {len(result.align)}.")
other_name, other_string = result.align.popitem()
struct_aln_result = StructAlignResult(ref_string, other_string,
result.sse[ref_name],
result.sse[other_name],
result.psd, result.rmsd)
seq_results[other_name] = struct_aln_result
return seq_results