# Source code for schrodinger.protein.tasks.sta
import copy
from schrodinger.models import parameters
from schrodinger.protein import alignment
from schrodinger.protein import sequence
from schrodinger.tasks import tasks
class STATask(tasks.SubprocessCmdTask):
    """
    Subprocess task that runs the ``sta`` command on a query sequence and a
    structured reference sequence, then parses the aligned sequences from
    the backend's ``.raw`` output file.

    The task reads ``self.input.query_seq``, ``self.input.ref_seq``,
    ``self.input.protein_family`` and ``self.input.constraints``.
    """
    # After a successful run: [aligned_ref_seq, aligned_query_seq], both str.
    output: list

    #########################
    # TASK METHODS
    #########################

    def _getQueryFile(self):
        """
        :return: Path of the task file the query sequence is written to
            (``<task name>.seq``).
        """
        return self.getTaskFilename(self.name + '.seq')

    def _getTemplateFile(self):
        """
        :return: Path of the task file the reference structure is written to
            (``<task name>-template.pdb``).
        """
        return self.getTaskFilename(self.name + '-template.pdb')

    @tasks.preprocessor
    def _writeInput(self):
        """
        Write the two input files consumed by the command built in
        `makeCmd`: the gapless query sequence and the reference structure.
        """
        query_file = self._getQueryFile()
        # Strip gaps from a copy so the sequence held by the input is not
        # modified.
        seq = copy.deepcopy(self.input.query_seq)
        seq.removeAllGaps()
        aln = alignment.ProteinAlignment([seq])
        aln.toFastaFile(query_file)

        template_file = self._getTemplateFile()
        ref_struc = self.input.ref_seq.getStructure()
        ref_struc.write(template_file)

    def makeCmd(self):
        """
        @overrides: tasks.AbstractCmdTask

        Build the ``sta`` command line from the task input.

        :return: The command as a list of arguments
        :rtype: list
        """
        cmd = [
            'sta',
            '-NOJOBID',
            '-template_pdb',
            self._getTemplateFile(),
            '-template_chain_id',
            self.input.ref_seq.structure_chain,
            self._getQueryFile(),
        ]
        if self.input.protein_family is not None:
            cmd.extend(['-protein_family', self.input.protein_family])
        # NOTE(review): the constraint loop is treated as independent of the
        # protein family check - confirm against the upstream module.
        for pair in self.input.constraints:
            cmd.extend(['-pair', self._formatConstraint(*pair)])
        return cmd

    @staticmethod
    def _formatConstraint(query_res, structured_res):
        """
        Format a constraint for STA. A constraint is formatted as 1-based
        residue indices of the query residue and the structured
        residue joined by an underscore. e.g. query_res 1 and structured_res 0
        is represented as 2_1.

        :param query_res: The query sequence residue to constrain
        :type query_res: residue.Residue
        :param structured_res: The structured sequence residue to constrain
        :type structured_res: residue.Residue
        :return: The formatted constraint
        :rtype: str
        """
        # Backend takes 1-based residue indexes
        structured_idx = structured_res.idx_in_seq + 1
        query_idx = query_res.idx_in_seq + 1
        return f"{query_idx}_{structured_idx}"

    @tasks.postprocessor
    def _readOutputFile(self):
        """
        Parse ``<task name>.raw`` and store the result in `output`.

        Every line starting with ``ProbeAA:`` contributes its second
        whitespace-separated field to the aligned query sequence; every line
        starting with ``Fold AA:`` contributes its third field to the aligned
        reference sequence. The pieces are concatenated in file order.

        :raise RuntimeError: If the file has no ``ProbeAA:`` lines or no
            ``Fold AA:`` lines.
        """
        output_file = self.getTaskFilename(self.name + '.raw')
        aligned_ref_seq_parts = []
        aligned_query_seq_parts = []
        with open(output_file) as fh:
            for line in fh:
                if line.startswith('ProbeAA:'):
                    aligned_query_seq_parts.append(line.split()[1])
                elif line.startswith('Fold AA:'):
                    # The "Fold AA:" label itself splits into two fields, so
                    # the sequence chunk is the third one.
                    aligned_ref_seq_parts.append(line.split()[2])
        if not aligned_ref_seq_parts or not aligned_query_seq_parts:
            raise RuntimeError("No output")
        aligned_ref_seq = "".join(aligned_ref_seq_parts)
        aligned_query_seq = "".join(aligned_query_seq_parts)
        self.output = [aligned_ref_seq, aligned_query_seq]

    def getGaps(self):
        """
        Locate the ``.`` characters in the two aligned sequences held in
        `output`.

        :return: ``(ref_gaps, query_gaps)``, each a list of 0-based positions
            of ``.`` in the aligned reference and aligned query strings
        :rtype: tuple(list(int), list(int))
        :raise RuntimeError: If `output` is empty
        """
        if not self.output:
            raise RuntimeError("Cannot get gaps without output")
        aligned_ref_seq, aligned_query_seq = self.output
        ref_gaps = [i for i, ch in enumerate(aligned_ref_seq) if ch == "."]
        query_gaps = [i for i, ch in enumerate(aligned_query_seq) if ch == "."]
        return (ref_gaps, query_gaps)