# Source code for schrodinger.protein.tasks.sta
import copy
from schrodinger.models import parameters
from schrodinger.protein import alignment
from schrodinger.protein import sequence
from schrodinger.tasks import tasks
class STATask(tasks.SubprocessCmdTask):
    """
    Task that runs the ``sta`` backend as a subprocess on a query sequence
    and a structured reference sequence.

    Before the command is run, the query sequence (with gaps removed) is
    written to ``<name>.seq`` and the reference structure is written to
    ``<name>-template.pdb``. After the run, ``<name>.raw`` is parsed and
    `output` is set to ``[aligned_ref_seq, aligned_query_seq]``, where both
    items are strings.
    """
    output: list

    #########################
    # TASK METHODS
    #########################

    def _getQueryFile(self):
        """
        :return: Task filename of the query sequence file (``<name>.seq``)
        """
        return self.getTaskFilename(self.name + '.seq')

    def _getTemplateFile(self):
        """
        :return: Task filename of the template structure file
            (``<name>-template.pdb``)
        """
        return self.getTaskFilename(self.name + '-template.pdb')

    @tasks.preprocessor
    def _writeInput(self):
        """
        Write the input files read by the command built in `makeCmd`: the
        query sequence as a FASTA file and the reference structure as the
        template file.
        """
        # Work on a copy so that stripping gaps does not modify the caller's
        # input sequence.
        seq = copy.deepcopy(self.input.query_seq)
        seq.removeAllGaps()
        aln = alignment.ProteinAlignment([seq])
        aln.toFastaFile(self._getQueryFile())

        ref_struc = self.input.ref_seq.getStructure()
        ref_struc.write(self._getTemplateFile())

    def makeCmd(self):
        """
        @overrides: tasks.AbstractCmdTask

        Build the ``sta`` command line. The ``-protein_family`` option is
        only added when a protein family is set on the input, and one
        ``-pair`` option is added per input constraint.

        :return: The command as a list of arguments
        :rtype: list
        """
        cmd = [
            'sta',
            '-NOJOBID',
            '-template_pdb',
            self._getTemplateFile(),
            '-template_chain_id',
            self.input.ref_seq.structure_chain,
            self._getQueryFile(),
        ]
        if self.input.protein_family is not None:
            cmd.extend(['-protein_family', self.input.protein_family])
        for pair in self.input.constraints:
            cmd.extend(['-pair', self._formatConstraint(*pair)])
        return cmd

    @staticmethod
    def _formatConstraint(query_res, structured_res):
        """
        Format a constraint for STA. A constraint is formatted as the 1-based
        residue indices of the query residue and the structured residue
        joined by an underscore, e.g. a query residue with ``idx_in_seq`` 1
        and a structured residue with ``idx_in_seq`` 0 is represented as
        ``2_1``.

        :param query_res: The query sequence residue to constrain
        :type query_res: residue.Residue
        :param structured_res: The structured sequence residue to constrain
        :type structured_res: residue.Residue
        :return: The formatted constraint
        :rtype: str
        """
        # Backend takes 1-based residue indexes
        structured_idx = structured_res.idx_in_seq + 1
        query_idx = query_res.idx_in_seq + 1
        return f"{query_idx}_{structured_idx}"

    @tasks.postprocessor
    def _readOutputFile(self):
        """
        Parse ``<name>.raw`` and store the aligned sequences in `output` as
        ``[aligned_ref_seq, aligned_query_seq]``.

        Query chunks are taken from lines starting with ``ProbeAA:`` and
        reference chunks from lines starting with ``Fold AA:``; chunks are
        concatenated in file order.

        :raise RuntimeError: If a matching line has no sequence chunk, or if
            no query or no reference chunks were found.
        """
        output_file = self.getTaskFilename(self.name + '.raw')
        aligned_ref_seq_parts = []
        aligned_query_seq_parts = []
        with open(output_file) as fh:
            for line in fh:
                # "Fold AA:" contains a space, so its sequence chunk is the
                # third whitespace-separated token rather than the second.
                if line.startswith('ProbeAA:'):
                    parts, token_idx = aligned_query_seq_parts, 1
                elif line.startswith('Fold AA:'):
                    parts, token_idx = aligned_ref_seq_parts, 2
                else:
                    continue
                tokens = line.split()
                if len(tokens) <= token_idx:
                    # Report the offending line instead of a bare IndexError
                    raise RuntimeError(
                        f"Malformed output line: {line.rstrip()!r}")
                parts.append(tokens[token_idx])
        if not aligned_ref_seq_parts or not aligned_query_seq_parts:
            raise RuntimeError("No output")
        aligned_ref_seq = "".join(aligned_ref_seq_parts)
        aligned_query_seq = "".join(aligned_query_seq_parts)
        self.output = [aligned_ref_seq, aligned_query_seq]

    def getGaps(self):
        """
        Get the positions of the ``.`` characters in each aligned sequence
        of `output`.

        :return: A tuple of (reference positions, query positions), each a
            list of 0-based indices into the corresponding aligned string
        :rtype: tuple(list(int), list(int))
        :raise RuntimeError: If `output` is empty
        """
        if not self.output:
            raise RuntimeError("Cannot get gaps without output")
        aligned_ref_seq, aligned_query_seq = self.output
        ref_gaps = [i for i, ch in enumerate(aligned_ref_seq) if ch == "."]
        query_gaps = [i for i, ch in enumerate(aligned_query_seq) if ch == "."]
        return (ref_gaps, query_gaps)