Source code for schrodinger.protein.tasks.muscle
# Runs MUSCLE alignment jobs
# This module provides an interface similar to clustal.py's.
# They might be merged in the future to reduce code duplication
import copy
import glob
import os
import tempfile
from schrodinger.application.msv import seqio
from schrodinger.protein.tasks import clustal
from schrodinger.utils import fileutils
[docs]def get_muscle_path():
    """
    Returns the path to the MUSCLE executable.
    :rtype: str
    :return: path to MUSCLE executable file, or None if the executable could
        not be located.
    """
    pattern = os.path.join(fileutils.get_mmshare_dir(), "bin", "*", "muscle*")
    results = glob.glob(pattern)
    if results:
        return results[0]
    print("No MUSCLE exec found!")
    return None 
[docs]class MuscleJob(clustal.AbstractAlignmentJob):
    """
    Runs alignment via MUSCLE program
    :note: Muscle by default rearranges the order to put similar sequences
           together
    """
[docs]    def __init__(self, aln, second_aln=None, quiet=True, maintain_order=True):
        """
        :param aln: Input sequences
        :type  aln: ProteinAlignment
        :param second_aln: Second alignment for aligning multiple MSAs
        :type  second_aln: ProteinAlignment
        :param maintain_order: Whether to maintain input order of sequences
        :type  maintain_order: bool
        Dev note:
        - Unimplemented features:
        -- Speedup options
        """
        super().__init__(aln, second_aln)
        self.quiet = quiet
        self.maintain_order = maintain_order 
[docs]    def run(self):
        """
        Run the MUSCLE alignment.
        :return: Aligned sequences
        :rtype: ProteinAlignment
        """
        muscle_exe = get_muscle_path()
        if not muscle_exe:
            raise RuntimeError("Could not locate MUSCLE executable!")
        to_write = self.aln
        if self.second_aln:
            # Pad alignments to prevent muscle from adding trailing X
            self.aln.padAlignment()
            self.second_aln.padAlignment()
        else:
            # When only one input alignment is passed to Muscle, it ignores
            # gap-only sequences and excludes them from the output.  To work
            # around this, we remove these sequences and manually add them back
            # to the output alignment.
            empty_seq_indices = [
                i for i, seq in enumerate(self.aln)
                if not seq.getGaplessLength()
            ]
            if empty_seq_indices:
                to_write = copy.deepcopy(self.aln)
                empty_seqs = [to_write[i] for i in empty_seq_indices]
                to_write.removeSeqs(empty_seqs)
        tmpdir_obj = tempfile.TemporaryDirectory()
        with tmpdir_obj as tmpdir:
            inp_fname = os.path.join(tmpdir, 'input.fasta')
            inp_fname2 = None
            out_fname = os.path.join(tmpdir, 'output.fasta')
            if self.second_aln:
                inp_fname2 = os.path.join(tmpdir, 'input2.fasta')
                seqio.FastaAlignmentWriter.write(self.second_aln, inp_fname2)
            in_names = seqio.FastaAlignmentWriter.write(to_write, inp_fname)
            command = self._makeCommand(muscle_exe=muscle_exe,
                                        inp_fname=inp_fname,
                                        out_fname=out_fname,
                                        inp_fname2=inp_fname2)
            # execute MUSCLE
            self._run(command)
            if self.canceled:
                return None
            # read the aligned sequences
            new_alignment = seqio.FastaAlignmentReader.read(out_fname)
            try:
                tmpdir_obj.cleanup()
            except OSError:
                # Debugging for MSV-3768
                print(os.listdir(tmpdir))
        if not self.second_aln and self.maintain_order:  # Reorder as input
            # profile mode does not change the sequence order in each input MSA
            # Note MUSCLE has a built in -stable option which is buggy (and
            # disabled). See http://www.drive5.com/muscle/manual/stable.html
            seqio.reorder_fasta_alignment(new_alignment, in_names)
        if not self.second_aln and empty_seq_indices:
            # add gap-only sequences back in
            for i, seq in zip(empty_seq_indices, empty_seqs):
                # muscle pads sequences with trailing gaps, so we do the same
                # for the gap-only sequences
                seq.removeElements(seq)
                seq.extend(seq.gap_char * new_alignment.num_columns)
                new_alignment.addSeq(seq, i)
        return new_alignment 
    def _makeCommand(self, muscle_exe, inp_fname, out_fname, inp_fname2=None):
        command = [muscle_exe, '-out', out_fname]
        if inp_fname2 is None:
            command.extend(['-in', inp_fname])
        else:  # If aligning alignments
            command.extend(['-profile', '-in1', inp_fname, '-in2', inp_fname2])
        if self.gapopen:
            command.extend(['-gapopen', str(-self.gapopen)])
        if self.gapext:
            command.extend(['-gapextend', str(-self.gapext)])
        if self.quiet:
            command.extend(['-quiet'])
        return command