Source code for schrodinger.application.matsci.packmol

"""
Utilities for working with packmol.

Copyright Schrodinger, LLC. All rights reserved.
"""

import os
import string
import sys
import time
import numpy
import argparse
from collections import namedtuple

from schrodinger import structure
from schrodinger.application.desmond import cms
from schrodinger.application.matsci import cgforcefield as cgff
from schrodinger.application.matsci import coarsegrain
from schrodinger.application.matsci import desmondutils
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import msconst
from schrodinger.application.matsci import parserutils
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import textlogger
from schrodinger.application.matsci.mlearn import features
from schrodinger.application.matsci.nano import xtal
from schrodinger.infra import mm
from schrodinger.job import queue
from schrodinger.structutils import color
from schrodinger.test import ioredirect
from schrodinger.utils import fileutils
from schrodinger.utils import subprocess
from schrodinger.utils import cmdline

PDB_EXT = '.pdb'

PDBdata = namedtuple('PDBdata',
                     ['pdbname', 'pdbres', 'resnum', 'chain_name', 'order'])

# since pdbres names are taken to be in a field of 4 chars use the following
# short and rare character to uniqueify them
UNIQUE_PDB_RES_TAG = '#'

DEFAULT_PACKING_F = 0.8
DEFAULT_SEED = 123
DEFAULT_N_LOOP = 100

FALLBACK_THICKNESS = 0

SIDEMAX = 10000.

SURFACTANTS = 'surfactants'
SOLVENTS = 'solvents'
PACKING_PCT = 'packing_pct'
NUMBER = 'number'
LAYER = 'layer'
HYDROPHILIC_IDXS = 'hydrophilic_idxs'
HYDROPHOBIC_IDXS = 'hydrophobic_idxs'
CION_FILE_NAME = 'cion_file_name'

SurfactantInfo = namedtuple('SurfactantInfo', [
    'st', 'name', 'packing_pct', 'number', 'layer', 'hydrophilic_idxs',
    'hydrophobic_idxs', 'cion_st', 'cion_name'
])

SolventInfo = namedtuple('SolventInfo',
                         ['st', 'name', 'packing_pct', 'number', 'layer'])

HYDROPHILIC = 'Hydrophilic'
HYDROPHOBIC = 'Hydrophobic'

MONOLAYER = 'Monolayer'
BILAYER = 'Bilayer'
MICELLE = 'Micelle'
LIPOSOME = 'Liposome'
WORMLIKE_MICELLE = 'Wormlike Micelle'
ELONGATED_MICELLE = 'Elongated Micelle'

FLAG_PACKMOL_INPUT_FILES = '-packmol_input_files'
FLAG_SLB_INPUT_FILE = '-slb_input_file'
FLAG_STRUCTURE_FILE = '-structure_file'
FLAG_BOX_LENGTHS = '-box_lengths'
FLAG_NO_FAILURES = '-no_failures'
FLAG_NO_PREP_MD = '-no_prep_md'
FLAG_FORCE_FIELD = '-force_field'
WATER_FF_TYPE_FLAG = parserutils.WATER_FF_TYPE_FLAG
FLAG_CGFFLD_LOCATION_TYPE = '-cgffld_loc_type'

PROGRAM_NAME = 'Structured Liquid'
DEFAULT_JOB_NAME = 'structured_liquid'

SEPARATOR = ','

SEED = 'seed'
N_LOOP = 'n_loop'
PACKING_F = 'packing_f'
MODEL_TYPE = 'model_type'
MODEL_KWARGS = 'model_kwargs'


[docs]def get_idxs(idxs_str):
    """
    Return integer indices from the given string of indices.

    :type idxs_str: str
    :param idxs_str: the string of indices

    :rtype: tuple
    :return: the integer indices
    """

    return tuple(
        sorted(set([int(idx.strip()) for idx in idxs_str.split(SEPARATOR)])))


[docs]def get_class_map():
    """
    Return the class map.

    :rtype: dict
    :return: the class map
    """

    return {
        MONOLAYER: MonolayerInputFile,
        BILAYER: BilayerInputFile,
        MICELLE: MicelleInputFile,
        LIPOSOME: LiposomeInputFile,
        WORMLIKE_MICELLE: WormlikeMicelleInputFile,
        ELONGATED_MICELLE: ElongatedMicelleInputFile
    }


[docs]def get_pdb_data(pdbname, pdbres, resnum=None, chain_name=None, order=None):
    """
    Return a PDBdata.

    :type pdbname: str
    :param pdbname: the PDB atom name

    :type pdbres: str
    :param pdbres: the PDB residue name

    :type resnum: int or None
    :param resnum: the PDB residue number if needed

    :type chain_name: str or None
    :param chain_name: the PDB chain name if needed

    :type order: int or None
    :param order: a bond order to be used when bonding to the atom
        corresponding to this object

    :rtype: PDBdata
    :return: the PDB data
    """

    if chain_name:
        chain_name = chain_name.strip()
    return PDBdata(pdbname=pdbname.strip(),
                   pdbres=pdbres.strip(),
                   resnum=resnum,
                   chain_name=chain_name,
                   order=order)


[docs]class Job(queue.SubprocessJob):

[docs]    def __init__(self, *args, **kwargs):
        """
        See parent class for documentation.
        """

        self.output_file = kwargs.pop('output_file', None)
        super().__init__(*args, **kwargs)
        self.name = fileutils.get_basename(self.output_file) + '_packmol'

[docs]    def doCommand(self, *args, **kwargs):
        """
        See parent class for documentation.
        """

        # typically ran as 'packmol < input.inp',
        # to avoid shell command pass script and
        # input separately

        self._start_time = time.time()
        script, input_file = self._command
        self._stdin = open(input_file, 'r')
        self._subprocess = subprocess.Popen([script],
                                            stdin=self._stdin,
                                            stdout=self._stdout,
                                            stderr=self._stderr,
                                            universal_newlines=True)

[docs]    def postCommand(self):
        """
        See parent class for documentation.
        """

        super().postCommand()
        self._stdin.close()

[docs]    def update(self):
        """
        See parent class for documentation.
        """

        # packmol failures do not return
        # an error code so manually handle it here

        super().update()
        if self.state == queue.JobState.DONE and self.output_file and \
            not os.path.exists(self.output_file):
            self.state = queue.JobState.FAILED


[docs]def run(input_files, allow_failures=True):
    """
    Run.

    :type input_files: list
    :param input_files: packmol input files

    :type allow_failures: bool
    :param allow_failures: if True then all but a single subjob are
        allowed to fail without causing the main job to fail

    :raise RuntimeError: if all subjobs fail, if allow_failures is False
        then if a single subjob fails then the main job fails

    :rtype: dict
    :return: keys are input files, values are structure output files
    """

    # structure files referenced in the given packmol input files
    # are assumed to exist in the directory from which this function
    # is called

    if allow_failures:
        max_failures = len(input_files) - 1
    else:
        max_failures = 0
    job_dj = queue.JobDJ(verbosity='normal',
                         max_failures=max_failures,
                         max_retries=0)

    # get binary name
    if sys.platform == 'win32':
        packmol_bin = 'packmol.exe'
    else:
        packmol_bin = 'packmol'

    log_fhs = []
    output_files = {}
    for input_file in input_files:
        with open(input_file, 'r') as afile:
            for line in afile:
                tokens = line.strip().split()
                if tokens and tokens[0].lower() == 'output':
                    output_file = tokens[1]
                    break
            else:
                # an input file that does not specify an
                # output file will fail anyway and is handled
                # using max_failures
                output_file = fileutils.get_basename(input_file) + '.out'
        base_name = fileutils.get_basename(output_file)
        log_file = base_name + '.log'
        jobutils.add_outfile_to_backend(log_file, stream=True)
        log_fh = open(log_file, 'w')
        log_fhs.append(log_fh)
        output_files[input_file] = output_file
        cmd = [packmol_bin, input_file]
        job = Job(cmd, stdout=log_fh, stderr=log_fh, output_file=output_file)
        job_dj.addJob(job)

    try:
        job_dj.run()
    finally:
        for log_fh in log_fhs:
            log_fh.close()

    for input_file, output_file in output_files.items():
        if not os.path.exists(output_file):
            output_files[input_file] = None
        jobutils.add_outfile_to_backend(output_file)

    return output_files


[docs]class PDBWriter(structure.PDBWriter):

[docs]    def write(self, ct):
        """
        See parent class for documentation.
        """

        # First check CT is able to be written. TexualStructure objects
        # are not:
        if isinstance(ct, structure.TextualStructure):
            raise Exception("TextualStructure objects can not be written to "
                            "an PDB format file")

        mm.mmpdb_initialize(mm.error_handler)
        fh = mm.mmpdb_new()

        # by default structure.PDBWriter reorders atoms, it is critical
        # that this is avoided due to atom index references in the packmol
        # input file
        mm.mmpdb_set(fh, mm.MMPDB_NO_REORDER_ATOMS)

        mm.mmpdb_write(fh, ct, self.filename)
        mm.mmpdb_delete(fh)


def _set_atom_properties(st, key, available_values, justify=False):
    """
    Set atom properties.

    :type st: schrodinger.structure.Structure
    :param st: the structure

    :type key: str
    :param key: the property key

    :type available_values: list
    :param available_values: unique available values for the property

    :type justify: bool
    :param justify: whether to left justify string values in a field
        of length 4
    """

    # pdb properties like atom name are limited to 4 characters and
    # so we assume no more than 26 identifiers (see more info in the calling
    # code), the following means all possible 26 identifiers have been used,
    # either because all 26 have been assigned by this function or because
    # the input already used all 26 identifiers, returning means that some
    # atoms may not be identified using pdb properties
    if not available_values:
        return

    values = []
    for atom in st.atom:
        value = atom.property.get(key, None)
        if (isinstance(value, str) and value.strip() in ['', 'UNK']) or \
            (isinstance(value, int) and not value):
            value = None
        values.append(value)

    if None not in values:
        return

    new_value = available_values.pop(0)
    if justify:
        new_value = new_value.ljust(4)
    for idx, value in enumerate(values, 1):
        if value is None:
            st.atom[idx].property[key] = new_value


[docs]def set_unique_pdb_atom_names(st):
    """
    Set unique PDB atom names on the given structure.

    :type st: schrodinger.structure.Structure
    :param st: the structure
    """

    adict = {}
    for atom in st.atom:
        idxs = adict.setdefault(atom.pdbres, {}).setdefault(atom.element, [])
        idxs.append(atom.index)
        idx = str(len(idxs))
        atom.pdbname = f'{atom.element}{idx}'.ljust(4)


def _fix_bonding(st, sts, bond_dict=None):
    """
    Fix the bonding output from packmol.

    :type st: schrodinger.structure.Structure
    :param st: the structure to fix

    :type sts: dict
    :param sts: keys are file names (referenced in the given packmol input files),
        values are schrodinger.structure.Structure

    :type bond_dict: dict or None
    :param bond_dict: keys are pair tuples containing PDB atom names and residue
        names, values are triple tuples containing PDB atom names, residue names,
        and bond orders, or None if it is to be determined

    :rtype: dict
    :return: the bond_dict
    """

    # the structure to fix comes from using the PDBReader class
    # on the packmol output pdb file, even though the input pdb
    # files of comoponents have connections the packmol output pdb
    # does not, the PDBReader class by default assigns bonds, yet
    # the assignment can be missing bonds, for both atomistic and
    # CG structures, or the assignment can be wrong, for example
    # due to packmol exiting with a FORCED status which results
    # in non-bonding atoms that are too close to each other,
    # the PDBReader class lacks a clean API to prevent bonding
    # so remove bonds here and rebond

    pairs = [(bond.atom1, bond.atom2) for bond in st.bond]
    for pair in pairs:
        st.deleteBond(*pair)

    if not bond_dict:
        bond_dict = {}
        for file_name, mae_st in sts.items():
            # read the PDB to get the PDB atom names that were
            # used during writing
            pdb_st = structure.Structure.read(file_name)
            # ordering is enforced
            for mae_atom, pdb_atom in zip(mae_st.atom, pdb_st.atom):
                # this tuple will be unique over the entire system
                atuple = get_pdb_data(pdb_atom.pdbname, pdb_atom.pdbres)
                btuples = []
                for mae_neighbor in mae_atom.bonded_atoms:
                    order = mae_st.getBond(mae_atom, mae_neighbor).order
                    pdb_neighbor = pdb_st.atom[mae_neighbor.index]
                    btuple = get_pdb_data(pdb_neighbor.pdbname,
                                          pdb_neighbor.pdbres,
                                          order=order)
                    btuples.append(btuple)
                bond_dict[atuple] = btuples

    idx_dict = {}
    for atom in st.atom:
        atuple = get_pdb_data(atom.pdbname,
                              atom.pdbres,
                              resnum=atom.resnum,
                              chain_name=atom.chain_name)
        idx_dict[atuple] = atom.index

    # see MATSCI-7720 - since only a single instance of each residue type
    # is supported per component there will never be bonds between atoms with
    # the same residue name but different residue numbers, atoms in a
    # component all have the same chain name
    for atuple, idx in idx_dict.items():
        a_key = get_pdb_data(atuple.pdbname, atuple.pdbres)
        for btuple in bond_dict[a_key]:
            b_key = get_pdb_data(btuple.pdbname,
                                 btuple.pdbres,
                                 resnum=atuple.resnum,
                                 chain_name=atuple.chain_name)
            jdx = idx_dict[b_key]
            msutils.add_or_update_bond_order(st, idx, jdx, btuple.order)

    return bond_dict


def _fix_coarse_grained_st(st, sts, cg_info_dict=None):
    """
    Fix the coarse grained structure output from packmol.

    :type st: schrodinger.structure.Structure
    :param st: the structure to fix

    :type sts: dict
    :param sts: keys are file names (referenced in the given packmol input files),
        values are schrodinger.structure.Structure

    :type cg_info_dict: dict or None
    :param cg_info_dict: keys are pair tuples containing PDB atom names and
        residue names, values are coarsegrain.ParticleInfo, or None if it is
        to be determined

    :raise RuntimeError: if there is a problem with the input

    :rtype: dict
    :return: the cg_info_dict
    """

    try:
        is_coarse_grain = coarsegrain.is_coarse_grain_input(
            input_sts=sts.values())
    except ValueError as err:
        raise RuntimeError(str(err))

    if not is_coarse_grain:
        return

    # the packmol input and output pdb files lack coarse-grained particle
    # attributes, since the s_m_pdb_atom_name is limited store that
    # information here so that it can be applied later
    if not cg_info_dict:
        cg_info_dict = {}
        for file_name, mae_st in sts.items():
            # read the PDB to get the PDB atom names that were
            # used during writing
            pdb_st = structure.Structure.read(file_name)
            # ordering is enforced
            for mae_atom, pdb_atom in zip(mae_st.atom, pdb_st.atom):
                key = mae_atom.name
                name = key
                rgb = mae_atom.color.rgb
                radius = mm.mmct_atom_get_display_radius(mae_st, mae_atom.index)
                mass = mm.mmct_atom_get_coarse_grain_mass(
                    mae_st, mae_atom.index)
                info = coarsegrain.ParticleInfo(key=key,
                                                name=name,
                                                rgb=rgb,
                                                radius=radius,
                                                mass=mass)
                # this tuple will be unique over the entire system
                atuple = get_pdb_data(pdb_atom.pdbname, pdb_atom.pdbres)
                cg_info_dict[atuple] = info

    coarsegrain.set_as_coarse_grain(st)
    for atom in st.atom:
        atuple = get_pdb_data(atom.pdbname, atom.pdbres)
        info = cg_info_dict[atuple]
        coarsegrain.set_atom_coarse_grain_properties(st,
                                                     atom,
                                                     info.name,
                                                     rgb=info.rgb,
                                                     radius=info.radius,
                                                     mass=info.mass)

    st.applyStyle(atoms=structure.ATOM_BALLNSTICK,
                  bonds=structure.BOND_BALLNSTICK)

    return cg_info_dict


[docs]def get_cells(input_files, sts, allow_failures=True):
    """
    Return cells.

    :type input_files: dict
    :param input_files: keys are packmol input files, values are tuples of the
        3 box lengths (Angstrom) defining the PBC

    :type sts: dict
    :param sts: keys are file names (referenced in the given packmol input files),
        values are schrodinger.structure.Structure

    :type allow_failures: bool
    :param allow_failures: if True then all but a single subjob are
        allowed to fail without causing the main job to fail

    :raise RuntimeError: if there is a problem with the input

    :rtype: dict
    :return: keys are input files, values are schrodinger.structure.Structure
    """

    # 26 identifiers should be enough (potentially 1 for each componentent that
    # is missing data)
    residue_numbers = list(range(1, 27))
    chain_names = list(string.ascii_uppercase)
    pdb_residue_names = [f'R{i}' for i in residue_numbers]
    pairs = [('i_m_residue_number', residue_numbers),
             ('s_m_chain_name', chain_names),
             ('s_m_pdb_residue_name', pdb_residue_names)]

    # uniqueify
    for st in sts.values():
        for atom in st.atom:
            for key, collection in pairs:
                value = atom.property.get(key)
                if isinstance(value, str):
                    value = value.strip()
                try:
                    collection.remove(value)
                except ValueError:
                    pass

    for file_name, st in sts.items():
        # use the the run function for non-pdb structures, packmol
        # supports pdb, tinker, xyz, and moldy, with pdb the default
        if fileutils.splitext(file_name)[1] != PDB_EXT:
            raise RuntimeError('Only PDB structure files are supported.')

        _set_atom_properties(st, 'i_m_residue_number', residue_numbers)
        _set_atom_properties(st, 's_m_chain_name', chain_names)
        _set_atom_properties(st,
                             's_m_pdb_residue_name',
                             pdb_residue_names,
                             justify=True)

        # warnings regarding nonunique PDB atom names can be thrown,
        # yet in general uniqueifying them within the allowed 4 character
        # max is not possible, for example the packmol source example
        # protein.pdb has multiple atoms with PDB atom name of 'HH11',
        # if the structure lacks s_m_pdb_atom_name properties then the
        # PDBWriter creates them by numbering by element per residue per chain,
        # for example O1, H1, H2, (residue R1, chain A), O1, O2,
        # H1, (residue R2, chain A), O1, H1, H2 (residue R1, chain B), etc.,
        # since unique PDB atom names are required for post-processing use
        # the PDBWriter convention if they aren't already unique

        if len(set(atom.pdbname for atom in st.atom)) < st.atom_total:
            # the PDBWriter PDB atom name assignment convention requires that atoms
            # in a residue have contiguous indexing so instead do it by hand here
            set_unique_pdb_atom_names(st)

        with ioredirect.IOSilence():
            with PDBWriter(file_name) as writer:
                writer.write(st)

    output_files = run(input_files.keys(), allow_failures=allow_failures)

    bond_dict = None
    cg_info_dict = None
    out_sts = {}
    for input_file, output_file in output_files.items():
        if output_file is None:
            continue
        st = structure.Structure.read(output_file)
        box_lengths = input_files[input_file]
        chorus = (box_lengths[0], 0., 0., 0., box_lengths[1], 0., 0., 0.,
                  box_lengths[2])
        xtal.set_pbc_properties(st, chorus)
        if not st.title:
            st.title = fileutils.get_basename(input_file)
        bond_dict = _fix_bonding(st, sts, bond_dict=bond_dict)
        cg_info_dict = _fix_coarse_grained_st(st,
                                              sts,
                                              cg_info_dict=cg_info_dict)
        if not coarsegrain.is_coarse_grain_input(input_sts=sts.values()):
            color.apply_color_scheme(st, 'element')
        # see MATSCI-9010 - allow polymer builder output which features multiple
        # residues by the same name
        unset_unique_pdb_res_names(st)
        out_sts[input_file] = st

    return out_sts


[docs]def write_desmond_cells(input_files,
                        sts,
                        allow_failures=True,
                        force_field=None,
                        water_force_field=msconst.SPC,
                        cg_ff_loc_type=parserutils.LOCAL_CG_FF_LOCATION_TYPE):
    """
    Write Desmond cells.

    :type input_files: dict
    :param input_files: keys are packmol input files, values are tuples of the
        3 box lengths (Angstrom) defining the PBC

    :type sts: dict
    :param sts: keys are file names (referenced in the given packmol input files),
        values are schrodinger.structure.Structure

    :type allow_failures: bool
    :param allow_failures: if True then all but a single subjob are
        allowed to fail without causing the main job to fail

    :type force_field: str
    :param force_field: name of FF to apply

    :type water_force_field: str
    :param water_force_field: name of the water force field to apply,
        options are available in desmondutils

    :type cg_ff_loc_type: str
    :param cg_ff_loc_type: specifies the location to which
        to look for coarse-grained force field files,
        one of parserutils.INSTALLED_CG_FF_LOCATION_TYPE or
        parserutils.LOCAL_CG_FF_LOCATION_TYPE

    :raise RuntimeError: if there is a problem with the input

    :rtype: dict
    :return: keys are input files, values are names of written Desmond `*cms`
        files
    """

    if force_field is None:
        force_field = msutils.get_default_forcefield().name,

    cells = get_cells(input_files, sts, allow_failures=allow_failures)

    cleaner = jobutils.StringCleaner()
    wam_type = jobutils.WAM_TYPES.MS_PREPARE_FOR_MD

    output_files = {}
    for input_file, st in cells.items():
        base_name = fileutils.get_basename(input_file)
        input_file = base_name + '_out.mae'
        st.write(input_file)
        st = xtal.sync_pbc(st, create_pbc=True)
        title = cleaner.cleanAndUniquify(st.title)
        pbc_position = st.property.get(xtal.PBC_POSITION_KEY,
                                       xtal.CENTER_PBC_POSITION)
        rezero_system = not pbc_position.startswith(xtal.ANCHOR_PREFIX)
        sysfile = desmondutils.run_system_builder(
            st,
            title,
            forcefield=force_field,
            rezero_system=rezero_system,
            split_components=True,
            water_fftype=water_force_field,
            set_incorporation=False,
            wam_type=wam_type,
            location_type=cg_ff_loc_type)
        if not sysfile:
            if not allow_failures:
                msg = f'The Desmond system could not be prepared for input {st.title}.'
                raise RuntimeError(msg)
            continue
        output_files[input_file] = sysfile

    for output_file in output_files.values():
        if not os.path.exists(output_file):
            msg = f'The Desmond system output file {output_file} can not be found.'
            raise RuntimeError(msg)
        model = cms.Cms(file=output_file)
        try:
            desmondutils.assign_lipid_ff(model)
        except ValueError:
            # this means no known lipid template is present, so
            # nothing to do
            pass
        else:
            model.write(output_file)
            jobutils.add_wam_to_cms(output_file, wam_type)
        jobutils.add_outfile_to_backend(output_file)

    return output_files


[docs]def set_unique_pdb_res_names(st):
    """
    Set unique PDB residue names on the given structure.

    :raise RuntimeError: if there is a problem with the input

    :type st: schrodinger.structure.Structure
    :param st: the structure
    """

    adict = {}
    for residue in st.residue:
        adict.setdefault(residue.pdbres.strip(), []).append(residue)
    for name, residues in adict.items():
        if len(residues) == 1:
            continue
        for tag_idx, residue in enumerate(residues, 1):
            pdbres = f'{name}{UNIQUE_PDB_RES_TAG}{tag_idx}'
            if len(pdbres) > 4:
                raise RuntimeError(
                    'The uniqueified PDB residue name '
                    f'of {pdbres} for residue {name} has more than 4 '
                    'characters which is not supported.')
            residue.pdbres = pdbres.ljust(4)


[docs]def unset_unique_pdb_res_names(st):
    """
    Unset unique PDB residue names on the given structure.

    :type st: schrodinger.structure.Structure
    :param st: the structure
    """

    for residue in st.residue:
        try:
            name, tag_idx = residue.pdbres.strip().split(UNIQUE_PDB_RES_TAG)
        except ValueError:
            continue
        residue.pdbres = name.ljust(4)


[docs]def group_infos_by_layer(infos):
    """
    Group the given information objects into a dictionary keyed by layer.

    :type infos: list[`SurfactantInfo`] or list[`SolventInfo`]
    :param infos: contains surfactant or solvent infos

    :rtype: dict
    :return: the information objects keyed by layer
    """

    adict = {}
    for info in infos:
        adict.setdefault(info.layer, []).append(info)
    return adict


[docs]def get_max_distance_btw_groups(st, idxs, jdxs):
    """
    Return the maximum distance (Angstrom) between
    the given groups of atom indices.

    :type st: `schrodinger.structure.Structure`
    :param st: the structure

    :type idxs: tuple
    :param idxs: first group of atom indices

    :type jdxs: tuple
    :param jdxs: second group of atom indices

    :rtype: float
    :return: the maximum distance in Angstrom
    """

    # it is possibile that this distance would better be taken
    # as a projection onto a given axis, for example c lattice,
    # radial, etc. vector

    amax = -numpy.inf
    for idx in idxs:
        for jdx in jdxs:
            if idx == jdx:
                continue
            dist = st.measure(idx, jdx)
            dist += st.atom[idx].radius + st.atom[jdx].radius
            amax = max(amax, dist)
    if amax == -numpy.inf:
        idx = idxs[0]
        amax = 2 * st.atom[idx].radius
    return amax


[docs]class PackmolInputFileException(Exception):
    pass


[docs]class PackmolInputFile:
    """
    Manage a packmol input file.
    """

    IN_EXT = '.inp'

[docs]    def __init__(self,
                 tolerance=2.,
                 filetype='pdb',
                 output_base_name='packmol',
                 comment='',
                 general_body=''):
        """
        Create an instance.

        :type tolerance: float
        :param tolerance: the distance tolerance in Angstrom

        :type filetype: str
        :param filetype: the file type to use for all structure files,
            pdb, tinker, xyz, or moldy

        :type output_base_name: str
        :param output_base_name: the base name to use for the
            packmol output structure file

        :type comment: str
        :param comment: a comment placed at the top of the the packmol
            input file, include preceeding '#'

        :type general_body: str
        :param general_body: the general body, should contain newlines,
            this is for any additional top level parameters that do not
            have to do with structures
        """

        self.tolerance = tolerance
        self.filetype = filetype
        self.output = output_base_name + f'.{self.filetype}'
        self.comment = comment
        self.general_body = general_body

        self.structure_bodies = []

[docs]    def addStructureBody(self, base_name, body):
        """
        Add a structure body to the input file.

        :type base_name: str
        :param base_name: the base name of the input structure file

        :type body: str
        :param body: the body, should contain indentation and newlines
        """

        self.structure_bodies.append((base_name, body))

[docs]    def addStructureBodies(self):
        """
        Add structure bodies to the input file.

        :raise PackmolInputFileException: if there is an issue with the input
        """

        raise NotImplementedError

[docs]    def write(self, input_base_name='packmol'):
        """
        Write the packmol input file.

        :type input_base_name: str
        :param input_base_name: the base name to use for the
            packmol input file

        :raise PackmolInputFileException: if there is an issue with the input

        :rtype: str
        :return: the packmol input file name
        """

        if not self.structure_bodies:
            raise PackmolInputFileException('No structural information '
                                            'has been provided.')

        input_file_name = f'{input_base_name}{self.IN_EXT}'
        with open(input_file_name, 'w') as afile:
            if self.comment:
                afile.write(f'{self.comment}\n\n')

            afile.write(f'tolerance {str(self.tolerance)}\n')
            afile.write(f'filetype {self.filetype}\n')
            afile.write(f'output {self.output}\n\n')

            if self.general_body:
                afile.write(f'{self.general_body}')

            for base_name, body in self.structure_bodies:
                afile.write(f'structure {base_name}.{self.filetype}\n')
                afile.write(f'{body}')
                afile.write('end structure\n\n')

        return input_file_name


[docs]class StructuredLiquidInputFile(PackmolInputFile):
    """
    Manage a structured liquid input file.
    """

[docs]    def __init__(self, *args, **kwargs):
        """
        See parent class for documentation.
        """

        super().__init__(*args, **kwargs)
        self._number_dict = {}
        self._volume_dict = {}

[docs]    def check(self):
        """
        Check cell lengths.

        :raise PackmolInputFileException: if there is an issue with the input
        """

        la, lb, lc = self.cell_lengths
        a_min, b_min, c_min = self.getMinCellLengths()
        if a_min is not None and a_min > la:
            vector = 'a-vector'
        elif b_min is not None and b_min > lb:
            vector = 'b-vector'
        elif c_min is not None and c_min > lc:
            vector = 'c-vector'
        else:
            vector = None
        if vector:
            raise PackmolInputFileException(
                f'The length of the {vector} is '
                'not large enough to accomodate surfactant and solvent layers.')

[docs]    def prepare(self,
                cell_lengths,
                surfactant_infos,
                solvent_infos,
                layer_sep=1,
                packing_f=DEFAULT_PACKING_F,
                min_constraint_window_surfactant_idxs=25):
        """
        Prepare.

        :type cell_lengths: tuple
        :param cell_lengths: the cell lengths of the output structure file

        :type surfactant_infos: list
        :param surfactant_infos: contains SurfactantInfo

        :type solvent_infos: list
        :param solvent_infos: contains SolventInfo

        :type layer_sep: float
        :param layer_sep: the layer separation in Angstrom

        :type packing_f: float
        :param packing_f: a packing efficiency factor used to control the density
            of surfactant and solvent molecules

        :type min_constraint_window_surfactant_idxs: float
        :param min_constraint_window_surfactant_idxs: this is the minimum window length
            for constraining surfactant hydrophilic and hydrophobic indices as a percentage
            of the surfactant length, should be in (0, 50)

        :raise PackmolInputFileException: if there is an issue with the input
        """

        for info in surfactant_infos + solvent_infos:
            self._number_dict[info] = 0
            self._volume_dict[info] = 0

        if not 0 < min_constraint_window_surfactant_idxs < 50:
            raise PackmolInputFileException(
                'The minimum constraint window for surfactant '
                'hydrophilic and hydrophobic end atom indices should be a percentage in the '
                'range of (0, 50).')

        self.cell_lengths = cell_lengths
        self.surfactant_infos = group_infos_by_layer(surfactant_infos)
        self.solvent_infos = group_infos_by_layer(solvent_infos)

        # for coarse-grained systems the layer separation does not need to be
        # changed to account for particle radii, this effect is handled elsewhere
        self.layer_sep = layer_sep

        self.packing_f = packing_f

        # set some distance attributes for surfactant
        self.max_surfactant_dists = {}
        for layer, infos in self.surfactant_infos.items():
            max_surfactant_dist = -numpy.inf
            max_hydrophilic_dist = -numpy.inf
            max_hydrophobic_dist = -numpy.inf
            for info in infos:
                max_surfactant_dist = max(
                    max_surfactant_dist,
                    get_max_distance_btw_groups(info.st, info.hydrophilic_idxs,
                                                info.hydrophobic_idxs))
                max_hydrophilic_dist = max(
                    max_hydrophilic_dist,
                    get_max_distance_btw_groups(info.st, info.hydrophilic_idxs,
                                                info.hydrophilic_idxs))
                max_hydrophobic_dist = max(
                    max_hydrophobic_dist,
                    get_max_distance_btw_groups(info.st, info.hydrophobic_idxs,
                                                info.hydrophobic_idxs))
            amin = min_constraint_window_surfactant_idxs * max_surfactant_dist / 100
            max_hydrophilic_dist = max(max_hydrophilic_dist, amin)
            max_hydrophobic_dist = max(max_hydrophobic_dist, amin)
            self.max_surfactant_dists[layer] = (max_surfactant_dist,
                                                max_hydrophilic_dist,
                                                max_hydrophobic_dist)

        # set some distance attributes for counter-ion
        self.max_cion_dists = self.getMaxDists(self.surfactant_infos)

        # set some distance attributes for solvent
        self.max_solvent_dists = self.getMaxDists(self.solvent_infos)

[docs]    def getMaxDists(self, all_infos):
        """
        Return a dictionary of maximum distances (Ang.) among the structures
        in each layer of the given all_infos.

        :type all_infos: dict
        :param all_infos: keys are layers, values are lists containing either
            SurfactantInfo or SolventInfo

        :rtype: dict
        :return: keys are layers, values are maximum distances
        """

        max_dists = {}
        for layer, infos in all_infos.items():
            max_dist = -numpy.inf
            for info in infos:
                if isinstance(info, SurfactantInfo):
                    st = info.cion_st
                    if not st:
                        continue
                else:
                    st = info.st
                idxs = list(range(1, st.atom_total + 1))
                max_dist = max(max_dist,
                               get_max_distance_btw_groups(st, idxs, idxs))
            max_dists[layer] = max_dist
        return max_dists

[docs]    def getBoxSliceVol(self, cmin, cmax):
        """
        Return the box slice volume.

        :type cmin: float
        :param cmin: the lower bound on the layer in Angstrom

        :type cmax: float
        :param cmax: the upper bound on the layer in Angstrom

        :rtype: float
        :return: the box slice volume in Ang.^3
        """

        la, lb, lc = self.cell_lengths
        return la * lb * (cmax - cmin)

[docs]    def getSphereSliceVol(self, rmin, rmax):
        """
        Return the sphere slice volume.

        :type rmin: float
        :param rmin: the lower bound on the layer in Angstrom

        :type rmax: float
        :param rmax: the upper bound on the layer in Angstrom

        :rtype: float
        :return: the sphere slice volume in Ang.^3
        """

        return 4 * numpy.pi * (rmax**3 - rmin**3) / 3

[docs]    def getOutsideSphereVol(self, radius):
        """
        Return the box volume less the sphere volume.

        :type radius: float
        :param radius: the radius of the sphere in Angstrom

        :rtype: float
        :return: the box volume less the sphere volume in Ang.^3
        """

        sphere_vol = self.getSphereSliceVol(0, radius)
        la, lb, lc = self.cell_lengths
        box_vol = self.getBoxSliceVol(0, lc)
        return box_vol - sphere_vol

[docs]    def getCylinderSliceVol(self, rmin, rmax, length):
        """
        Return the cylinder slice volume.

        :type rmin: float
        :param rmin: the lower bound on the layer in Angstrom

        :type rmax: float
        :param rmax: the upper bound on the layer in Angstrom

        :type length: float
        :param length: the length of the cylinder in Angstrom

        :rtype: float
        :return: the cylinder slice volume in Ang.^3
        """

        return length * numpy.pi * (rmax**2 - rmin**2)

[docs]    def getOutsideCylinderVol(self, radius, length):
        """
        Return the box volume less the cylinder volume.

        :type radius: float
        :param radius: the radius of the cylinder in Angstrom

        :type length: float
        :param length: the length of the cylinder in Angstrom

        :rtype: float
        :return: the box volume less the cylinder volume in Ang.^3
        """

        cylinder_vol = self.getCylinderSliceVol(0, radius, length)
        la, lb, lc = self.cell_lengths
        box_vol = self.getBoxSliceVol(0, lc)
        return box_vol - cylinder_vol

[docs]    def getEllipsoidSliceVol(self, rmin, rmax):
        """
        Return the ellipsoid slice volume.

        :type rmin: float
        :param rmin: the lower bound on the layer in Angstrom

        :type rmax: float
        :param rmax: the upper bound on the layer in Angstrom

        :rtype: float
        :return: the ellipsoid slice volume in Ang.^3
        """

        # ellipsoids are prolate spheroids

        rpmin = self.factor * rmin
        rpmax = self.factor * rmax
        return 4 * numpy.pi * (rmax * rmax * rpmax - rmin * rmin * rpmin) / 3

[docs]    def getOutsideEllipsoidVol(self, radius):
        """
        Return the box volume less the ellipsoid volume.

        :type radius: float
        :param radius: the radius of the ellipsoid in Angstrom, this is half
            the length of the minor axis

        :rtype: float
        :return: the box volume less the ellipsoid volume in Ang.^3
        """

        ellipsoid_vol = self.getEllipsoidSliceVol(0, radius)
        la, lb, lc = self.cell_lengths
        box_vol = self.getBoxSliceVol(0, lc)
        return box_vol - ellipsoid_vol

[docs]    def getNumber(self, info, amin, amax):
        """
        Return the number of surfactant or solvent molecules
        to add for the given info.

        :type info: SurfactantInfo or SolventInfo
        :param info: the info object for this layer

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float or None
        :param amax: the upper bound on the layer in Angstrom or None
            if there isn't one in which case the remaining outermost space
            of the cell will be used

        :rtype: int
        :return: the number of molecules
        """

        if info.number:
            self._number_dict[info] += info.number
            return info.number

        # see MATSCI-12671 - prevent packmol from having to pack outside
        # the requested cell dimensions for CG structures, otherwise this
        # leads to overlapping images, do this by increasing the molecular
        # volume, the 1.56 = 2.35 Ang. / 1.5 Ang. where 2.35 Ang. is a
        # typical Martini particle radius and 1.5 Ang. is the radius for a
        # carbon atom
        vdw_scale = 1.56 if msutils.is_coarse_grain(info.st) else 1
        buffer_len = max(atom.radius for atom in info.st.atom)
        mol_vol = features.Complex(info.st).getVDWVolume(vdw_scale=vdw_scale,
                                                         buffer_len=buffer_len)
        vol = self.getLayerVolume(amin, amax)
        # does the number of solvent molecules need to be reduced by the volume
        # taken up by counter-ions?
        number = max(
            int(self.packing_f * (info.packing_pct / 100) * vol / mol_vol), 1)
        self._number_dict[info] += number
        return number

[docs]    def getLayerVolume(self, amin, amax, buffer_len=0):
        """
        Return the volume of this layer.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float or None
        :param amax: the upper bound on the layer in Angstrom or None
            if there isn't one in which case the remaining outermost space
            of the cell will be used

        :type buffer_len: float
        :param buffer_len: a buffer length in Angstrom

        :rtype: int
        :return: the volume in Ang.^3
        """

        if amax:
            vol = self.getVol(amin, amax + buffer_len)
        else:
            vol = self.getOutsideVol()
        return vol

[docs]    def addLayer(self,
                 info,
                 amin,
                 amax,
                 hydrophilic_at_max=False,
                 number=None,
                 add_cion=False):
        """
        Add a layer.

        :type info: SurfactantInfo or SolventInfo
        :param info: the info object for this layer

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float or None
        :param amax: the upper bound on the layer in Angstrom or None
            if there isn't one in which case the remaining outermost space
            of the cell will be used

        :type hydrophilic_at_max: bool
        :param hydrophilic_at_max: if info is SurfactantInfo whether the
            hydrophilic atoms are to be located at the maximum

        :type number: int or None
        :param number: the number of molecules to add, if None it will be
            determined

        :type add_cion: bool
        :param add_cion: if info is SurfactantInfo whether the layer
            is for the counter-ion
        """

        if number is None:
            number = self.getNumber(info, amin, amax)
        body = f'  number {number}\n'

        st = info.st
        name = info.name

        # handle surfactant and solvent constraints
        hydrophilic_idxs = getattr(info, 'hydrophilic_idxs', None)
        hydrophobic_idxs = getattr(info, 'hydrophobic_idxs', None)
        if hydrophilic_idxs and hydrophobic_idxs:
            if add_cion:
                st = info.cion_st
                name = info.cion_name
                # counter-ions have the same constraints as solvent
                body += self.getSolventConstraints(amin, amax)
            else:
                hydrophilic_idxs = ' '.join(str(x) for x in hydrophilic_idxs)
                hydrophobic_idxs = ' '.join(str(x) for x in hydrophobic_idxs)
                max_surfactant_dist, max_hydrophilic_dist, max_hydrophobic_dist = \
                    self.max_surfactant_dists[info.layer]
                if hydrophilic_at_max:
                    hydrophilic_key = self.SURFACTANT_TOP_ATOM_CONSTRAINT
                    hydrophilic_value = self.getConstraintType(
                        amax - max_hydrophilic_dist)
                    hydrophobic_key = self.SURFACTANT_BOTTOM_ATOM_CONSTRAINT
                    hydrophobic_value = self.getConstraintType(
                        amin + max_hydrophobic_dist)
                else:
                    hydrophilic_key = self.SURFACTANT_BOTTOM_ATOM_CONSTRAINT
                    hydrophilic_value = self.getConstraintType(
                        amin + max_hydrophilic_dist)
                    hydrophobic_key = self.SURFACTANT_TOP_ATOM_CONSTRAINT
                    hydrophobic_value = self.getConstraintType(
                        amax - max_hydrophobic_dist)
                body += self.getSurfactantConstraints(amin, amax)
                body += f'  atoms {hydrophilic_idxs}\n'
                body += f'    {hydrophilic_key} {hydrophilic_value}\n'
                body += '  end atoms\n'
                body += f'  atoms {hydrophobic_idxs}\n'
                body += f'    {hydrophobic_key} {hydrophobic_value}\n'
                body += '  end atoms\n'
        else:
            body += self.getSolventConstraints(amin, amax)

        body += self.getCoarseGrainedRadiiBody(st)

        # see MATSCI-7720 - we need to ensure unique residue numbering
        # in order to properly post-process the bonding, etc.
        body += '  resnumbers 0\n'

        self.addStructureBody(name, body)

[docs]    def getCoarseGrainedRadiiBody(self, st):
        """
        Return the coarse-grained radii body.

        :type st: schrodinger.structure.Structure
        :param st: the structure

        :rtype: str
        :return: the coarse-grain radii body
        """

        if not msutils.is_coarse_grain(st):
            return ''

        # all particles of a given name will have the same radius
        names_to_idxs = {}
        names_to_radii = {}
        for atom in st.atom:
            names_to_idxs.setdefault(atom.name, []).append(atom.index)
            names_to_radii[atom.name] = atom.radius

        body = ''
        for name, idxs in names_to_idxs.items():
            idxs = ' '.join(str(x) for x in idxs)
            radius = names_to_radii[name]
            body += f'  atoms {idxs}\n'
            body += f'    radius {radius}\n'
            body += '  end atoms\n'

        return body

[docs]    def getTotalSurfactantThickness(self, layers_are_bilayers=True):
        """
        Return the total surfactant thickness in Angstrom.

        :type layers_are_bilayers: bool
        :param layers_are_bilayers: whether layers are bilayers

        :rtype: float
        :return: the total surfactant thickness in Angstrom
        """

        # include separations
        thicknesses = [dists[0] for dists in self.max_surfactant_dists.values()]
        n_layer_seps = len(self.max_surfactant_dists) - 1
        thickness = sum(thicknesses) + n_layer_seps * self.layer_sep
        if layers_are_bilayers:
            return 2 * thickness + self.layer_sep
        else:
            return thickness

[docs]    def getTotalCIonThickness(self):
        """
        Return the total counter-ion thickness in Angstrom.

        :rtype: float
        :return: the total counter-ion thickness in Angstrom
        """

        # do not include separations

        if self.max_cion_dists:
            return max(self.max_cion_dists.values())
        else:
            return FALLBACK_THICKNESS

[docs]    def getTotalNonSurfactantThickness(self):
        """
        Return the total non-surfactant thickness in Angstrom.

        :rtype: float
        :return: the total non-surfactant thickness in Angstrom
        """

        # do not include separations

        # counter-ions always go in the hydrophilic layer

        cion_t = self.getTotalCIonThickness()
        philic_t = self.max_solvent_dists.get(HYDROPHILIC, FALLBACK_THICKNESS)
        phobic_t = self.max_solvent_dists.get(HYDROPHOBIC, FALLBACK_THICKNESS)
        return max(cion_t, philic_t) + phobic_t

[docs]    def addSurfactantBodies(self,
                            start_dist,
                            start_hydrophilic_at_max=False,
                            layers=None,
                            layers_are_bilayers=True):
        """
        Add surfactant bodies to the input file.

        :type start_dist: float
        :param start_dist: start adding surfactants at this distance in Angstrom

        :type start_hydrophilic_at_max: bool
        :param start_hydrophilic_at_max: specifies whether the
            hydrophilic atoms for the starting layer are to be
            located at the maximum

        :type layers: list or None
        :param layers: the surfactant layers to add, if None then all
            will be added

        :type layers_are_bilayers: bool
        :param layers_are_bilayers: whether layers are bilayers

        :rtype: float, bool
        :return: surfactants stopped being added at this distance in Angstrom,
            whether the hydrophilic atoms for the final layer are located at
            the maximum
        """

        if layers is None:
            layers = self.getSurfactantLayers()

        amax = start_dist
        hydrophilic_at_max = not start_hydrophilic_at_max
        for layer in layers:
            surfactant_infos = self.surfactant_infos[layer]
            surfactant_t = self.max_surfactant_dists[layer][0]
            amin = amax + self.layer_sep
            amax = amin + surfactant_t
            hydrophilic_at_max = not hydrophilic_at_max
            for info in surfactant_infos:
                number = self.getNumber(info, amin, amax)
                self._volume_dict[info] += self.getLayerVolume(
                    amin, amax, buffer_len=self.layer_sep)
                self.addLayer(info,
                              amin,
                              amax,
                              hydrophilic_at_max=hydrophilic_at_max,
                              number=number)
            if not layers_are_bilayers:
                continue
            amin = amax + self.layer_sep
            amax = amin + surfactant_t
            hydrophilic_at_max = not hydrophilic_at_max
            for info in surfactant_infos:
                number = self.getNumber(info, amin, amax)
                self._volume_dict[info] += self.getLayerVolume(
                    amin, amax, buffer_len=self.layer_sep)
                self.addLayer(info,
                              amin,
                              amax,
                              hydrophilic_at_max=hydrophilic_at_max,
                              number=number)

        return amax, hydrophilic_at_max

[docs]    def addCIonBodies(self, start_dist, stop_dist, layers=None, factor=1):
        """
        Add counter-ion bodies to the input file.

        :type start_dist: float
        :param start_dist: start adding counter-ions at this distance in Angstrom

        :type stop_dist: float
        :param stop_dist: stop adding counter-ions at this distance in Angstrom

        :type layers: list or None
        :param layers: the counter-ion layers to add, if None then all
            will be added

        :type factor: float
        :param factor: multiplies the corresponding number of surfactant molecules
            to set the number of counter-ions in the given region
        """

        if layers is None:
            layers = self.getSurfactantLayers()

        for layer in layers:
            for info in self.surfactant_infos[layer]:
                if info.cion_st:
                    # does the rounding done here keep charges consistent?
                    number = int(factor * self._number_dict[info]) or 1
                    self.addLayer(info,
                                  start_dist,
                                  stop_dist,
                                  number=number,
                                  add_cion=True)

[docs]    def addSolventBodies(self,
                         start_dist,
                         stop_dist,
                         layers=None,
                         layer_sep=None):
        """
        Add solvent bodies to the input file.

        :type start_dist: float
        :param start_dist: start adding solvents at this distance in Angstrom

        :type stop_dist: float
        :param stop_dist: stop adding solvents at this distance in Angstrom

        :type layers: list or None
        :param layers: the solvent layers to add, if None then all
            will be added

        :type layer_sep: float
        :param layer_sep: the layer separation in Angstrom
        """

        if layer_sep is None:
            layer_sep = self.layer_sep

        if layers is None:
            layers = self.getSolventLayers()

        for layer in layers:
            for info in self.solvent_infos[layer]:
                self._volume_dict[info] += self.getLayerVolume(
                    start_dist, stop_dist, buffer_len=layer_sep)
                self.addLayer(info, start_dist, stop_dist)

[docs]    def getSurfactantLayers(self):
        """
        Return a list of surfactant layers.

        :rtype: list
        :return: the surfactant layers
        """

        return sorted(self.surfactant_infos.keys())

[docs]    def getSolventLayers(self):
        """
        Return a list of solvent layers.

        :rtype: list
        :return: the solvent layers
        """

        return sorted(self.solvent_infos.keys())

[docs]    def getConstraintType(self, parameter):
        """
        Return the constraint type.

        :type parameter: float
        :param parameter: the parameter in Angstrom

        :rtype: str
        :return: the constraint type
        """

        return self.CONSTRAINT_TYPE.format(parameter=parameter)


[docs]class MonolayerInputFile(StructuredLiquidInputFile):
    """
    Manage a monolayer input file.
    """

    SURFACTANT_TOP_ATOM_CONSTRAINT = 'over'
    SURFACTANT_BOTTOM_ATOM_CONSTRAINT = 'below'
    CONSTRAINT_TYPE = 'plane 0 0 1 {parameter}'

[docs]    def getMinCellLengths(self):
        """
        Return the minimum cell lengths.

        :rtype: tuple
        :return: a triple containing float and/or None, float is a minimum
            cell length in Angstrom, None indicates that there is no minimum
        """

        t_min = self.getTotalSurfactantThickness(layers_are_bilayers=False)
        t_min += self.getTotalNonSurfactantThickness()
        t_min += 2 * self.layer_sep
        return (None, None, t_min)

[docs]    def check(self):
        """
        Check cell lengths.

        :raise PackmolInputFileException: if there is an issue with the input
        """

        # monolayers are asymmetric and so both a hydrophilic and hydrophobic solvent
        # must be given
        if len(self.solvent_infos) != 2:
            raise PackmolInputFileException(
                'Two different solvent layers are required.')

        super().check()

[docs]    def getVol(self, amin, amax):
        """
        Return the volume.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: float
        :return: the volume in Ang.^3
        """

        return self.getBoxSliceVol(amin, amax)

[docs]    def getSurfactantConstraints(self, amin, amax):
        """
        Return the surfactant constraints.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: str
        :return: the surfactant constraints
        """

        la, lb, lc = self.cell_lengths
        return f'  inside box 0 0 {amin} {la} {lb} {amax}\n'

[docs]    def getSolventConstraints(self, amin, amax):
        """
        Return the solvent constraints.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: str
        :return: the solvent constraints
        """

        la, lb, lc = self.cell_lengths
        body = f'  inside box 0 0 {amin} {la} {lb} {amax}\n'
        return body

[docs]    def addStructureBodies(self):
        """
        See parent class for documentation.
        """

        self.check()

        la, lb, lc = self.cell_lengths

        total_surfactant_t = self.getTotalSurfactantThickness(
            layers_are_bilayers=False)
        solvent_t = (lc - (total_surfactant_t + 2 * self.layer_sep)) / 2

        # bottom solvent
        c_min = 0
        c_max = c_min + solvent_t
        self.addSolventBodies(c_min, c_max, layers=[HYDROPHILIC])

        # surfactants
        c_max, final_hydrophilic_at_max = self.addSurfactantBodies(
            c_max, start_hydrophilic_at_max=False, layers_are_bilayers=False)

        # counter-ion, only add for hydrophilic layer
        self.addCIonBodies(c_min, c_min + solvent_t, factor=1)

        # top solvent
        c_min = c_max + self.layer_sep
        c_max = c_min + solvent_t
        self.addSolventBodies(c_min, c_max, layers=[HYDROPHOBIC], layer_sep=0)


[docs]class BilayerInputFile(MonolayerInputFile):
    """
    Manage a bilayer input file.
    """

[docs]    def getMinCellLengths(self):
        """
        Return the minimum cell lengths.

        :rtype: tuple
        :return: a triple containing float and/or None, float is a minimum
            cell length in Angstrom, None indicates that there is no minimum
        """

        t_min = self.getTotalSurfactantThickness()
        t_min += 2 * self.getTotalNonSurfactantThickness()
        t_min += 2 * self.layer_sep
        return (None, None, t_min)

[docs]    def check(self):
        """
        Check cell lengths.

        :raise PackmolInputFileException: if there is an issue with the input
        """

        if len(self.solvent_infos) != 1:
            raise PackmolInputFileException(
                'Only a single solvent layer is allowed.')

        StructuredLiquidInputFile.check(self)

[docs]    def addStructureBodies(self):
        """
        See parent class for documentation.
        """

        self.check()

        la, lb, lc = self.cell_lengths

        total_surfactant_t = self.getTotalSurfactantThickness()
        solvent_t = (lc - (total_surfactant_t + 2 * self.layer_sep)) / 2

        # bottom solvent
        c_min = 0
        c_max = c_min + solvent_t
        self.addSolventBodies(c_min, c_max)

        # surfactants
        c_max, final_hydrophilic_at_max = self.addSurfactantBodies(c_max)

        # bottom counter-ion
        self.addCIonBodies(c_min, c_min + solvent_t, factor=0.5)

        # top solvent
        c_min = c_max + self.layer_sep
        c_max = c_min + solvent_t
        self.addSolventBodies(c_min, c_max, layer_sep=0)

        # top counter-ion
        self.addCIonBodies(c_min, c_max, factor=0.5)


[docs]class MicelleInputFile(StructuredLiquidInputFile):
    """
    Manage a micelle input file.
    """

    SURFACTANT_TOP_ATOM_CONSTRAINT = 'outside'
    SURFACTANT_BOTTOM_ATOM_CONSTRAINT = 'inside'
    CONSTRAINT_TYPE = 'sphere 0 0 0 {parameter}'

[docs]    def getMinCellLengths(self):
        """
        Return the minimum cell lengths.

        :rtype: tuple
        :return: a triple containing float and/or None, float is a minimum
            cell length in Angstrom, None indicates that there is no minimum
        """

        t_min = 2 * self.getRadius()
        t_min += 2 * self.getTotalNonSurfactantThickness()
        t_min += 2 * self.layer_sep
        return (t_min, t_min, t_min)

[docs]    def check(self):
        """
        Check cell lengths.

        :raise PackmolInputFileException: if there is an issue with the input
        """

        if len(self.solvent_infos) != 1:
            raise PackmolInputFileException(
                'Only a single solvent layer is allowed.')

        super().check()

[docs]    def getRadius(self):
        """
        Return the radius.

        :rtype: float
        :return: the radius in Ang.
        """

        # in general a micelle is a stacked bilayer with a monolayer core
        total_surfactant_t = self.getTotalSurfactantThickness()
        first_surfactant_layer = self.getSurfactantLayers()[0]
        first_surfactant_t = self.max_surfactant_dists[first_surfactant_layer][
            0]
        return total_surfactant_t - first_surfactant_t - self.layer_sep

[docs]    def getVol(self, amin, amax):
        """
        Return the volume.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: float
        :return: the volume in Ang.^3
        """

        return self.getSphereSliceVol(amin, amax)

[docs]    def getOutsideVol(self):
        """
        Return the outside volume.

        :rtype: float
        :return: the outside volume in Ang.^3
        """

        radius = self.getRadius() + self.layer_sep
        return self.getOutsideSphereVol(radius)

[docs]    def getSurfactantConstraints(self, amin, amax):
        """
        Return the surfactant constraints.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: str
        :return: the surfactant constraints
        """

        la, lb, lc = self.cell_lengths
        return f'  inside box {-la/2} {-lb/2} {-lc/2} {la/2} {lb/2} {lc/2}\n'

[docs]    def getSolventConstraints(self, amin, amax):
        """
        Return the solvent constraints.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: str
        :return: the solvent constraints
        """

        body = self.getSurfactantConstraints(amin, amax)
        radius = self.getRadius() + self.layer_sep
        value = self.getConstraintType(radius)
        body += f'  outside {value}\n'
        return body

[docs]    def addStructureBodies(self):
        """
        See parent class for documentation.
        """

        self.check()

        layers = self.getSurfactantLayers()

        # first surfactant, monolayer
        layer = layers[0]
        r_min = 0
        r_max = r_min + self.max_surfactant_dists[layer][0]
        hydrophilic_at_max = True
        for info in self.surfactant_infos[layer]:
            number = self.getNumber(info, r_min, r_max)
            self._volume_dict[info] += self.getLayerVolume(
                r_min, r_max, buffer_len=self.layer_sep)
            self.addLayer(info,
                          r_min,
                          r_max,
                          hydrophilic_at_max=hydrophilic_at_max,
                          number=number)

        # remaining surfactants, bilayers
        hydrophilic_at_max = not hydrophilic_at_max
        r_max, final_hydrophilic_at_max = self.addSurfactantBodies(
            r_max,
            start_hydrophilic_at_max=hydrophilic_at_max,
            layers=layers[1:])

        # solvent
        r_min = r_max + self.layer_sep
        r_max = None
        self.addSolventBodies(r_min, r_max, layer_sep=0)

        # first counter-ion
        for info in self.surfactant_infos[layer]:
            if info.cion_st:
                number = self._number_dict[info]
                self.addLayer(info, r_min, r_max, number=number, add_cion=True)

        # remaining counter-ions
        self.addCIonBodies(r_min, r_max, layers=layers[1:], factor=1)


[docs]class LiposomeInputFile(MicelleInputFile):
    """
    Manage a liposome input file.
    """

[docs]    def prepare(self, *args, **kwargs):
        """
        See parent class for documentation.

        :type radius: float
        :param radius: the radius of the liposome in Ang.
        """

        self.radius = kwargs.pop('radius')
        super().prepare(*args, **kwargs)

[docs]    def getMinRadius(self):
        """
        Return the minimum radius.

        :rtype: float
        :return: the minimum radius in Angstrom
        """

        r_min = self.getTotalSurfactantThickness()
        r_min += self.getTotalNonSurfactantThickness()
        r_min += self.layer_sep
        return r_min

[docs]    def check(self):
        """
        Check cell lengths.

        :raise PackmolInputFileException: if there is an issue with the input
        """

        super().check()

        r_min = self.getMinRadius()
        radius = self.getRadius()
        if r_min > radius:
            raise PackmolInputFileException(
                'The outer radius is '
                'not large enough to accomodate surfactant and solvent layers.')

[docs]    def getRadius(self):
        """
        Return the radius.

        :rtype: float
        :return: the radius in Ang.
        """

        return self.radius

[docs]    def getInnerRadius(self):
        """
        Return the inner radius.

        :rtype: float
        :return: the inner radius in Ang.
        """

        total_surfactant_t = self.getTotalSurfactantThickness()
        return self.getRadius() - total_surfactant_t - self.layer_sep

[docs]    def getSolventConstraints(self, amin, amax):
        """
        Return the solvent constraints.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float or None
        :param amax: the upper bound on the layer in Angstrom or None
            if there isn't one in which case the remaining outermost space
            of the cell will be used

        :rtype: str
        :return: the constraints
        """

        if amax:
            body = self.getSurfactantConstraints(amin, amax)
            value = self.getConstraintType(amax)
            body += f'  inside {value}\n'
        else:
            body = super().getSolventConstraints(amin, amax)
        return body

[docs]    def addStructureBodies(self):
        """
        See parent class for documentation.
        """

        self.check()

        # inner solvent
        r_min = 0
        r_max = self.getInnerRadius()
        self.addSolventBodies(r_min, r_max)

        # surfactants
        r_max, final_hydrophilic_at_max = self.addSurfactantBodies(r_max)

        inside_vol = self.getVol(0, self.getInnerRadius())
        outside_vol = self.getOutsideVol()
        factor = inside_vol / (inside_vol + outside_vol)

        # inner counter-ions
        self.addCIonBodies(r_min, self.getInnerRadius(), factor=factor)

        # outer solvent
        r_min = r_max + self.layer_sep
        r_max = None
        self.addSolventBodies(r_min, r_max, layer_sep=0)

        # outer counter-ions
        factor = 1 - factor
        self.addCIonBodies(r_min, r_max, factor=factor)


[docs]class WormlikeMicelleInputFile(MicelleInputFile):
    """
    Manage a wormlike micelle input file.
    """

    CONSTRAINT_TYPE = 'cylinder 0 0 {bottom} 0 0 1 {parameter} {length}'

[docs]    def getMinCellLengths(self):
        """
        Return the minimum cell lengths.

        :rtype: tuple
        :return: a triple containing float and/or None, float is a minimum
            cell length in Angstrom, None indicates that there is no minimum
        """

        t_min = 2 * self.getRadius()
        t_min += 2 * self.getTotalNonSurfactantThickness()
        t_min += 2 * self.layer_sep
        return (t_min, t_min, None)

[docs]    def getVol(self, amin, amax):
        """
        Return the volume.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: float
        :return: the volume in Ang.^3
        """

        la, lb, lc = self.cell_lengths
        return self.getCylinderSliceVol(amin, amax, lc)

[docs]    def getOutsideVol(self):
        """
        Return the outside volume.

        :rtype: float
        :return: the outside volume in Ang.^3
        """

        radius = self.getRadius() + self.layer_sep
        la, lb, lc = self.cell_lengths
        return self.getOutsideCylinderVol(radius, lc)

[docs]    def getConstraintType(self, parameter):
        """
        Return the constraint type.

        :type parameter: float
        :param parameter: the parameter in Angstrom

        :rtype: str
        :return: the constraint type
        """

        la, lb, lc = self.cell_lengths
        return self.CONSTRAINT_TYPE.format(bottom=-lc / 2,
                                           parameter=parameter,
                                           length=lc)


[docs]class ElongatedMicelleInputFile(MicelleInputFile):
    """
    Manage an elongated micelle input file.
    """

    CONSTRAINT_TYPE = 'ellipsoid 0 0 0 {parameter} {parameter} {parameter_p} 1'

[docs]    def prepare(self, *args, **kwargs):
        """
        See parent class for documentation.

        :type factor: float
        :param factor: the scale factor for the principal axis
        """

        self.factor = kwargs.pop('factor')
        super().prepare(*args, **kwargs)

[docs]    def getMinCellLengths(self):
        """
        Return the minimum cell lengths.

        :rtype: tuple
        :return: a triple containing float and/or None, float is a minimum
            cell length in Angstrom, None indicates that there is no minimum
        """

        t_min = 2 * self.getRadius()
        t_min += 2 * self.getTotalNonSurfactantThickness()
        t_min += 2 * self.layer_sep
        tp_min = 2 * self.getMajorRadius()
        tp_min += 2 * self.getTotalNonSurfactantThickness()
        tp_min += 2 * self.layer_sep
        return (t_min, t_min, tp_min)

[docs]    def getMajorRadius(self):
        """
        Return the major radius.

        :rtype: float
        :return: the major radius in Ang.
        """

        # this is the radius of the elongated sphere

        return self.factor * self.getRadius()

[docs]    def getVol(self, amin, amax):
        """
        Return the volume.

        :type amin: float
        :param amin: the lower bound on the layer in Angstrom

        :type amax: float
        :param amax: the upper bound on the layer in Angstrom

        :rtype: float
        :return: the volume in Ang.^3
        """

        return self.getEllipsoidSliceVol(amin, amax)

[docs]    def getOutsideVol(self):
        """
        Return the outside volume.

        :rtype: float
        :return: the outside volume in Ang.^3
        """

        radius = self.getRadius() + self.layer_sep
        return self.getOutsideEllipsoidVol(radius)

[docs]    def getConstraintType(self, parameter):
        """
        Return the constraint type.

        :type parameter: float
        :param parameter: the parameter in Angstrom

        :rtype: str
        :return: the constraint type
        """

        parameter_p = self.factor * parameter
        return self.CONSTRAINT_TYPE.format(parameter=parameter,
                                           parameter_p=parameter_p)


[docs]def get_writer(surfactant_infos, solvent_infos, base_name, seed, n_loop,
               cell_lengths, packing_f, model_type, **kwargs):
    """
    Get the writer.

    :type surfactant_infos: list[`SurfactantInfo`]
    :param surfactant_infos: contains surfactant infos

    :type solvent_infos: list[`SolventInfo`]
    :param solvent_infos: contains solvent infos

    :type base_name: str
    :param base_name: base name used for output file naming

    :type seed: int
    :param seed: seed for random

    :type n_loop: int
    :param n_loop: the number of packmol loops

    :type cell_lengths: tuple[float]
    :param cell_lengths: the cell lengths in Angstrom

    :type packing_f: float
    :param packing_f: the packing factor

    :type model_type: str
    :param model_type: the model type, a key in the class map

    :rtype: `StructuredLiquidInputFile`
    :return: the writer
    """

    # set up packmol input file, for now using fixed values of some parameters,
    # the given tolerance is used for both atomistic and coarse-grained, for
    # coarse-grained increasing it for a test case reproduced the same results

    tolerance = 2  # Ang.
    layer_sep = 1  # Ang.
    min_constraint_window_surfactant_idxs = 25  # percent

    output_base_name = f'{base_name}_out'
    general_body = f'seed {seed}\n'
    general_body += f'nloop {n_loop}\n'
    # MATSCI-7885 - it was decided that 'sidemax' should be increased
    # from the default of 1000. to avoid Packmol logging an error for
    # large input cells
    general_body += f'sidemax {SIDEMAX}\n'
    general_body += '\n'

    class_map = get_class_map()
    aclass = class_map[model_type]

    writer = aclass(tolerance=tolerance,
                    output_base_name=output_base_name,
                    general_body=general_body)

    writer.prepare(cell_lengths,
                   surfactant_infos,
                   solvent_infos,
                   layer_sep=layer_sep,
                   packing_f=packing_f,
                   min_constraint_window_surfactant_idxs=
                   min_constraint_window_surfactant_idxs,
                   **kwargs)

    return writer


[docs]def get_parser(description, packmol_input=True):
    """
    Get the command line argument parser.

    :type description: str
    :param description: the description

    :type packmol_input: bool
    :param packmol_input: whether the input is a packmol
        input file

    :rtype: parserutils.DriverParser
    :return: command line argument parser
    """

    parser = parserutils.DriverParser(
        description=description,
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    if packmol_input:
        ahelp = ('Specify packmol input *inp files.')
        parser.add_argument(FLAG_PACKMOL_INPUT_FILES,
                            nargs='+',
                            type=parserutils.type_file,
                            help=ahelp)
    else:
        ahelp = ('Specify a structrued liquid builder input *json file.')
        parser.add_argument(FLAG_SLB_INPUT_FILE,
                            type=parserutils.type_json_file,
                            help=ahelp)

    ahelp = ('Specify a structure file for any *pdb files referenced in other '
             'input files.  This flag can be used multiple times.  Values '
             'are two strings, the first is the *mae file containing the '
             'structure while the second is the *pdb file name referenced '
             'in the other input files.  Structures can be atomistic '
             'or coarse-grained but all must be one or the other.')
    parser.add_argument(FLAG_STRUCTURE_FILE,
                        nargs=2,
                        action='append',
                        required=True,
                        help=ahelp)

    ahelp = (
        'Specify in Angstrom the lengths of the a-, b-, and c- lattice '
        'vectors that define the periodic boundary condition for the output '
        'cells.  This flag should be specified for each of the specified '
        'input files.')
    action = 'append' if packmol_input else 'store'
    parser.add_argument(FLAG_BOX_LENGTHS,
                        nargs=3,
                        action=action,
                        required=True,
                        type=parserutils.type_positive_float,
                        help=ahelp)

    ahelp = ('If specified then the job fails if any subjob fails '
             'otherwise the job fails only if all subjobs fail.')
    parser.add_argument(FLAG_NO_FAILURES, action='store_true', help=ahelp)

    ahelp = ('By default an input Desmond *cms file is prepared for '
             'each packmol output file.  Use this option to instead only write '
             'the outputs to *mae files.')
    parser.add_argument(FLAG_NO_PREP_MD, action='store_true', help=ahelp)

    atomistic_fields = parserutils.valid_forcefield_info().replace('Val', 'val')
    ahelp = (
        'Specify the force field to use if preparing Desmond *cms '
        'files.  For atomistic systems {atff} For coarse-grained '
        'systems specify the name of a coarse-grained force field (see related '
        'location type flag {flag}.)').format(atff=atomistic_fields,
                                              flag=FLAG_CGFFLD_LOCATION_TYPE)
    parser.add_argument(FLAG_FORCE_FIELD,
                        default=msutils.get_default_forcefield().name,
                        help=ahelp)

    ahelp = ('Location type for a specified coarse-grained force field.  '
             'Option \"{install}\" means from a standard '
             'location in the Schrodinger installation while option '
             '\"{local}\" means either from {local_path} or the job '
             'launch directory, i.e. the CWD.').format(
                 install=parserutils.INSTALLED_CG_FF_LOCATION_TYPE,
                 local=parserutils.LOCAL_CG_FF_LOCATION_TYPE,
                 local_path=cgff.FF_PARAMETERS_LOCAL_PATH)
    parser.add_argument(FLAG_CGFFLD_LOCATION_TYPE,
                        choices=parserutils.CG_FF_LOCATION_TYPES,
                        default=parserutils.LOCAL_CG_FF_LOCATION_TYPE,
                        help=ahelp)

    #Specifiy the forcefield for water to use if preparing *cms' files
    parserutils.add_desmond_parser_arguments(parser, [WATER_FF_TYPE_FLAG])

    options = [cmdline.HOST, cmdline.JOBNAME]
    cmdline.add_jobcontrol_options(parser, options=options, group_options=True)

    return parser


[docs]def get_job_spec_from_args(argv,
                           description,
                           program_name=PROGRAM_NAME,
                           default_job_name=DEFAULT_JOB_NAME,
                           packmol_input=True):
    """
    Return a JobSpecification.

    :type argv: list
    :param argv: command line arguments including the script name at [0]

    :type description: str
    :param description: the description

    :type program_name: str
    :param program_name: the program name

    :type default_job_name: str
    :param default_job_name: the default job name

    :type packmol_input: bool
    :param packmol_input: whether the input is a packmol
        input file

    :rtype: schrodinger.job.launchapi.JobSpecification
    :return: the JobSpecification
    """

    parser = get_parser(description, packmol_input=packmol_input)
    options = parser.parse_args(argv[1:])

    prep_md = not jobutils.get_option(options, FLAG_NO_PREP_MD)

    if prep_md:
        input_sts = []
        for mae_file, pdb_file in jobutils.get_option(options,
                                                      FLAG_STRUCTURE_FILE):
            if os.path.exists(mae_file):
                input_sts.append(structure.Structure.read(mae_file))
            else:
                parser.error(f'The {mae_file} can not be found.')
                return

        options = cgff.validate_options(parser,
                                        argv[1:],
                                        FLAG_FORCE_FIELD,
                                        FLAG_CGFFLD_LOCATION_TYPE,
                                        input_sts=input_sts)

    job_builder = jobutils.prepare_job_spec_builder(argv, program_name,
                                                    default_job_name)

    if packmol_input:
        in_files = jobutils.get_option(options, FLAG_PACKMOL_INPUT_FILES)
    else:
        in_files = [jobutils.get_option(options, FLAG_SLB_INPUT_FILE)]

    for in_file in in_files:
        job_builder.setInputFile(in_file)

    for mae_file, pdb_file in jobutils.get_option(options, FLAG_STRUCTURE_FILE):
        job_builder.setInputFile(mae_file)

    if prep_md:
        cgff.add_local_type_force_field_to_job_builder_w_check(
            job_builder,
            options,
            FLAG_FORCE_FIELD,
            FLAG_CGFFLD_LOCATION_TYPE,
            input_sts=input_sts)

    return job_builder.getJobSpec()


[docs]def main(description, *args, default_job_name=DEFAULT_JOB_NAME):
    """
    Main function used by drivers to run packmol.

    :type description: str
    :param description: the parser description

    :type default_job_name: str
    :param default_job_name: the default job name
    """

    parser = get_parser(description)
    options = parser.parse_args(args)

    st_files = jobutils.get_option(options, FLAG_STRUCTURE_FILE)

    input_sts = []
    for mae_file, pdb_file in st_files:
        if not os.path.exists(mae_file):
            parser.error(f'The {mae_file} file can not be found.')
            sys.exit(1)
        elif os.path.exists(pdb_file):
            parser.error(f'The {pdb_file} file already exists and '
                         'would automatically be overwritten.')
            sys.exit(1)
        input_sts.append(structure.Structure.read(mae_file))

    prep_md = not jobutils.get_option(options, FLAG_NO_PREP_MD)

    if prep_md:
        options = cgff.validate_options(parser,
                                        args,
                                        FLAG_FORCE_FIELD,
                                        FLAG_CGFFLD_LOCATION_TYPE,
                                        input_sts=input_sts)

    job_name = jobutils.get_jobname(default_job_name)
    logger, log_file_name = textlogger.create_logger(related_filename=job_name)
    textlogger.log_initial_data(logger, options)

    packmol_files = jobutils.get_option(options, FLAG_PACKMOL_INPUT_FILES)
    box_lengths = jobutils.get_option(options, FLAG_BOX_LENGTHS)

    # check PBC
    if len(packmol_files) != len(box_lengths):
        msg = ('Box lengths must be specified for each packmol input file.')
        parser.error(msg)
        sys.exit(1)

    input_files = dict(zip(packmol_files, box_lengths))

    sts = {
        pdb_file: structure.Structure.read(mae_file)
        for mae_file, pdb_file in st_files
    }

    allow_failures = not jobutils.get_option(options, FLAG_NO_FAILURES)

    msg = None
    output_files = []
    if not prep_md:
        try:
            cells = get_cells(input_files, sts, allow_failures=allow_failures)
        except RuntimeError as err:
            msg = str(err)
        else:
            for input_file, st in cells.items():
                base_name = fileutils.get_basename(input_file)
                output_file = base_name + '_out.mae'
                st.write(output_file)
                output_files.append(output_file)
    else:
        force_field = jobutils.get_option(options, FLAG_FORCE_FIELD)
        water_force_field = jobutils.get_option(options, WATER_FF_TYPE_FLAG)

        cg_ff_loc_type = jobutils.get_option(options, FLAG_CGFFLD_LOCATION_TYPE)
        try:
            cell_files = write_desmond_cells(
                input_files,
                sts,
                allow_failures=allow_failures,
                force_field=force_field,
                water_force_field=water_force_field,
                cg_ff_loc_type=cg_ff_loc_type)
        except RuntimeError as err:
            msg = str(err)
        else:
            output_files.extend(cell_files.values())

    if msg:
        logger.error(msg)
        sys.exit(msg)

    for output_file in output_files[:-1]:
        jobutils.add_outfile_to_backend(output_file)
    jobutils.add_outfile_to_backend(output_files[-1], set_structure_output=True)


[docs]def get_surfactant_infos(slb_dict):
    """
    Return surfactant information from the given dictionary of structured
    liquid builder options. Maestro files referenced within must exist in
    the current working directory.

    :type slb_dict: dict
    :param slb_dict: contains structured liquid builder options

    :raise RuntimeError: if there is an issue

    :rtype: list[SurfactantInfo], list[str]
    :return: the surfactant information objects and any extra structure
        file flags
    """

    try:
        surfactants_dict = slb_dict[SURFACTANTS]
    except KeyError:
        raise RuntimeError('The structured liquid builder input must contain '
                           f'a "{SURFACTANTS}" section.')

    cleaner = jobutils.StringCleaner()
    lipid_st_dict = desmondutils._get_lipid_ff_st_dict()

    surfactant_infos = []
    extra_structure_file_flags = []
    for mae_file, surfactant_kwargs in surfactants_dict.items():
        if os.path.exists(mae_file):
            st = structure.Structure.read(mae_file)
        else:
            lipid_st_title = mae_file
            st = lipid_st_dict.get(lipid_st_title)
            if not st:
                raise RuntimeError(f'The {mae_file} can not be found.')
            else:
                st.write(f'{lipid_st_title}.mae')
                jobutils.add_outfile_to_backend(f'{lipid_st_title}.mae')
                flags = [
                    FLAG_STRUCTURE_FILE, f'{lipid_st_title}.mae',
                    f'{lipid_st_title}.pdb'
                ]
                extra_structure_file_flags.extend(flags)
        name = cleaner.cleanAndUniquify(st.title)
        packing_pct = surfactant_kwargs.get(PACKING_PCT)
        number = surfactant_kwargs.get(NUMBER)
        if not (bool(packing_pct) ^ bool(number)):
            raise RuntimeError(
                f'Only one of either "{PACKING_PCT}" or "{NUMBER}" '
                'can be specified.')
        elif packing_pct:
            packing_pct = float(packing_pct)
        elif number:
            number = int(number)
        try:
            layer = surfactant_kwargs[LAYER]
            hydrophilic_idxs = get_idxs(surfactant_kwargs[HYDROPHILIC_IDXS])
            hydrophobic_idxs = get_idxs(surfactant_kwargs[HYDROPHOBIC_IDXS])
        except KeyError:
            raise RuntimeError(
                f'For each surfactant all of {LAYER}, '
                f'{HYDROPHILIC_IDXS}, and {HYDROPHOBIC_IDXS} must be specified.'
            )
        cion_file_name = surfactant_kwargs.get(CION_FILE_NAME)
        if cion_file_name:
            cion_st = structure.Structure.read(cion_file_name)
            cion_name = cleaner.cleanAndUniquify(cion_st.title)
        else:
            cion_st = None
            cion_name = None
        surfactant_info = SurfactantInfo(st=st,
                                         name=name,
                                         packing_pct=packing_pct,
                                         number=number,
                                         layer=layer,
                                         hydrophilic_idxs=hydrophilic_idxs,
                                         hydrophobic_idxs=hydrophobic_idxs,
                                         cion_st=cion_st,
                                         cion_name=cion_name)
        surfactant_infos.append(surfactant_info)

    return surfactant_infos, extra_structure_file_flags


[docs]def get_solvent_infos(slb_dict):
    """
    Return solvent information from the given dictionary of structured
    liquid builder options. Maestro files referenced within must exist in
    the current working directory.

    :type slb_dict: dict
    :param slb_dict: contains structured liquid builder options

    :raise RuntimeError: if there is an issue

    :rtype: list[SolventInfo]
    :return: the solvent information objects
    """

    try:
        solvents_dict = slb_dict[SOLVENTS]
    except KeyError:
        raise RuntimeError('The structured liquid builder input must contain '
                           f'a "{SOLVENTS}" section.')

    cleaner = jobutils.StringCleaner()

    solvent_infos = []
    for mae_file, solvent_kwargs in solvents_dict.items():
        if not os.path.exists(mae_file):
            raise RuntimeError(f'The {mae_file} can not be found.')
        st = structure.Structure.read(mae_file)
        name = cleaner.cleanAndUniquify(st.title)
        packing_pct = solvent_kwargs.get(PACKING_PCT)
        number = solvent_kwargs.get(NUMBER)
        if not (bool(packing_pct) ^ bool(number)):
            raise RuntimeError(
                f'Only one of either "{PACKING_PCT}" or "{NUMBER}" '
                'can be specified.')
        elif packing_pct:
            packing_pct = float(packing_pct)
        elif number:
            number = int(number)
        layer = solvent_kwargs.get(LAYER, HYDROPHILIC)
        if layer not in [HYDROPHILIC, HYDROPHOBIC]:
            raise RuntimeError(f'A solvent {LAYER} must be either '
                               f'{HYDROPHILIC} or {HYDROPHOBIC}.')
        solvent_info = SolventInfo(st=st,
                                   name=name,
                                   packing_pct=packing_pct,
                                   number=number,
                                   layer=layer)
        solvent_infos.append(solvent_info)

    return solvent_infos


[docs]def write_packmol_input_file(slb_dict, box_lengths):
    """
    Write the packmol input file from the given dictionary of structured
    liquid builder options.

    :type slb_dict: dict
    :param slb_dict: contains structured liquid builder options

    :type box_lengths: tuple[float]
    :param box_lengths: the cell lengths in Angstrom

    :raise RuntimeError: if there is an issue

    :rtype: str, list[str]
    :return: the packmol input file name and any extra structure
        file flags
    """

    surfactant_infos, extra_structure_file_flags = get_surfactant_infos(
        slb_dict)
    solvent_infos = get_solvent_infos(slb_dict)

    seed = slb_dict.get(SEED, DEFAULT_SEED)
    n_loop = slb_dict.get(N_LOOP, DEFAULT_N_LOOP)
    packing_f = slb_dict.get(PACKING_F, DEFAULT_PACKING_F)
    model_type = slb_dict.get(MODEL_TYPE, BILAYER)
    model_kwargs = slb_dict.get(MODEL_KWARGS, {})
    job_name = jobutils.get_jobname(DEFAULT_JOB_NAME)

    # create packmol input file
    writer = get_writer(surfactant_infos, solvent_infos, job_name, seed, n_loop,
                        box_lengths, packing_f, model_type, **model_kwargs)
    try:
        writer.addStructureBodies()
    except PackmolInputFileException as err:
        raise RuntimeError(str(err))
    packmol_file = writer.write(input_base_name=job_name)
    jobutils.add_outfile_to_backend(packmol_file)

    return packmol_file, extra_structure_file_flags