Source code for schrodinger.application.matsci.packmol

"""
Utilities for working with packmol.

Copyright Schrodinger, LLC. All rights reserved.
"""

import os
import string
import sys
import time
import numpy
import argparse
from collections import namedtuple

from schrodinger import structure
from schrodinger.application.desmond import cms
from schrodinger.application.matsci import cgforcefield as cgff
from schrodinger.application.matsci import coarsegrain
from schrodinger.application.matsci import desmondutils
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import msconst
from schrodinger.application.matsci import parserutils
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import textlogger
from schrodinger.application.matsci.mlearn import features
from schrodinger.application.matsci.nano import xtal
from schrodinger.infra import mm
from schrodinger.job import queue
from schrodinger.structutils import color
from schrodinger.test import ioredirect
from schrodinger.utils import fileutils
from schrodinger.utils import subprocess
from schrodinger.utils import cmdline

PDB_EXT = '.pdb'

PDBdata = namedtuple('PDBdata',
                     ['pdbname', 'pdbres', 'resnum', 'chain_name', 'order'])

# since pdbres names are taken to be in a field of 4 chars use the following
# short and rare character to uniqueify them
UNIQUE_PDB_RES_TAG = '#'

DEFAULT_PACKING_F = 0.8
DEFAULT_SEED = 123
DEFAULT_N_LOOP = 100

FALLBACK_THICKNESS = 0

SIDEMAX = 10000.

SURFACTANTS = 'surfactants'
SOLVENTS = 'solvents'
PACKING_PCT = 'packing_pct'
NUMBER = 'number'
LAYER = 'layer'
HYDROPHILIC_IDXS = 'hydrophilic_idxs'
HYDROPHOBIC_IDXS = 'hydrophobic_idxs'
CION_FILE_NAME = 'cion_file_name'

SurfactantInfo = namedtuple('SurfactantInfo', [
    'st', 'name', 'packing_pct', 'number', 'layer', 'hydrophilic_idxs',
    'hydrophobic_idxs', 'cion_st', 'cion_name'
])

SolventInfo = namedtuple('SolventInfo',
                         ['st', 'name', 'packing_pct', 'number', 'layer'])

HYDROPHILIC = 'Hydrophilic'
HYDROPHOBIC = 'Hydrophobic'

MONOLAYER = 'Monolayer'
BILAYER = 'Bilayer'
MICELLE = 'Micelle'
LIPOSOME = 'Liposome'
WORMLIKE_MICELLE = 'Wormlike Micelle'
ELONGATED_MICELLE = 'Elongated Micelle'

FLAG_PACKMOL_INPUT_FILES = '-packmol_input_files'
FLAG_SLB_INPUT_FILE = '-slb_input_file'
FLAG_STRUCTURE_FILE = '-structure_file'
FLAG_BOX_LENGTHS = '-box_lengths'
FLAG_NO_FAILURES = '-no_failures'
FLAG_NO_PREP_MD = '-no_prep_md'
FLAG_FORCE_FIELD = '-force_field'
WATER_FF_TYPE_FLAG = parserutils.WATER_FF_TYPE_FLAG
FLAG_CGFFLD_LOCATION_TYPE = '-cgffld_loc_type'

PROGRAM_NAME = 'Structured Liquid'
DEFAULT_JOB_NAME = 'structured_liquid'

SEPARATOR = ','

SEED = 'seed'
N_LOOP = 'n_loop'
PACKING_F = 'packing_f'
MODEL_TYPE = 'model_type'
MODEL_KWARGS = 'model_kwargs'


[docs]def get_idxs(idxs_str): """ Return integer indices from the given string of indices. :type idxs_str: str :param idxs_str: the string of indices :rtype: tuple :return: the integer indices """ return tuple( sorted(set([int(idx.strip()) for idx in idxs_str.split(SEPARATOR)])))
[docs]def get_class_map(): """ Return the class map. :rtype: dict :return: the class map """ return { MONOLAYER: MonolayerInputFile, BILAYER: BilayerInputFile, MICELLE: MicelleInputFile, LIPOSOME: LiposomeInputFile, WORMLIKE_MICELLE: WormlikeMicelleInputFile, ELONGATED_MICELLE: ElongatedMicelleInputFile }
[docs]def get_pdb_data(pdbname, pdbres, resnum=None, chain_name=None, order=None): """ Return a PDBdata. :type pdbname: str :param pdbname: the PDB atom name :type pdbres: str :param pdbres: the PDB residue name :type resnum: int or None :param resnum: the PDB residue number if needed :type chain_name: str or None :param chain_name: the PDB chain name if needed :type order: int or None :param order: a bond order to be used when bonding to the atom corresponding to this object :rtype: PDBdata :return: the PDB data """ if chain_name: chain_name = chain_name.strip() return PDBdata(pdbname=pdbname.strip(), pdbres=pdbres.strip(), resnum=resnum, chain_name=chain_name, order=order)
[docs]class Job(queue.SubprocessJob):
[docs] def __init__(self, *args, **kwargs): """ See parent class for documentation. """ self.output_file = kwargs.pop('output_file', None) super().__init__(*args, **kwargs) self.name = fileutils.get_basename(self.output_file) + '_packmol'
[docs] def doCommand(self, *args, **kwargs): """ See parent class for documentation. """ # typically ran as 'packmol < input.inp', # to avoid shell command pass script and # input separately self._start_time = time.time() script, input_file = self._command self._stdin = open(input_file, 'r') self._subprocess = subprocess.Popen([script], stdin=self._stdin, stdout=self._stdout, stderr=self._stderr, universal_newlines=True)
[docs] def postCommand(self): """ See parent class for documentation. """ super().postCommand() self._stdin.close()
[docs] def update(self): """ See parent class for documentation. """ # packmol failures do not return # an error code so manually handle it here super().update() if self.state == queue.JobState.DONE and self.output_file and \ not os.path.exists(self.output_file): self.state = queue.JobState.FAILED
[docs]def run(input_files, allow_failures=True): """ Run. :type input_files: list :param input_files: packmol input files :type allow_failures: bool :param allow_failures: if True then all but a single subjob are allowed to fail without causing the main job to fail :raise RuntimeError: if all subjobs fail, if allow_failures is False then if a single subjob fails then the main job fails :rtype: dict :return: keys are input files, values are structure output files """ # structure files referenced in the given packmol input files # are assumed to exist in the directory from which this function # is called if allow_failures: max_failures = len(input_files) - 1 else: max_failures = 0 job_dj = queue.JobDJ(verbosity='normal', max_failures=max_failures, max_retries=0) # get binary name if sys.platform == 'win32': packmol_bin = 'packmol.exe' else: packmol_bin = 'packmol' log_fhs = [] output_files = {} for input_file in input_files: with open(input_file, 'r') as afile: for line in afile: tokens = line.strip().split() if tokens and tokens[0].lower() == 'output': output_file = tokens[1] break else: # an input file that does not specify an # output file will fail anyway and is handled # using max_failures output_file = fileutils.get_basename(input_file) + '.out' base_name = fileutils.get_basename(output_file) log_file = base_name + '.log' jobutils.add_outfile_to_backend(log_file, stream=True) log_fh = open(log_file, 'w') log_fhs.append(log_fh) output_files[input_file] = output_file cmd = [packmol_bin, input_file] job = Job(cmd, stdout=log_fh, stderr=log_fh, output_file=output_file) job_dj.addJob(job) try: job_dj.run() finally: for log_fh in log_fhs: log_fh.close() for input_file, output_file in output_files.items(): if not os.path.exists(output_file): output_files[input_file] = None jobutils.add_outfile_to_backend(output_file) return output_files
[docs]class PDBWriter(structure.PDBWriter):
[docs] def write(self, ct): """ See parent class for documentation. """ # First check CT is able to be written. TexualStructure objects # are not: if isinstance(ct, structure.TextualStructure): raise Exception("TextualStructure objects can not be written to " "an PDB format file") mm.mmpdb_initialize(mm.error_handler) fh = mm.mmpdb_new() # by default structure.PDBWriter reorders atoms, it is critical # that this is avoided due to atom index references in the packmol # input file mm.mmpdb_set(fh, mm.MMPDB_NO_REORDER_ATOMS) mm.mmpdb_write(fh, ct, self.filename) mm.mmpdb_delete(fh)
def _set_atom_properties(st, key, available_values, justify=False): """ Set atom properties. :type st: schrodinger.structure.Structure :param st: the structure :type key: str :param key: the property key :type available_values: list :param available_values: unique available values for the property :type justify: bool :param justify: whether to left justify string values in a field of length 4 """ # pdb properties like atom name are limited to 4 characters and # so we assume no more than 26 identifiers (see more info in the calling # code), the following means all possible 26 identifiers have been used, # either because all 26 have been assigned by this function or because # the input already used all 26 identifiers, returning means that some # atoms may not be identified using pdb properties if not available_values: return values = [] for atom in st.atom: value = atom.property.get(key, None) if (isinstance(value, str) and value.strip() in ['', 'UNK']) or \ (isinstance(value, int) and not value): value = None values.append(value) if None not in values: return new_value = available_values.pop(0) if justify: new_value = new_value.ljust(4) for idx, value in enumerate(values, 1): if value is None: st.atom[idx].property[key] = new_value
[docs]def set_unique_pdb_atom_names(st): """ Set unique PDB atom names on the given structure. :type st: schrodinger.structure.Structure :param st: the structure """ adict = {} for atom in st.atom: idxs = adict.setdefault(atom.pdbres, {}).setdefault(atom.element, []) idxs.append(atom.index) idx = str(len(idxs)) atom.pdbname = f'{atom.element}{idx}'.ljust(4)
def _fix_bonding(st, sts, bond_dict=None): """ Fix the bonding output from packmol. :type st: schrodinger.structure.Structure :param st: the structure to fix :type sts: dict :param sts: keys are file names (referenced in the given packmol input files), values are schrodinger.structure.Structure :type bond_dict: dict or None :param bond_dict: keys are pair tuples containing PDB atom names and residue names, values are triple tuples containing PDB atom names, residue names, and bond orders, or None if it is to be determined :rtype: dict :return: the bond_dict """ # the structure to fix comes from using the PDBReader class # on the packmol output pdb file, even though the input pdb # files of comoponents have connections the packmol output pdb # does not, the PDBReader class by default assigns bonds, yet # the assignment can be missing bonds, for both atomistic and # CG structures, or the assignment can be wrong, for example # due to packmol exiting with a FORCED status which results # in non-bonding atoms that are too close to each other, # the PDBReader class lacks a clean API to prevent bonding # so remove bonds here and rebond pairs = [(bond.atom1, bond.atom2) for bond in st.bond] for pair in pairs: st.deleteBond(*pair) if not bond_dict: bond_dict = {} for file_name, mae_st in sts.items(): # read the PDB to get the PDB atom names that were # used during writing pdb_st = structure.Structure.read(file_name) # ordering is enforced for mae_atom, pdb_atom in zip(mae_st.atom, pdb_st.atom): # this tuple will be unique over the entire system atuple = get_pdb_data(pdb_atom.pdbname, pdb_atom.pdbres) btuples = [] for mae_neighbor in mae_atom.bonded_atoms: order = mae_st.getBond(mae_atom, mae_neighbor).order pdb_neighbor = pdb_st.atom[mae_neighbor.index] btuple = get_pdb_data(pdb_neighbor.pdbname, pdb_neighbor.pdbres, order=order) btuples.append(btuple) bond_dict[atuple] = btuples idx_dict = {} for atom in st.atom: atuple = get_pdb_data(atom.pdbname, atom.pdbres, resnum=atom.resnum, chain_name=atom.chain_name) idx_dict[atuple] = atom.index # see MATSCI-7720 - since only a single instance of each residue type # is supported per component there will never be bonds between atoms with # the same residue name but different residue numbers, atoms in a # component all have the same chain name for atuple, idx in idx_dict.items(): a_key = get_pdb_data(atuple.pdbname, atuple.pdbres) for btuple in bond_dict[a_key]: b_key = get_pdb_data(btuple.pdbname, btuple.pdbres, resnum=atuple.resnum, chain_name=atuple.chain_name) jdx = idx_dict[b_key] msutils.add_or_update_bond_order(st, idx, jdx, btuple.order) return bond_dict def _fix_coarse_grained_st(st, sts, cg_info_dict=None): """ Fix the coarse grained structure output from packmol. :type st: schrodinger.structure.Structure :param st: the structure to fix :type sts: dict :param sts: keys are file names (referenced in the given packmol input files), values are schrodinger.structure.Structure :type cg_info_dict: dict or None :param cg_info_dict: keys are pair tuples containing PDB atom names and residue names, values are coarsegrain.ParticleInfo, or None if it is to be determined :raise RuntimeError: if there is a problem with the input :rtype: dict :return: the cg_info_dict """ try: is_coarse_grain = coarsegrain.is_coarse_grain_input( input_sts=sts.values()) except ValueError as err: raise RuntimeError(str(err)) if not is_coarse_grain: return # the packmol input and output pdb files lack coarse-grained particle # attributes, since the s_m_pdb_atom_name is limited store that # information here so that it can be applied later if not cg_info_dict: cg_info_dict = {} for file_name, mae_st in sts.items(): # read the PDB to get the PDB atom names that were # used during writing pdb_st = structure.Structure.read(file_name) # ordering is enforced for mae_atom, pdb_atom in zip(mae_st.atom, pdb_st.atom): key = mae_atom.name name = key rgb = mae_atom.color.rgb radius = mm.mmct_atom_get_display_radius(mae_st, mae_atom.index) mass = mm.mmct_atom_get_coarse_grain_mass( mae_st, mae_atom.index) info = coarsegrain.ParticleInfo(key=key, name=name, rgb=rgb, radius=radius, mass=mass) # this tuple will be unique over the entire system atuple = get_pdb_data(pdb_atom.pdbname, pdb_atom.pdbres) cg_info_dict[atuple] = info coarsegrain.set_as_coarse_grain(st) for atom in st.atom: atuple = get_pdb_data(atom.pdbname, atom.pdbres) info = cg_info_dict[atuple] coarsegrain.set_atom_coarse_grain_properties(st, atom, info.name, rgb=info.rgb, radius=info.radius, mass=info.mass) st.applyStyle(atoms=structure.ATOM_BALLNSTICK, bonds=structure.BOND_BALLNSTICK) return cg_info_dict
[docs]def get_cells(input_files, sts, allow_failures=True): """ Return cells. :type input_files: dict :param input_files: keys are packmol input files, values are tuples of the 3 box lengths (Angstrom) defining the PBC :type sts: dict :param sts: keys are file names (referenced in the given packmol input files), values are schrodinger.structure.Structure :type allow_failures: bool :param allow_failures: if True then all but a single subjob are allowed to fail without causing the main job to fail :raise RuntimeError: if there is a problem with the input :rtype: dict :return: keys are input files, values are schrodinger.structure.Structure """ # 26 identifiers should be enough (potentially 1 for each componentent that # is missing data) residue_numbers = list(range(1, 27)) chain_names = list(string.ascii_uppercase) pdb_residue_names = [f'R{i}' for i in residue_numbers] pairs = [('i_m_residue_number', residue_numbers), ('s_m_chain_name', chain_names), ('s_m_pdb_residue_name', pdb_residue_names)] # uniqueify for st in sts.values(): for atom in st.atom: for key, collection in pairs: value = atom.property.get(key) if isinstance(value, str): value = value.strip() try: collection.remove(value) except ValueError: pass for file_name, st in sts.items(): # use the the run function for non-pdb structures, packmol # supports pdb, tinker, xyz, and moldy, with pdb the default if fileutils.splitext(file_name)[1] != PDB_EXT: raise RuntimeError('Only PDB structure files are supported.') _set_atom_properties(st, 'i_m_residue_number', residue_numbers) _set_atom_properties(st, 's_m_chain_name', chain_names) _set_atom_properties(st, 's_m_pdb_residue_name', pdb_residue_names, justify=True) # warnings regarding nonunique PDB atom names can be thrown, # yet in general uniqueifying them within the allowed 4 character # max is not possible, for example the packmol source example # protein.pdb has multiple atoms with PDB atom name of 'HH11', # if the structure lacks s_m_pdb_atom_name properties then the # PDBWriter creates them by numbering by element per residue per chain, # for example O1, H1, H2, (residue R1, chain A), O1, O2, # H1, (residue R2, chain A), O1, H1, H2 (residue R1, chain B), etc., # since unique PDB atom names are required for post-processing use # the PDBWriter convention if they aren't already unique if len(set(atom.pdbname for atom in st.atom)) < st.atom_total: # the PDBWriter PDB atom name assignment convention requires that atoms # in a residue have contiguous indexing so instead do it by hand here set_unique_pdb_atom_names(st) with ioredirect.IOSilence(): with PDBWriter(file_name) as writer: writer.write(st) output_files = run(input_files.keys(), allow_failures=allow_failures) bond_dict = None cg_info_dict = None out_sts = {} for input_file, output_file in output_files.items(): if output_file is None: continue st = structure.Structure.read(output_file) box_lengths = input_files[input_file] chorus = (box_lengths[0], 0., 0., 0., box_lengths[1], 0., 0., 0., box_lengths[2]) xtal.set_pbc_properties(st, chorus) if not st.title: st.title = fileutils.get_basename(input_file) bond_dict = _fix_bonding(st, sts, bond_dict=bond_dict) cg_info_dict = _fix_coarse_grained_st(st, sts, cg_info_dict=cg_info_dict) if not coarsegrain.is_coarse_grain_input(input_sts=sts.values()): color.apply_color_scheme(st, 'element') # see MATSCI-9010 - allow polymer builder output which features multiple # residues by the same name unset_unique_pdb_res_names(st) out_sts[input_file] = st return out_sts
[docs]def write_desmond_cells(input_files, sts, allow_failures=True, force_field=None, water_force_field=msconst.SPC, cg_ff_loc_type=parserutils.LOCAL_CG_FF_LOCATION_TYPE): """ Write Desmond cells. :type input_files: dict :param input_files: keys are packmol input files, values are tuples of the 3 box lengths (Angstrom) defining the PBC :type sts: dict :param sts: keys are file names (referenced in the given packmol input files), values are schrodinger.structure.Structure :type allow_failures: bool :param allow_failures: if True then all but a single subjob are allowed to fail without causing the main job to fail :type force_field: str :param force_field: name of FF to apply :type water_force_field: str :param water_force_field: name of the water force field to apply, options are available in desmondutils :type cg_ff_loc_type: str :param cg_ff_loc_type: specifies the location to which to look for coarse-grained force field files, one of parserutils.INSTALLED_CG_FF_LOCATION_TYPE or parserutils.LOCAL_CG_FF_LOCATION_TYPE :raise RuntimeError: if there is a problem with the input :rtype: dict :return: keys are input files, values are names of written Desmond `*cms` files """ if force_field is None: force_field = msutils.get_default_forcefield().name, cells = get_cells(input_files, sts, allow_failures=allow_failures) cleaner = jobutils.StringCleaner() wam_type = jobutils.WAM_TYPES.MS_PREPARE_FOR_MD output_files = {} for input_file, st in cells.items(): base_name = fileutils.get_basename(input_file) input_file = base_name + '_out.mae' st.write(input_file) st = xtal.sync_pbc(st, create_pbc=True) title = cleaner.cleanAndUniquify(st.title) pbc_position = st.property.get(xtal.PBC_POSITION_KEY, xtal.CENTER_PBC_POSITION) rezero_system = not pbc_position.startswith(xtal.ANCHOR_PREFIX) sysfile = desmondutils.run_system_builder( st, title, forcefield=force_field, rezero_system=rezero_system, split_components=True, water_fftype=water_force_field, set_incorporation=False, wam_type=wam_type, location_type=cg_ff_loc_type) if not sysfile: if not allow_failures: msg = f'The Desmond system could not be prepared for input {st.title}.' raise RuntimeError(msg) continue output_files[input_file] = sysfile for output_file in output_files.values(): if not os.path.exists(output_file): msg = f'The Desmond system output file {output_file} can not be found.' raise RuntimeError(msg) model = cms.Cms(file=output_file) try: desmondutils.assign_lipid_ff(model) except ValueError: # this means no known lipid template is present, so # nothing to do pass else: model.write(output_file) jobutils.add_wam_to_cms(output_file, wam_type) jobutils.add_outfile_to_backend(output_file) return output_files
[docs]def set_unique_pdb_res_names(st): """ Set unique PDB residue names on the given structure. :raise RuntimeError: if there is a problem with the input :type st: schrodinger.structure.Structure :param st: the structure """ adict = {} for residue in st.residue: adict.setdefault(residue.pdbres.strip(), []).append(residue) for name, residues in adict.items(): if len(residues) == 1: continue for tag_idx, residue in enumerate(residues, 1): pdbres = f'{name}{UNIQUE_PDB_RES_TAG}{tag_idx}' if len(pdbres) > 4: raise RuntimeError( 'The uniqueified PDB residue name ' f'of {pdbres} for residue {name} has more than 4 ' 'characters which is not supported.') residue.pdbres = pdbres.ljust(4)
[docs]def unset_unique_pdb_res_names(st): """ Unset unique PDB residue names on the given structure. :type st: schrodinger.structure.Structure :param st: the structure """ for residue in st.residue: try: name, tag_idx = residue.pdbres.strip().split(UNIQUE_PDB_RES_TAG) except ValueError: continue residue.pdbres = name.ljust(4)
[docs]def group_infos_by_layer(infos): """ Group the given information objects into a dictionary keyed by layer. :type infos: list[`SurfactantInfo`] or list[`SolventInfo`] :param infos: contains surfactant or solvent infos :rtype: dict :return: the information objects keyed by layer """ adict = {} for info in infos: adict.setdefault(info.layer, []).append(info) return adict
[docs]def get_max_distance_btw_groups(st, idxs, jdxs): """ Return the maximum distance (Angstrom) between the given groups of atom indices. :type st: `schrodinger.structure.Structure` :param st: the structure :type idxs: tuple :param idxs: first group of atom indices :type jdxs: tuple :param jdxs: second group of atom indices :rtype: float :return: the maximum distance in Angstrom """ # it is possibile that this distance would better be taken # as a projection onto a given axis, for example c lattice, # radial, etc. vector amax = -numpy.inf for idx in idxs: for jdx in jdxs: if idx == jdx: continue dist = st.measure(idx, jdx) dist += st.atom[idx].radius + st.atom[jdx].radius amax = max(amax, dist) if amax == -numpy.inf: idx = idxs[0] amax = 2 * st.atom[idx].radius return amax
[docs]class PackmolInputFileException(Exception): pass
[docs]class PackmolInputFile: """ Manage a packmol input file. """ IN_EXT = '.inp'
[docs] def __init__(self, tolerance=2., filetype='pdb', output_base_name='packmol', comment='', general_body=''): """ Create an instance. :type tolerance: float :param tolerance: the distance tolerance in Angstrom :type filetype: str :param filetype: the file type to use for all structure files, pdb, tinker, xyz, or moldy :type output_base_name: str :param output_base_name: the base name to use for the packmol output structure file :type comment: str :param comment: a comment placed at the top of the the packmol input file, include preceeding '#' :type general_body: str :param general_body: the general body, should contain newlines, this is for any additional top level parameters that do not have to do with structures """ self.tolerance = tolerance self.filetype = filetype self.output = output_base_name + f'.{self.filetype}' self.comment = comment self.general_body = general_body self.structure_bodies = []
[docs] def addStructureBody(self, base_name, body): """ Add a structure body to the input file. :type base_name: str :param base_name: the base name of the input structure file :type body: str :param body: the body, should contain indentation and newlines """ self.structure_bodies.append((base_name, body))
[docs] def addStructureBodies(self): """ Add structure bodies to the input file. :raise PackmolInputFileException: if there is an issue with the input """ raise NotImplementedError
[docs] def write(self, input_base_name='packmol'): """ Write the packmol input file. :type input_base_name: str :param input_base_name: the base name to use for the packmol input file :raise PackmolInputFileException: if there is an issue with the input :rtype: str :return: the packmol input file name """ if not self.structure_bodies: raise PackmolInputFileException('No structural information ' 'has been provided.') input_file_name = f'{input_base_name}{self.IN_EXT}' with open(input_file_name, 'w') as afile: if self.comment: afile.write(f'{self.comment}\n\n') afile.write(f'tolerance {str(self.tolerance)}\n') afile.write(f'filetype {self.filetype}\n') afile.write(f'output {self.output}\n\n') if self.general_body: afile.write(f'{self.general_body}') for base_name, body in self.structure_bodies: afile.write(f'structure {base_name}.{self.filetype}\n') afile.write(f'{body}') afile.write('end structure\n\n') return input_file_name
[docs]class StructuredLiquidInputFile(PackmolInputFile): """ Manage a structured liquid input file. """
[docs] def __init__(self, *args, **kwargs): """ See parent class for documentation. """ super().__init__(*args, **kwargs) self._number_dict = {} self._volume_dict = {}
[docs] def check(self): """ Check cell lengths. :raise PackmolInputFileException: if there is an issue with the input """ la, lb, lc = self.cell_lengths a_min, b_min, c_min = self.getMinCellLengths() if a_min is not None and a_min > la: vector = 'a-vector' elif b_min is not None and b_min > lb: vector = 'b-vector' elif c_min is not None and c_min > lc: vector = 'c-vector' else: vector = None if vector: raise PackmolInputFileException( f'The length of the {vector} is ' 'not large enough to accomodate surfactant and solvent layers.')
[docs] def prepare(self, cell_lengths, surfactant_infos, solvent_infos, layer_sep=1, packing_f=DEFAULT_PACKING_F, min_constraint_window_surfactant_idxs=25): """ Prepare. :type cell_lengths: tuple :param cell_lengths: the cell lengths of the output structure file :type surfactant_infos: list :param surfactant_infos: contains SurfactantInfo :type solvent_infos: list :param solvent_infos: contains SolventInfo :type layer_sep: float :param layer_sep: the layer separation in Angstrom :type packing_f: float :param packing_f: a packing efficiency factor used to control the density of surfactant and solvent molecules :type min_constraint_window_surfactant_idxs: float :param min_constraint_window_surfactant_idxs: this is the minimum window length for constraining surfactant hydrophilic and hydrophobic indices as a percentage of the surfactant length, should be in (0, 50) :raise PackmolInputFileException: if there is an issue with the input """ for info in surfactant_infos + solvent_infos: self._number_dict[info] = 0 self._volume_dict[info] = 0 if not 0 < min_constraint_window_surfactant_idxs < 50: raise PackmolInputFileException( 'The minimum constraint window for surfactant ' 'hydrophilic and hydrophobic end atom indices should be a percentage in the ' 'range of (0, 50).') self.cell_lengths = cell_lengths self.surfactant_infos = group_infos_by_layer(surfactant_infos) self.solvent_infos = group_infos_by_layer(solvent_infos) # for coarse-grained systems the layer separation does not need to be # changed to account for particle radii, this effect is handled elsewhere self.layer_sep = layer_sep self.packing_f = packing_f # set some distance attributes for surfactant self.max_surfactant_dists = {} for layer, infos in self.surfactant_infos.items(): max_surfactant_dist = -numpy.inf max_hydrophilic_dist = -numpy.inf max_hydrophobic_dist = -numpy.inf for info in infos: max_surfactant_dist = max( max_surfactant_dist, get_max_distance_btw_groups(info.st, info.hydrophilic_idxs, info.hydrophobic_idxs)) max_hydrophilic_dist = max( max_hydrophilic_dist, get_max_distance_btw_groups(info.st, info.hydrophilic_idxs, info.hydrophilic_idxs)) max_hydrophobic_dist = max( max_hydrophobic_dist, get_max_distance_btw_groups(info.st, info.hydrophobic_idxs, info.hydrophobic_idxs)) amin = min_constraint_window_surfactant_idxs * max_surfactant_dist / 100 max_hydrophilic_dist = max(max_hydrophilic_dist, amin) max_hydrophobic_dist = max(max_hydrophobic_dist, amin) self.max_surfactant_dists[layer] = (max_surfactant_dist, max_hydrophilic_dist, max_hydrophobic_dist) # set some distance attributes for counter-ion self.max_cion_dists = self.getMaxDists(self.surfactant_infos) # set some distance attributes for solvent self.max_solvent_dists = self.getMaxDists(self.solvent_infos)
[docs] def getMaxDists(self, all_infos): """ Return a dictionary of maximum distances (Ang.) among the structures in each layer of the given all_infos. :type all_infos: dict :param all_infos: keys are layers, values are lists containing either SurfactantInfo or SolventInfo :rtype: dict :return: keys are layers, values are maximum distances """ max_dists = {} for layer, infos in all_infos.items(): max_dist = -numpy.inf for info in infos: if isinstance(info, SurfactantInfo): st = info.cion_st if not st: continue else: st = info.st idxs = list(range(1, st.atom_total + 1)) max_dist = max(max_dist, get_max_distance_btw_groups(st, idxs, idxs)) max_dists[layer] = max_dist return max_dists
[docs] def getBoxSliceVol(self, cmin, cmax): """ Return the box slice volume. :type cmin: float :param cmin: the lower bound on the layer in Angstrom :type cmax: float :param cmax: the upper bound on the layer in Angstrom :rtype: float :return: the box slice volume in Ang.^3 """ la, lb, lc = self.cell_lengths return la * lb * (cmax - cmin)
[docs] def getSphereSliceVol(self, rmin, rmax): """ Return the sphere slice volume. :type rmin: float :param rmin: the lower bound on the layer in Angstrom :type rmax: float :param rmax: the upper bound on the layer in Angstrom :rtype: float :return: the sphere slice volume in Ang.^3 """ return 4 * numpy.pi * (rmax**3 - rmin**3) / 3
[docs] def getOutsideSphereVol(self, radius): """ Return the box volume less the sphere volume. :type radius: float :param radius: the radius of the sphere in Angstrom :rtype: float :return: the box volume less the sphere volume in Ang.^3 """ sphere_vol = self.getSphereSliceVol(0, radius) la, lb, lc = self.cell_lengths box_vol = self.getBoxSliceVol(0, lc) return box_vol - sphere_vol
[docs] def getCylinderSliceVol(self, rmin, rmax, length): """ Return the cylinder slice volume. :type rmin: float :param rmin: the lower bound on the layer in Angstrom :type rmax: float :param rmax: the upper bound on the layer in Angstrom :type length: float :param length: the length of the cylinder in Angstrom :rtype: float :return: the cylinder slice volume in Ang.^3 """ return length * numpy.pi * (rmax**2 - rmin**2)
[docs] def getOutsideCylinderVol(self, radius, length): """ Return the box volume less the cylinder volume. :type radius: float :param radius: the radius of the cylinder in Angstrom :type length: float :param length: the length of the cylinder in Angstrom :rtype: float :return: the box volume less the cylinder volume in Ang.^3 """ cylinder_vol = self.getCylinderSliceVol(0, radius, length) la, lb, lc = self.cell_lengths box_vol = self.getBoxSliceVol(0, lc) return box_vol - cylinder_vol
[docs] def getEllipsoidSliceVol(self, rmin, rmax): """ Return the ellipsoid slice volume. :type rmin: float :param rmin: the lower bound on the layer in Angstrom :type rmax: float :param rmax: the upper bound on the layer in Angstrom :rtype: float :return: the ellipsoid slice volume in Ang.^3 """ # ellipsoids are prolate spheroids rpmin = self.factor * rmin rpmax = self.factor * rmax return 4 * numpy.pi * (rmax * rmax * rpmax - rmin * rmin * rpmin) / 3
[docs] def getOutsideEllipsoidVol(self, radius): """ Return the box volume less the ellipsoid volume. :type radius: float :param radius: the radius of the ellipsoid in Angstrom, this is half the length of the minor axis :rtype: float :return: the box volume less the ellipsoid volume in Ang.^3 """ ellipsoid_vol = self.getEllipsoidSliceVol(0, radius) la, lb, lc = self.cell_lengths box_vol = self.getBoxSliceVol(0, lc) return box_vol - ellipsoid_vol
[docs] def getNumber(self, info, amin, amax): """ Return the number of surfactant or solvent molecules to add for the given info. :type info: SurfactantInfo or SolventInfo :param info: the info object for this layer :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float or None :param amax: the upper bound on the layer in Angstrom or None if there isn't one in which case the remaining outermost space of the cell will be used :rtype: int :return: the number of molecules """ if info.number: self._number_dict[info] += info.number return info.number # see MATSCI-12671 - prevent packmol from having to pack outside # the requested cell dimensions for CG structures, otherwise this # leads to overlapping images, do this by increasing the molecular # volume, the 1.56 = 2.35 Ang. / 1.5 Ang. where 2.35 Ang. is a # typical Martini particle radius and 1.5 Ang. is the radius for a # carbon atom vdw_scale = 1.56 if msutils.is_coarse_grain(info.st) else 1 buffer_len = max(atom.radius for atom in info.st.atom) mol_vol = features.Complex(info.st).getVDWVolume(vdw_scale=vdw_scale, buffer_len=buffer_len) vol = self.getLayerVolume(amin, amax) # does the number of solvent molecules need to be reduced by the volume # taken up by counter-ions? number = max( int(self.packing_f * (info.packing_pct / 100) * vol / mol_vol), 1) self._number_dict[info] += number return number
[docs] def getLayerVolume(self, amin, amax, buffer_len=0): """ Return the volume of this layer. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float or None :param amax: the upper bound on the layer in Angstrom or None if there isn't one in which case the remaining outermost space of the cell will be used :type buffer_len: float :param buffer_len: a buffer length in Angstrom :rtype: int :return: the volume in Ang.^3 """ if amax: vol = self.getVol(amin, amax + buffer_len) else: vol = self.getOutsideVol() return vol
[docs] def addLayer(self, info, amin, amax, hydrophilic_at_max=False, number=None, add_cion=False): """ Add a layer. :type info: SurfactantInfo or SolventInfo :param info: the info object for this layer :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float or None :param amax: the upper bound on the layer in Angstrom or None if there isn't one in which case the remaining outermost space of the cell will be used :type hydrophilic_at_max: bool :param hydrophilic_at_max: if info is SurfactantInfo whether the hydrophilic atoms are to be located at the maximum :type number: int or None :param number: the number of molecules to add, if None it will be determined :type add_cion: bool :param add_cion: if info is SurfactantInfo whether the layer is for the counter-ion """ if number is None: number = self.getNumber(info, amin, amax) body = f' number {number}\n' st = info.st name = info.name # handle surfactant and solvent constraints hydrophilic_idxs = getattr(info, 'hydrophilic_idxs', None) hydrophobic_idxs = getattr(info, 'hydrophobic_idxs', None) if hydrophilic_idxs and hydrophobic_idxs: if add_cion: st = info.cion_st name = info.cion_name # counter-ions have the same constraints as solvent body += self.getSolventConstraints(amin, amax) else: hydrophilic_idxs = ' '.join(str(x) for x in hydrophilic_idxs) hydrophobic_idxs = ' '.join(str(x) for x in hydrophobic_idxs) max_surfactant_dist, max_hydrophilic_dist, max_hydrophobic_dist = \ self.max_surfactant_dists[info.layer] if hydrophilic_at_max: hydrophilic_key = self.SURFACTANT_TOP_ATOM_CONSTRAINT hydrophilic_value = self.getConstraintType( amax - max_hydrophilic_dist) hydrophobic_key = self.SURFACTANT_BOTTOM_ATOM_CONSTRAINT hydrophobic_value = self.getConstraintType( amin + max_hydrophobic_dist) else: hydrophilic_key = self.SURFACTANT_BOTTOM_ATOM_CONSTRAINT hydrophilic_value = self.getConstraintType( amin + max_hydrophilic_dist) hydrophobic_key = self.SURFACTANT_TOP_ATOM_CONSTRAINT hydrophobic_value = self.getConstraintType( amax - max_hydrophobic_dist) body += self.getSurfactantConstraints(amin, amax) body += f' atoms {hydrophilic_idxs}\n' body += f' {hydrophilic_key} {hydrophilic_value}\n' body += ' end atoms\n' body += f' atoms {hydrophobic_idxs}\n' body += f' {hydrophobic_key} {hydrophobic_value}\n' body += ' end atoms\n' else: body += self.getSolventConstraints(amin, amax) body += self.getCoarseGrainedRadiiBody(st) # see MATSCI-7720 - we need to ensure unique residue numbering # in order to properly post-process the bonding, etc. body += ' resnumbers 0\n' self.addStructureBody(name, body)
[docs] def getCoarseGrainedRadiiBody(self, st): """ Return the coarse-grained radii body. :type st: schrodinger.structure.Structure :param st: the structure :rtype: str :return: the coarse-grain radii body """ if not msutils.is_coarse_grain(st): return '' # all particles of a given name will have the same radius names_to_idxs = {} names_to_radii = {} for atom in st.atom: names_to_idxs.setdefault(atom.name, []).append(atom.index) names_to_radii[atom.name] = atom.radius body = '' for name, idxs in names_to_idxs.items(): idxs = ' '.join(str(x) for x in idxs) radius = names_to_radii[name] body += f' atoms {idxs}\n' body += f' radius {radius}\n' body += ' end atoms\n' return body
[docs] def getTotalSurfactantThickness(self, layers_are_bilayers=True): """ Return the total surfactant thickness in Angstrom. :type layers_are_bilayers: bool :param layers_are_bilayers: whether layers are bilayers :rtype: float :return: the total surfactant thickness in Angstrom """ # include separations thicknesses = [dists[0] for dists in self.max_surfactant_dists.values()] n_layer_seps = len(self.max_surfactant_dists) - 1 thickness = sum(thicknesses) + n_layer_seps * self.layer_sep if layers_are_bilayers: return 2 * thickness + self.layer_sep else: return thickness
[docs] def getTotalCIonThickness(self): """ Return the total counter-ion thickness in Angstrom. :rtype: float :return: the total counter-ion thickness in Angstrom """ # do not include separations if self.max_cion_dists: return max(self.max_cion_dists.values()) else: return FALLBACK_THICKNESS
[docs] def getTotalNonSurfactantThickness(self): """ Return the total non-surfactant thickness in Angstrom. :rtype: float :return: the total non-surfactant thickness in Angstrom """ # do not include separations # counter-ions always go in the hydrophilic layer cion_t = self.getTotalCIonThickness() philic_t = self.max_solvent_dists.get(HYDROPHILIC, FALLBACK_THICKNESS) phobic_t = self.max_solvent_dists.get(HYDROPHOBIC, FALLBACK_THICKNESS) return max(cion_t, philic_t) + phobic_t
[docs] def addSurfactantBodies(self, start_dist, start_hydrophilic_at_max=False, layers=None, layers_are_bilayers=True): """ Add surfactant bodies to the input file. :type start_dist: float :param start_dist: start adding surfactants at this distance in Angstrom :type start_hydrophilic_at_max: bool :param start_hydrophilic_at_max: specifies whether the hydrophilic atoms for the starting layer are to be located at the maximum :type layers: list or None :param layers: the surfactant layers to add, if None then all will be added :type layers_are_bilayers: bool :param layers_are_bilayers: whether layers are bilayers :rtype: float, bool :return: surfactants stopped being added at this distance in Angstrom, whether the hydrophilic atoms for the final layer are located at the maximum """ if layers is None: layers = self.getSurfactantLayers() amax = start_dist hydrophilic_at_max = not start_hydrophilic_at_max for layer in layers: surfactant_infos = self.surfactant_infos[layer] surfactant_t = self.max_surfactant_dists[layer][0] amin = amax + self.layer_sep amax = amin + surfactant_t hydrophilic_at_max = not hydrophilic_at_max for info in surfactant_infos: number = self.getNumber(info, amin, amax) self._volume_dict[info] += self.getLayerVolume( amin, amax, buffer_len=self.layer_sep) self.addLayer(info, amin, amax, hydrophilic_at_max=hydrophilic_at_max, number=number) if not layers_are_bilayers: continue amin = amax + self.layer_sep amax = amin + surfactant_t hydrophilic_at_max = not hydrophilic_at_max for info in surfactant_infos: number = self.getNumber(info, amin, amax) self._volume_dict[info] += self.getLayerVolume( amin, amax, buffer_len=self.layer_sep) self.addLayer(info, amin, amax, hydrophilic_at_max=hydrophilic_at_max, number=number) return amax, hydrophilic_at_max
[docs] def addCIonBodies(self, start_dist, stop_dist, layers=None, factor=1): """ Add counter-ion bodies to the input file. :type start_dist: float :param start_dist: start adding counter-ions at this distance in Angstrom :type stop_dist: float :param stop_dist: stop adding counter-ions at this distance in Angstrom :type layers: list or None :param layers: the counter-ion layers to add, if None then all will be added :type factor: float :param factor: multiplies the corresponding number of surfactant molecules to set the number of counter-ions in the given region """ if layers is None: layers = self.getSurfactantLayers() for layer in layers: for info in self.surfactant_infos[layer]: if info.cion_st: # does the rounding done here keep charges consistent? number = int(factor * self._number_dict[info]) or 1 self.addLayer(info, start_dist, stop_dist, number=number, add_cion=True)
[docs] def addSolventBodies(self, start_dist, stop_dist, layers=None, layer_sep=None): """ Add solvent bodies to the input file. :type start_dist: float :param start_dist: start adding solvents at this distance in Angstrom :type stop_dist: float :param stop_dist: stop adding solvents at this distance in Angstrom :type layers: list or None :param layers: the solvent layers to add, if None then all will be added :type layer_sep: float :param layer_sep: the layer separation in Angstrom """ if layer_sep is None: layer_sep = self.layer_sep if layers is None: layers = self.getSolventLayers() for layer in layers: for info in self.solvent_infos[layer]: self._volume_dict[info] += self.getLayerVolume( start_dist, stop_dist, buffer_len=layer_sep) self.addLayer(info, start_dist, stop_dist)
[docs] def getSurfactantLayers(self): """ Return a list of surfactant layers. :rtype: list :return: the surfactant layers """ return sorted(self.surfactant_infos.keys())
[docs] def getSolventLayers(self): """ Return a list of solvent layers. :rtype: list :return: the solvent layers """ return sorted(self.solvent_infos.keys())
[docs] def getConstraintType(self, parameter): """ Return the constraint type. :type parameter: float :param parameter: the parameter in Angstrom :rtype: str :return: the constraint type """ return self.CONSTRAINT_TYPE.format(parameter=parameter)
[docs]class MonolayerInputFile(StructuredLiquidInputFile): """ Manage a monolayer input file. """ SURFACTANT_TOP_ATOM_CONSTRAINT = 'over' SURFACTANT_BOTTOM_ATOM_CONSTRAINT = 'below' CONSTRAINT_TYPE = 'plane 0 0 1 {parameter}'
[docs] def getMinCellLengths(self): """ Return the minimum cell lengths. :rtype: tuple :return: a triple containing float and/or None, float is a minimum cell length in Angstrom, None indicates that there is no minimum """ t_min = self.getTotalSurfactantThickness(layers_are_bilayers=False) t_min += self.getTotalNonSurfactantThickness() t_min += 2 * self.layer_sep return (None, None, t_min)
[docs] def check(self): """ Check cell lengths. :raise PackmolInputFileException: if there is an issue with the input """ # monolayers are asymmetric and so both a hydrophilic and hydrophobic solvent # must be given if len(self.solvent_infos) != 2: raise PackmolInputFileException( 'Two different solvent layers are required.') super().check()
[docs] def getVol(self, amin, amax): """ Return the volume. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: float :return: the volume in Ang.^3 """ return self.getBoxSliceVol(amin, amax)
[docs] def getSurfactantConstraints(self, amin, amax): """ Return the surfactant constraints. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: str :return: the surfactant constraints """ la, lb, lc = self.cell_lengths return f' inside box 0 0 {amin} {la} {lb} {amax}\n'
[docs] def getSolventConstraints(self, amin, amax): """ Return the solvent constraints. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: str :return: the solvent constraints """ la, lb, lc = self.cell_lengths body = f' inside box 0 0 {amin} {la} {lb} {amax}\n' return body
[docs] def addStructureBodies(self): """ See parent class for documentation. """ self.check() la, lb, lc = self.cell_lengths total_surfactant_t = self.getTotalSurfactantThickness( layers_are_bilayers=False) solvent_t = (lc - (total_surfactant_t + 2 * self.layer_sep)) / 2 # bottom solvent c_min = 0 c_max = c_min + solvent_t self.addSolventBodies(c_min, c_max, layers=[HYDROPHILIC]) # surfactants c_max, final_hydrophilic_at_max = self.addSurfactantBodies( c_max, start_hydrophilic_at_max=False, layers_are_bilayers=False) # counter-ion, only add for hydrophilic layer self.addCIonBodies(c_min, c_min + solvent_t, factor=1) # top solvent c_min = c_max + self.layer_sep c_max = c_min + solvent_t self.addSolventBodies(c_min, c_max, layers=[HYDROPHOBIC], layer_sep=0)
[docs]class BilayerInputFile(MonolayerInputFile): """ Manage a bilayer input file. """
[docs] def getMinCellLengths(self): """ Return the minimum cell lengths. :rtype: tuple :return: a triple containing float and/or None, float is a minimum cell length in Angstrom, None indicates that there is no minimum """ t_min = self.getTotalSurfactantThickness() t_min += 2 * self.getTotalNonSurfactantThickness() t_min += 2 * self.layer_sep return (None, None, t_min)
[docs] def check(self): """ Check cell lengths. :raise PackmolInputFileException: if there is an issue with the input """ if len(self.solvent_infos) != 1: raise PackmolInputFileException( 'Only a single solvent layer is allowed.') StructuredLiquidInputFile.check(self)
[docs] def addStructureBodies(self): """ See parent class for documentation. """ self.check() la, lb, lc = self.cell_lengths total_surfactant_t = self.getTotalSurfactantThickness() solvent_t = (lc - (total_surfactant_t + 2 * self.layer_sep)) / 2 # bottom solvent c_min = 0 c_max = c_min + solvent_t self.addSolventBodies(c_min, c_max) # surfactants c_max, final_hydrophilic_at_max = self.addSurfactantBodies(c_max) # bottom counter-ion self.addCIonBodies(c_min, c_min + solvent_t, factor=0.5) # top solvent c_min = c_max + self.layer_sep c_max = c_min + solvent_t self.addSolventBodies(c_min, c_max, layer_sep=0) # top counter-ion self.addCIonBodies(c_min, c_max, factor=0.5)
[docs]class MicelleInputFile(StructuredLiquidInputFile): """ Manage a micelle input file. """ SURFACTANT_TOP_ATOM_CONSTRAINT = 'outside' SURFACTANT_BOTTOM_ATOM_CONSTRAINT = 'inside' CONSTRAINT_TYPE = 'sphere 0 0 0 {parameter}'
[docs] def getMinCellLengths(self): """ Return the minimum cell lengths. :rtype: tuple :return: a triple containing float and/or None, float is a minimum cell length in Angstrom, None indicates that there is no minimum """ t_min = 2 * self.getRadius() t_min += 2 * self.getTotalNonSurfactantThickness() t_min += 2 * self.layer_sep return (t_min, t_min, t_min)
[docs] def check(self): """ Check cell lengths. :raise PackmolInputFileException: if there is an issue with the input """ if len(self.solvent_infos) != 1: raise PackmolInputFileException( 'Only a single solvent layer is allowed.') super().check()
[docs] def getRadius(self): """ Return the radius. :rtype: float :return: the radius in Ang. """ # in general a micelle is a stacked bilayer with a monolayer core total_surfactant_t = self.getTotalSurfactantThickness() first_surfactant_layer = self.getSurfactantLayers()[0] first_surfactant_t = self.max_surfactant_dists[first_surfactant_layer][ 0] return total_surfactant_t - first_surfactant_t - self.layer_sep
[docs] def getVol(self, amin, amax): """ Return the volume. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: float :return: the volume in Ang.^3 """ return self.getSphereSliceVol(amin, amax)
[docs] def getOutsideVol(self): """ Return the outside volume. :rtype: float :return: the outside volume in Ang.^3 """ radius = self.getRadius() + self.layer_sep return self.getOutsideSphereVol(radius)
[docs] def getSurfactantConstraints(self, amin, amax): """ Return the surfactant constraints. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: str :return: the surfactant constraints """ la, lb, lc = self.cell_lengths return f' inside box {-la/2} {-lb/2} {-lc/2} {la/2} {lb/2} {lc/2}\n'
[docs] def getSolventConstraints(self, amin, amax): """ Return the solvent constraints. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: str :return: the solvent constraints """ body = self.getSurfactantConstraints(amin, amax) radius = self.getRadius() + self.layer_sep value = self.getConstraintType(radius) body += f' outside {value}\n' return body
[docs] def addStructureBodies(self): """ See parent class for documentation. """ self.check() layers = self.getSurfactantLayers() # first surfactant, monolayer layer = layers[0] r_min = 0 r_max = r_min + self.max_surfactant_dists[layer][0] hydrophilic_at_max = True for info in self.surfactant_infos[layer]: number = self.getNumber(info, r_min, r_max) self._volume_dict[info] += self.getLayerVolume( r_min, r_max, buffer_len=self.layer_sep) self.addLayer(info, r_min, r_max, hydrophilic_at_max=hydrophilic_at_max, number=number) # remaining surfactants, bilayers hydrophilic_at_max = not hydrophilic_at_max r_max, final_hydrophilic_at_max = self.addSurfactantBodies( r_max, start_hydrophilic_at_max=hydrophilic_at_max, layers=layers[1:]) # solvent r_min = r_max + self.layer_sep r_max = None self.addSolventBodies(r_min, r_max, layer_sep=0) # first counter-ion for info in self.surfactant_infos[layer]: if info.cion_st: number = self._number_dict[info] self.addLayer(info, r_min, r_max, number=number, add_cion=True) # remaining counter-ions self.addCIonBodies(r_min, r_max, layers=layers[1:], factor=1)
[docs]class LiposomeInputFile(MicelleInputFile): """ Manage a liposome input file. """
[docs] def prepare(self, *args, **kwargs): """ See parent class for documentation. :type radius: float :param radius: the radius of the liposome in Ang. """ self.radius = kwargs.pop('radius') super().prepare(*args, **kwargs)
[docs] def getMinRadius(self): """ Return the minimum radius. :rtype: float :return: the minimum radius in Angstrom """ r_min = self.getTotalSurfactantThickness() r_min += self.getTotalNonSurfactantThickness() r_min += self.layer_sep return r_min
[docs] def check(self): """ Check cell lengths. :raise PackmolInputFileException: if there is an issue with the input """ super().check() r_min = self.getMinRadius() radius = self.getRadius() if r_min > radius: raise PackmolInputFileException( 'The outer radius is ' 'not large enough to accomodate surfactant and solvent layers.')
[docs] def getRadius(self): """ Return the radius. :rtype: float :return: the radius in Ang. """ return self.radius
[docs] def getInnerRadius(self): """ Return the inner radius. :rtype: float :return: the inner radius in Ang. """ total_surfactant_t = self.getTotalSurfactantThickness() return self.getRadius() - total_surfactant_t - self.layer_sep
[docs] def getSolventConstraints(self, amin, amax): """ Return the solvent constraints. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float or None :param amax: the upper bound on the layer in Angstrom or None if there isn't one in which case the remaining outermost space of the cell will be used :rtype: str :return: the constraints """ if amax: body = self.getSurfactantConstraints(amin, amax) value = self.getConstraintType(amax) body += f' inside {value}\n' else: body = super().getSolventConstraints(amin, amax) return body
[docs] def addStructureBodies(self): """ See parent class for documentation. """ self.check() # inner solvent r_min = 0 r_max = self.getInnerRadius() self.addSolventBodies(r_min, r_max) # surfactants r_max, final_hydrophilic_at_max = self.addSurfactantBodies(r_max) inside_vol = self.getVol(0, self.getInnerRadius()) outside_vol = self.getOutsideVol() factor = inside_vol / (inside_vol + outside_vol) # inner counter-ions self.addCIonBodies(r_min, self.getInnerRadius(), factor=factor) # outer solvent r_min = r_max + self.layer_sep r_max = None self.addSolventBodies(r_min, r_max, layer_sep=0) # outer counter-ions factor = 1 - factor self.addCIonBodies(r_min, r_max, factor=factor)
[docs]class WormlikeMicelleInputFile(MicelleInputFile): """ Manage a wormlike micelle input file. """ CONSTRAINT_TYPE = 'cylinder 0 0 {bottom} 0 0 1 {parameter} {length}'
[docs] def getMinCellLengths(self): """ Return the minimum cell lengths. :rtype: tuple :return: a triple containing float and/or None, float is a minimum cell length in Angstrom, None indicates that there is no minimum """ t_min = 2 * self.getRadius() t_min += 2 * self.getTotalNonSurfactantThickness() t_min += 2 * self.layer_sep return (t_min, t_min, None)
[docs] def getVol(self, amin, amax): """ Return the volume. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: float :return: the volume in Ang.^3 """ la, lb, lc = self.cell_lengths return self.getCylinderSliceVol(amin, amax, lc)
[docs] def getOutsideVol(self): """ Return the outside volume. :rtype: float :return: the outside volume in Ang.^3 """ radius = self.getRadius() + self.layer_sep la, lb, lc = self.cell_lengths return self.getOutsideCylinderVol(radius, lc)
[docs] def getConstraintType(self, parameter): """ Return the constraint type. :type parameter: float :param parameter: the parameter in Angstrom :rtype: str :return: the constraint type """ la, lb, lc = self.cell_lengths return self.CONSTRAINT_TYPE.format(bottom=-lc / 2, parameter=parameter, length=lc)
[docs]class ElongatedMicelleInputFile(MicelleInputFile): """ Manage an elongated micelle input file. """ CONSTRAINT_TYPE = 'ellipsoid 0 0 0 {parameter} {parameter} {parameter_p} 1'
[docs] def prepare(self, *args, **kwargs): """ See parent class for documentation. :type factor: float :param factor: the scale factor for the principal axis """ self.factor = kwargs.pop('factor') super().prepare(*args, **kwargs)
[docs] def getMinCellLengths(self): """ Return the minimum cell lengths. :rtype: tuple :return: a triple containing float and/or None, float is a minimum cell length in Angstrom, None indicates that there is no minimum """ t_min = 2 * self.getRadius() t_min += 2 * self.getTotalNonSurfactantThickness() t_min += 2 * self.layer_sep tp_min = 2 * self.getMajorRadius() tp_min += 2 * self.getTotalNonSurfactantThickness() tp_min += 2 * self.layer_sep return (t_min, t_min, tp_min)
[docs] def getMajorRadius(self): """ Return the major radius. :rtype: float :return: the major radius in Ang. """ # this is the radius of the elongated sphere return self.factor * self.getRadius()
[docs] def getVol(self, amin, amax): """ Return the volume. :type amin: float :param amin: the lower bound on the layer in Angstrom :type amax: float :param amax: the upper bound on the layer in Angstrom :rtype: float :return: the volume in Ang.^3 """ return self.getEllipsoidSliceVol(amin, amax)
[docs] def getOutsideVol(self): """ Return the outside volume. :rtype: float :return: the outside volume in Ang.^3 """ radius = self.getRadius() + self.layer_sep return self.getOutsideEllipsoidVol(radius)
[docs] def getConstraintType(self, parameter): """ Return the constraint type. :type parameter: float :param parameter: the parameter in Angstrom :rtype: str :return: the constraint type """ parameter_p = self.factor * parameter return self.CONSTRAINT_TYPE.format(parameter=parameter, parameter_p=parameter_p)
[docs]def get_writer(surfactant_infos, solvent_infos, base_name, seed, n_loop, cell_lengths, packing_f, model_type, **kwargs): """ Get the writer. :type surfactant_infos: list[`SurfactantInfo`] :param surfactant_infos: contains surfactant infos :type solvent_infos: list[`SolventInfo`] :param solvent_infos: contains solvent infos :type base_name: str :param base_name: base name used for output file naming :type seed: int :param seed: seed for random :type n_loop: int :param n_loop: the number of packmol loops :type cell_lengths: tuple[float] :param cell_lengths: the cell lengths in Angstrom :type packing_f: float :param packing_f: the packing factor :type model_type: str :param model_type: the model type, a key in the class map :rtype: `StructuredLiquidInputFile` :return: the writer """ # set up packmol input file, for now using fixed values of some parameters, # the given tolerance is used for both atomistic and coarse-grained, for # coarse-grained increasing it for a test case reproduced the same results tolerance = 2 # Ang. layer_sep = 1 # Ang. min_constraint_window_surfactant_idxs = 25 # percent output_base_name = f'{base_name}_out' general_body = f'seed {seed}\n' general_body += f'nloop {n_loop}\n' # MATSCI-7885 - it was decided that 'sidemax' should be increased # from the default of 1000. to avoid Packmol logging an error for # large input cells general_body += f'sidemax {SIDEMAX}\n' general_body += '\n' class_map = get_class_map() aclass = class_map[model_type] writer = aclass(tolerance=tolerance, output_base_name=output_base_name, general_body=general_body) writer.prepare(cell_lengths, surfactant_infos, solvent_infos, layer_sep=layer_sep, packing_f=packing_f, min_constraint_window_surfactant_idxs= min_constraint_window_surfactant_idxs, **kwargs) return writer
[docs]def get_parser(description, packmol_input=True): """ Get the command line argument parser. :type description: str :param description: the description :type packmol_input: bool :param packmol_input: whether the input is a packmol input file :rtype: parserutils.DriverParser :return: command line argument parser """ parser = parserutils.DriverParser( description=description, add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter) if packmol_input: ahelp = ('Specify packmol input *inp files.') parser.add_argument(FLAG_PACKMOL_INPUT_FILES, nargs='+', type=parserutils.type_file, help=ahelp) else: ahelp = ('Specify a structrued liquid builder input *json file.') parser.add_argument(FLAG_SLB_INPUT_FILE, type=parserutils.type_json_file, help=ahelp) ahelp = ('Specify a structure file for any *pdb files referenced in other ' 'input files. This flag can be used multiple times. Values ' 'are two strings, the first is the *mae file containing the ' 'structure while the second is the *pdb file name referenced ' 'in the other input files. Structures can be atomistic ' 'or coarse-grained but all must be one or the other.') parser.add_argument(FLAG_STRUCTURE_FILE, nargs=2, action='append', required=True, help=ahelp) ahelp = ( 'Specify in Angstrom the lengths of the a-, b-, and c- lattice ' 'vectors that define the periodic boundary condition for the output ' 'cells. This flag should be specified for each of the specified ' 'input files.') action = 'append' if packmol_input else 'store' parser.add_argument(FLAG_BOX_LENGTHS, nargs=3, action=action, required=True, type=parserutils.type_positive_float, help=ahelp) ahelp = ('If specified then the job fails if any subjob fails ' 'otherwise the job fails only if all subjobs fail.') parser.add_argument(FLAG_NO_FAILURES, action='store_true', help=ahelp) ahelp = ('By default an input Desmond *cms file is prepared for ' 'each packmol output file. Use this option to instead only write ' 'the outputs to *mae files.') parser.add_argument(FLAG_NO_PREP_MD, action='store_true', help=ahelp) atomistic_fields = parserutils.valid_forcefield_info().replace('Val', 'val') ahelp = ( 'Specify the force field to use if preparing Desmond *cms ' 'files. For atomistic systems {atff} For coarse-grained ' 'systems specify the name of a coarse-grained force field (see related ' 'location type flag {flag}.)').format(atff=atomistic_fields, flag=FLAG_CGFFLD_LOCATION_TYPE) parser.add_argument(FLAG_FORCE_FIELD, default=msutils.get_default_forcefield().name, help=ahelp) ahelp = ('Location type for a specified coarse-grained force field. ' 'Option \"{install}\" means from a standard ' 'location in the Schrodinger installation while option ' '\"{local}\" means either from {local_path} or the job ' 'launch directory, i.e. the CWD.').format( install=parserutils.INSTALLED_CG_FF_LOCATION_TYPE, local=parserutils.LOCAL_CG_FF_LOCATION_TYPE, local_path=cgff.FF_PARAMETERS_LOCAL_PATH) parser.add_argument(FLAG_CGFFLD_LOCATION_TYPE, choices=parserutils.CG_FF_LOCATION_TYPES, default=parserutils.LOCAL_CG_FF_LOCATION_TYPE, help=ahelp) #Specifiy the forcefield for water to use if preparing *cms' files parserutils.add_desmond_parser_arguments(parser, [WATER_FF_TYPE_FLAG]) options = [cmdline.HOST, cmdline.JOBNAME] cmdline.add_jobcontrol_options(parser, options=options, group_options=True) return parser
[docs]def get_job_spec_from_args(argv, description, program_name=PROGRAM_NAME, default_job_name=DEFAULT_JOB_NAME, packmol_input=True): """ Return a JobSpecification. :type argv: list :param argv: command line arguments including the script name at [0] :type description: str :param description: the description :type program_name: str :param program_name: the program name :type default_job_name: str :param default_job_name: the default job name :type packmol_input: bool :param packmol_input: whether the input is a packmol input file :rtype: schrodinger.job.launchapi.JobSpecification :return: the JobSpecification """ parser = get_parser(description, packmol_input=packmol_input) options = parser.parse_args(argv[1:]) prep_md = not jobutils.get_option(options, FLAG_NO_PREP_MD) if prep_md: input_sts = [] for mae_file, pdb_file in jobutils.get_option(options, FLAG_STRUCTURE_FILE): if os.path.exists(mae_file): input_sts.append(structure.Structure.read(mae_file)) else: parser.error(f'The {mae_file} can not be found.') return options = cgff.validate_options(parser, argv[1:], FLAG_FORCE_FIELD, FLAG_CGFFLD_LOCATION_TYPE, input_sts=input_sts) job_builder = jobutils.prepare_job_spec_builder(argv, program_name, default_job_name) if packmol_input: in_files = jobutils.get_option(options, FLAG_PACKMOL_INPUT_FILES) else: in_files = [jobutils.get_option(options, FLAG_SLB_INPUT_FILE)] for in_file in in_files: job_builder.setInputFile(in_file) for mae_file, pdb_file in jobutils.get_option(options, FLAG_STRUCTURE_FILE): job_builder.setInputFile(mae_file) if prep_md: cgff.add_local_type_force_field_to_job_builder_w_check( job_builder, options, FLAG_FORCE_FIELD, FLAG_CGFFLD_LOCATION_TYPE, input_sts=input_sts) return job_builder.getJobSpec()
[docs]def main(description, *args, default_job_name=DEFAULT_JOB_NAME): """ Main function used by drivers to run packmol. :type description: str :param description: the parser description :type default_job_name: str :param default_job_name: the default job name """ parser = get_parser(description) options = parser.parse_args(args) st_files = jobutils.get_option(options, FLAG_STRUCTURE_FILE) input_sts = [] for mae_file, pdb_file in st_files: if not os.path.exists(mae_file): parser.error(f'The {mae_file} file can not be found.') sys.exit(1) elif os.path.exists(pdb_file): parser.error(f'The {pdb_file} file already exists and ' 'would automatically be overwritten.') sys.exit(1) input_sts.append(structure.Structure.read(mae_file)) prep_md = not jobutils.get_option(options, FLAG_NO_PREP_MD) if prep_md: options = cgff.validate_options(parser, args, FLAG_FORCE_FIELD, FLAG_CGFFLD_LOCATION_TYPE, input_sts=input_sts) job_name = jobutils.get_jobname(default_job_name) logger, log_file_name = textlogger.create_logger(related_filename=job_name) textlogger.log_initial_data(logger, options) packmol_files = jobutils.get_option(options, FLAG_PACKMOL_INPUT_FILES) box_lengths = jobutils.get_option(options, FLAG_BOX_LENGTHS) # check PBC if len(packmol_files) != len(box_lengths): msg = ('Box lengths must be specified for each packmol input file.') parser.error(msg) sys.exit(1) input_files = dict(zip(packmol_files, box_lengths)) sts = { pdb_file: structure.Structure.read(mae_file) for mae_file, pdb_file in st_files } allow_failures = not jobutils.get_option(options, FLAG_NO_FAILURES) msg = None output_files = [] if not prep_md: try: cells = get_cells(input_files, sts, allow_failures=allow_failures) except RuntimeError as err: msg = str(err) else: for input_file, st in cells.items(): base_name = fileutils.get_basename(input_file) output_file = base_name + '_out.mae' st.write(output_file) output_files.append(output_file) else: force_field = jobutils.get_option(options, FLAG_FORCE_FIELD) water_force_field = jobutils.get_option(options, WATER_FF_TYPE_FLAG) cg_ff_loc_type = jobutils.get_option(options, FLAG_CGFFLD_LOCATION_TYPE) try: cell_files = write_desmond_cells( input_files, sts, allow_failures=allow_failures, force_field=force_field, water_force_field=water_force_field, cg_ff_loc_type=cg_ff_loc_type) except RuntimeError as err: msg = str(err) else: output_files.extend(cell_files.values()) if msg: logger.error(msg) sys.exit(msg) for output_file in output_files[:-1]: jobutils.add_outfile_to_backend(output_file) jobutils.add_outfile_to_backend(output_files[-1], set_structure_output=True)
[docs]def get_surfactant_infos(slb_dict): """ Return surfactant information from the given dictionary of structured liquid builder options. Maestro files referenced within must exist in the current working directory. :type slb_dict: dict :param slb_dict: contains structured liquid builder options :raise RuntimeError: if there is an issue :rtype: list[SurfactantInfo], list[str] :return: the surfactant information objects and any extra structure file flags """ try: surfactants_dict = slb_dict[SURFACTANTS] except KeyError: raise RuntimeError('The structured liquid builder input must contain ' f'a "{SURFACTANTS}" section.') cleaner = jobutils.StringCleaner() lipid_st_dict = desmondutils._get_lipid_ff_st_dict() surfactant_infos = [] extra_structure_file_flags = [] for mae_file, surfactant_kwargs in surfactants_dict.items(): if os.path.exists(mae_file): st = structure.Structure.read(mae_file) else: lipid_st_title = mae_file st = lipid_st_dict.get(lipid_st_title) if not st: raise RuntimeError(f'The {mae_file} can not be found.') else: st.write(f'{lipid_st_title}.mae') jobutils.add_outfile_to_backend(f'{lipid_st_title}.mae') flags = [ FLAG_STRUCTURE_FILE, f'{lipid_st_title}.mae', f'{lipid_st_title}.pdb' ] extra_structure_file_flags.extend(flags) name = cleaner.cleanAndUniquify(st.title) packing_pct = surfactant_kwargs.get(PACKING_PCT) number = surfactant_kwargs.get(NUMBER) if not (bool(packing_pct) ^ bool(number)): raise RuntimeError( f'Only one of either "{PACKING_PCT}" or "{NUMBER}" ' 'can be specified.') elif packing_pct: packing_pct = float(packing_pct) elif number: number = int(number) try: layer = surfactant_kwargs[LAYER] hydrophilic_idxs = get_idxs(surfactant_kwargs[HYDROPHILIC_IDXS]) hydrophobic_idxs = get_idxs(surfactant_kwargs[HYDROPHOBIC_IDXS]) except KeyError: raise RuntimeError( f'For each surfactant all of {LAYER}, ' f'{HYDROPHILIC_IDXS}, and {HYDROPHOBIC_IDXS} must be specified.' ) cion_file_name = surfactant_kwargs.get(CION_FILE_NAME) if cion_file_name: cion_st = structure.Structure.read(cion_file_name) cion_name = cleaner.cleanAndUniquify(cion_st.title) else: cion_st = None cion_name = None surfactant_info = SurfactantInfo(st=st, name=name, packing_pct=packing_pct, number=number, layer=layer, hydrophilic_idxs=hydrophilic_idxs, hydrophobic_idxs=hydrophobic_idxs, cion_st=cion_st, cion_name=cion_name) surfactant_infos.append(surfactant_info) return surfactant_infos, extra_structure_file_flags
[docs]def get_solvent_infos(slb_dict): """ Return solvent information from the given dictionary of structured liquid builder options. Maestro files referenced within must exist in the current working directory. :type slb_dict: dict :param slb_dict: contains structured liquid builder options :raise RuntimeError: if there is an issue :rtype: list[SolventInfo] :return: the solvent information objects """ try: solvents_dict = slb_dict[SOLVENTS] except KeyError: raise RuntimeError('The structured liquid builder input must contain ' f'a "{SOLVENTS}" section.') cleaner = jobutils.StringCleaner() solvent_infos = [] for mae_file, solvent_kwargs in solvents_dict.items(): if not os.path.exists(mae_file): raise RuntimeError(f'The {mae_file} can not be found.') st = structure.Structure.read(mae_file) name = cleaner.cleanAndUniquify(st.title) packing_pct = solvent_kwargs.get(PACKING_PCT) number = solvent_kwargs.get(NUMBER) if not (bool(packing_pct) ^ bool(number)): raise RuntimeError( f'Only one of either "{PACKING_PCT}" or "{NUMBER}" ' 'can be specified.') elif packing_pct: packing_pct = float(packing_pct) elif number: number = int(number) layer = solvent_kwargs.get(LAYER, HYDROPHILIC) if layer not in [HYDROPHILIC, HYDROPHOBIC]: raise RuntimeError(f'A solvent {LAYER} must be either ' f'{HYDROPHILIC} or {HYDROPHOBIC}.') solvent_info = SolventInfo(st=st, name=name, packing_pct=packing_pct, number=number, layer=layer) solvent_infos.append(solvent_info) return solvent_infos
[docs]def write_packmol_input_file(slb_dict, box_lengths): """ Write the packmol input file from the given dictionary of structured liquid builder options. :type slb_dict: dict :param slb_dict: contains structured liquid builder options :type box_lengths: tuple[float] :param box_lengths: the cell lengths in Angstrom :raise RuntimeError: if there is an issue :rtype: str, list[str] :return: the packmol input file name and any extra structure file flags """ surfactant_infos, extra_structure_file_flags = get_surfactant_infos( slb_dict) solvent_infos = get_solvent_infos(slb_dict) seed = slb_dict.get(SEED, DEFAULT_SEED) n_loop = slb_dict.get(N_LOOP, DEFAULT_N_LOOP) packing_f = slb_dict.get(PACKING_F, DEFAULT_PACKING_F) model_type = slb_dict.get(MODEL_TYPE, BILAYER) model_kwargs = slb_dict.get(MODEL_KWARGS, {}) job_name = jobutils.get_jobname(DEFAULT_JOB_NAME) # create packmol input file writer = get_writer(surfactant_infos, solvent_infos, job_name, seed, n_loop, box_lengths, packing_f, model_type, **model_kwargs) try: writer.addStructureBodies() except PackmolInputFileException as err: raise RuntimeError(str(err)) packmol_file = writer.write(input_base_name=job_name) jobutils.add_outfile_to_backend(packmol_file) return packmol_file, extra_structure_file_flags