"""Script to setup cosolvent system"""
import os
from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple
from functools import cached_property
import numpy as np
import schrodinger.application.desmond.system_builder_inp as sb_inp
from schrodinger import structure
from schrodinger.application.desmond import constants
from schrodinger.application.desmond import envir
from schrodinger.application.desmond import struc
from schrodinger.application.desmond.cms import get_box
from schrodinger.job import jobcontrol
from schrodinger.structure import Structure
from schrodinger.structutils.analyze import evaluate_asl
from schrodinger.utils import fileutils
from schrodinger.utils import log
logger = log.get_output_logger(name="mixed_solvent")
WATER_MOLECULAR_MASS = 18.01528
BIG_BOX_BUFFER = 15.0
SHRINK_BOX_BY = 0.01
BOX_DATA = Path(envir.CONST.MMSHARE_SYSTEM_BUILDER_DATA_DIR) / 'mxmd'
DEFAULT_PROBES_STR = "acetonitrile,isopropanol,pyrimidine"
SYS_BUILDER = os.path.join(envir.CONST.SCHRODINGER_UTILITIES, "system_builder")
BUILTIN_PROBE_NAMES = [
box_fname.replace('.box.mae', '') for box_fname in os.listdir(BOX_DATA)
]
[docs]class Cosolvent:
[docs] def __init__(self, filename: str):
"""
`filename` should follow the pattern `<probe_name>.box.mae`
"""
self._filename = filename
self.name = Path(filename).name[:-8] # strip off .box.mae
@cached_property
def box_strucs(self) -> List[Structure]:
return list(structure.StructureReader(self._filename))
@property
def _ct0(self):
return self.box_strucs[0]
@cached_property
def probe_pdbres(self):
res = next(iter(self._ct0.residue))
return res.pdbres
@cached_property
def density(self) -> float:
"""
Return average density of the box in g/cm^3
"""
# FIXME: AU_TO_KG is a misleading name
# but the return value has the correct units of g/cm^3.
mass_kg = self._ct0.total_weight * constants.Conversion.AU_TO_KG
all_boxes = [
np.array(get_box(ct)).reshape((3, 3)) for ct in self.box_strucs
]
vol = np.array([np.linalg.det(box) for box in all_boxes])
return np.mean(mass_kg / vol)
@cached_property
def probe_nheavy(self) -> int:
"""
Return number of heavy atoms per probe molecule
"""
return len(
[a for a in self._ct0.molecule[1].atom if a.atomic_number > 1])
@cached_property
def probe_mass(self) -> float:
"""
Return molecular weight of the probe
"""
mol = self._ct0.molecule[1]
return sum([a.atomic_weight for a in mol.atom])
@property
def filename(self):
return self._filename
[docs]def get_probe_paths(probe_names: List[str],
custom_probe_dir: Optional[str]=None) \
-> Tuple[List[str], List[str], List[str]]:
"""
Process the given probe names and return paths to their .box.mae file.
Additionally return any missing probes.
:return: A 3-tuple of two lists containing probe paths for custom probe and
builtin probes and a list containing probe names with missing files.
"""
custom = []
builtin = []
missing_probes = []
for probe_name in probe_names:
if custom_probe_dir:
probe_path = _get_probe_path(probe_name, custom_probe_dir)
if Path(probe_path).exists():
custom.append(probe_path)
continue
builtin_probe_name = _get_probe_path(probe_name)
if Path(builtin_probe_name).exists():
builtin.append(builtin_probe_name)
continue
missing_probes.append(probe_name)
return custom, builtin, missing_probes
def _get_probe_path(probe_name: str, probe_dir: str = BOX_DATA):
"""
Get a path to the given probe's solvent box structure file
"""
return os.path.join(probe_dir, f"{probe_name}.box.mae")
[docs]def convert_vv2mol(probe: Cosolvent, target_vv_ratio: float = 5.0) -> int:
'''
This function reports the number of water molecules required to maintain
the required volume/volume ratio for each molecules of the probe
Assumes that density is provided in gm/cm3 molecular mass also should be
specified in gm/mol
:param probe_name: Name of the cosolvent used to build mixed-solvent box
:param target_vv_ratio: Volume over volume ratio in percent
'''
return int((100.0 - target_vv_ratio) / WATER_MOLECULAR_MASS) / (
(target_vv_ratio * probe.density) / probe.probe_mass)
[docs]class CosolventBoxGenerator:
"""
Example::
gen = CosolventBoxGenerator(
inp_fname,
'probe_directory/acetonitrile.box.mae',
1,
cosolvent_layer_size=5.0,
cosolvent_volume_ratio=2.0)
gen.generate(out_fname)
"""
[docs] def __init__(self,
solute_fname: str,
probe_path: str,
box_number: int,
init_water_buffer: float = BIG_BOX_BUFFER,
cosolvent_layer_size: float = 7.0,
cosolvent_volume_ratio: float = 5.0,
cosolvent_vdw_scaling: Optional[float] = None,
water='SPC'):
self._solute_fname = Path(solute_fname).absolute()
self._box_number = box_number
self._init_water_buffer = init_water_buffer
self._water_model = water
self._vdw_scaling = cosolvent_vdw_scaling
self._layer_size = cosolvent_layer_size
self._volume_ratio = cosolvent_volume_ratio
self._probe = Cosolvent(probe_path)
self._probe_name = self._probe.name
[docs] def generate(self, fname: str):
mx_strucs = self._generate_cosolvent_box()
if not mx_strucs:
return
self._shrink_water_box(mx_strucs, self._volume_ratio)
with structure.StructureWriter(fname) as ct_writer:
for ct in mx_strucs:
ct_writer.append(ct)
def _generate_cosolvent_box(self) -> List[Structure]:
'''
Creates solvated mixed solvent system in three steps:
1) Solvate the protein with cosolvent, and extract a layer of cosolvents
around the protein
2) Solvate the protein-cosolvent system with specified water. Intentionally
use a larger box so that it can laber be shrunk to match the targeted
volume over volume (v/v) ratio.
3) Extract cosolvent from solute CT into a separate 'cosolvent' CT.
'''
with fileutils.in_temporary_directory():
logger.info(f'Temp dir set to: {os.getcwd()}')
layered_box_fname = self._make_layered_box()
layered_shell_fname = self._make_layered_shell(layered_box_fname)
mx_strucs = solvate_cosolvent(layered_shell_fname,
self._water_model.lower(),
self._init_water_buffer)
self._split_cosolvent_from_solute(mx_strucs)
return mx_strucs
def _make_layered_box(self) -> Path:
layered_box_fname = Path('layered_box.mae')
csb_fn = 'create_cosolvent_box.csb'
cosolvent_struc = self._probe.box_strucs[self._box_number]
cosolvent_box_fname = 'cosolvent_box.mae'
cosolvent_struc.write(cosolvent_box_fname)
# backup reference coordinates for solute
solute_ct = Structure.read(self._solute_fname)
struc.set_ct_reference_coordinates(solute_ct)
out_solute_fname = 'out_solute.mae'
solute_ct.write(out_solute_fname)
system = sb_inp.SystemBuilderInput()
system.setSolute(out_solute_fname)
# move non zero-ordered water molecules from solute to solvent CT
system.treatSolventFromSolute()
cosolvent_vdw_scaling = (self._vdw_scaling or
1.0 - (self._probe.probe_nheavy * 0.07)
) # yapf: disabled
system.setVdwScalingFactor(cosolvent_vdw_scaling)
system.setSolvent(cosolvent_box_fname)
system.setNeutralize(0)
system.setSoluteAlignment('rezero')
system.setWriteMaeFile(str(layered_box_fname))
system.setBoundaryCondition(use_buffer=1,
boundary_condition='cubic',
a=self._layer_size)
system.write(csb_fn)
cmd = [SYS_BUILDER, csb_fn]
logger.info(
f'Creating a box of {self._probe_name} molecules around the '
f'protein, with vdw scaling: {cosolvent_vdw_scaling:.3f}')
job = jobcontrol.launch_job(cmd, print_output=True, launch_dir='.')
job.wait()
return layered_box_fname
def _make_layered_shell(self, layered_box_fname: Path) -> Path:
full_system_ct = structure.Structure.read(layered_box_fname, index=1)
layered_shell_fn = Path('layered_shell_raw.mae')
logger.info(
f"Converting the cosolvent box "
f"({self._probe.filename}, "
f"index={self._box_number}) to a layer of cosolvent molecules around "
f"the protein.")
# Keep cosolvents within a radius and delete xtal waters
full_system_ct.deleteAtoms(
evaluate_asl(
full_system_ct,
f'(fillmol beyond {self._layer_size} (protein or nucleic_acids)) '
'or atom.i_desmond_crystal_water 1'))
full_system_ct.title = structure.Structure.read(
self._solute_fname).title
full_system_ct.write(layered_shell_fn)
logger.info(f'Written {layered_shell_fn}')
return layered_shell_fn
def _split_cosolvent_from_solute(self, mx_strucs: List[Structure]) -> None:
'''
Split cosolvent molecules from solute CT into a seaparate CT. This CT is
then inserted before solvent CT. Input `mx_strucs` will be mutated.
:param mx_strucs: List of structures that assumes the same order as in Cms
file structure (fullsystem, solute, solvent CTs)
:param probe_name: name of the cosolvent used to build mixed-solvent box
'''
probe_name = self._probe_name
SOLUTE_CT_IDX = 1
cosol_atom_indices = evaluate_asl(
mx_strucs[1], f'res.ptype "{self._probe.probe_pdbres}"')
if not cosol_atom_indices:
mx_strucs.clear()
return
cosolvent_ct = mx_strucs[1].extract(cosol_atom_indices)
mx_strucs[SOLUTE_CT_IDX].deleteAtoms(cosol_atom_indices)
cosolvent_ct.property[
constants.CT_TYPE] = constants.CT_TYPE.VAL.COSOLVENT
cosolvent_ct.property[constants.NUM_COMPONENT] = 1
cosolvent_ct.title = f'Cosolvent - {probe_name}'
cosolvent_ct.property[constants.MXMD_COSOLVENT_PROBE] = probe_name
for prop in constants.SIM_BOX:
cosolvent_ct.property[prop] = mx_strucs[SOLUTE_CT_IDX].property[
prop]
for ires, res in enumerate(cosolvent_ct.residue, start=1):
res.resnum = ires
# Insert cosolvent CT to be before the solvent CT
mx_strucs.insert(-1, cosolvent_ct)
def _shrink_water_box(self, mx_strucs: List[Structure],
target_vv_ratio: float) -> None:
"""
Shrink the water box to match volume-over-volume ratio
:param mx_strucs: List of structures that assumes the same order as in
Cms file structure (fullsystem, solute, solvent CTs)
:param target_vv_ratio: Volume over volume ratio in percent
"""
cosol_ct_idx, wat_ct_idx = -2, -1
num_cosolvents = mx_strucs[cosol_ct_idx].mol_total
nwat = mx_strucs[wat_ct_idx].mol_total
required_nwat = int(num_cosolvents *
convert_vv2mol(self._probe, target_vv_ratio))
logger.info(
'Shrinking the water box to match volume over volume ratio of '
f'{target_vv_ratio}%. Current box size is {self._init_water_buffer}'
)
shrink_cosolvent_box(mx_strucs, required_nwat)
logger.info(
'Done generating mixed-solvent box with targeted volume/volume ratio.'
)
[docs]def solvate_cosolvent(
inp_mae: Path,
water_model: str = 'spc',
init_water_buffer=BIG_BOX_BUFFER) -> List[structure.Structure]:
"""
Solvate the cosolvent ct with water
:param inp_mae: The path to the input mae. This is a system containing the
cosolvent.
:param job_dir: A path to the directory to run the job in
:param water_model: The water type to use as the solvent
:param init_water_buffer: The initial size of water box
:return: Three structures from system_builder output: the fullsystem,
solute, and solvent
"""
# Solvate Protein-cosolvent CT with water
solvated_fn = 'solvated.mae'
csb_fn = 'create_water_box.csb'
inp = sb_inp.SystemBuilderInput()
inp.setSolute(str(inp_mae))
inp.setSolvent(f"{water_model}.box.mae")
inp.setNeutralize(0)
inp.setWriteMaeFile(solvated_fn)
inp.setBoundaryCondition(use_buffer=1,
boundary_condition='cubic',
a=init_water_buffer)
inp.write(csb_fn)
cmd = [SYS_BUILDER, csb_fn]
logger.info("Creating a solvated mixed-solvent system")
job = jobcontrol.launch_job(cmd, print_output=True)
job.wait()
logger.info(f"Mixed-solvent ({solvated_fn}) system created.")
return list(structure.StructureReader(solvated_fn))
[docs]def shrink_cosolvent_box(cts: List[structure.Structure], required_nwat: int):
"""
Shrink the system box so that it contains a given number of water molecules.
:param cts: The input structures in the order of:
[full system, non-solvent ct..., cosolvent ct, water ct]. These cts
are modified in place.
:param required_nwat: How many waters should remain after shrinking system
"""
fsys_ct, *non_water_cts, water_ct = cts
# delete excess waters, to match 50% weight/weight ratio
nwat = water_ct.mol_total
logger.info(f'Required number of water molecules is {required_nwat} for '
f'{non_water_cts[-1].mol_total} cosolvent molecules. '
f'Current system contains {nwat} waters.')
if required_nwat > nwat:
raise ValueError("Number of water molecules smaller than required. "
"Cannot shrink system")
half_box = water_ct.property[constants.SIM_BOX[0]] / 2.0
noh_wat_aid = evaluate_asl(water_ct, "water and not a.e H")
noh_wat_idx = [i - 1 for i in noh_wat_aid]
xyz = np.abs(water_ct.getXYZ()[noh_wat_idx])
while nwat > required_nwat:
nwat = len(xyz[np.max(xyz, axis=1) <= half_box])
half_box -= SHRINK_BOX_BY
# add half of SHRINK_BOX_BY for a guess closer to the target
half_box += SHRINK_BOX_BY / 2.0
logger.info('Cubic box length will be shrunk from '
f'{water_ct.property[constants.SIM_BOX[0]]:.3f} to '
f'{(half_box * 2):.3f} Angstroms, with {nwat} water molecules.')
# remove water molecues from solvent and full_system CTs
wat_atom_idx_to_delete = []
for mol in water_ct.molecule:
if np.max(np.abs(mol.atom[1].xyz)) > half_box:
wat_atom_idx_to_delete += mol.getAtomIndices()
water_ct.deleteAtoms(wat_atom_idx_to_delete)
# offset atom indices number of atoms prior to solvent block
offset = sum([ct.atom_total for ct in non_water_cts])
fsys_ct.deleteAtoms([idx + offset for idx in wat_atom_idx_to_delete])
# update box values for ax, by, cz, of a cubic box
for ct in cts:
for prop in constants.SIM_BOX_DIAGONAL:
ct.property[prop] = half_box * 2
if __name__ == '__main__': # pragma: no cover
# For quick test.
import sys
in_fname, out_fname, cosolvent_layer_size = sys.argv[1:4]
cosolvent_layer_size = float(cosolvent_layer_size)
gen = CosolventBoxGenerator(in_fname,
'acetonitrile',
1,
cosolvent_layer_size=cosolvent_layer_size,
cosolvent_vdw_scaling=1.0)
gen.generate(out_fname)