"""
Builder classes for making amorphous cells
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributor: Dave Geisen, Teng Zhang
import collections
import copy
import itertools
import logging
import math
import random
import string
import sys
from collections import OrderedDict
from collections import defaultdict
from collections import namedtuple
from past.utils import old_div
import networkx
import numpy
import psutil
import scipy.constants as constants
import scipy.spatial as spatial
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import schrodinger
from schrodinger import structure
from schrodinger.application.desmond.bennett import BOLTZMANN
from schrodinger.application.matsci import buildcomplex
from schrodinger.application.matsci import clusterstruct
from schrodinger.application.matsci import coarsegrain
from schrodinger.application.matsci import desmondutils
from schrodinger.application.matsci import graph
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import msprops
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import textlogger
from schrodinger.application.matsci.nano import xtal
from schrodinger.forcefield import common
from schrodinger.infra import fast3d
from schrodinger.infra import mm
from schrodinger.infra import mmbitset
from schrodinger.infra import mmlist
from schrodinger.infra import structure as infrastructure
from schrodinger.job import jobcontrol
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import ringspear
from schrodinger.structutils import smiles
from schrodinger.structutils import standard_nuclear_orientation
from schrodinger.structutils import transform
# Color constants
COLOR_BY_MOLECULE = 'molecule'
GREEN = 'green'
RED = 'red'
BLUE = 'user27'
YELLOW = 'user16'
CYAN = 'user4'
ORANGE = 'orange'
PINK = 'pink'
DARK_GREEN = 'user3'
MAGENTA = 'purple'
GREY = 'gray'
SEAFOAM = 'userM'
OLIVE = 'olive'
LIME = 'user19'
DEEPRED = 'brown'
DEEPTEAL = 'user59'
LIGHTBLUE = 'blue9'
SLATE = 'blue14'
YELLOWORANGE = 'userT'
HOTPINK = 'userN'
COLORS = [
GREEN, RED, BLUE, YELLOW, CYAN, ORANGE, PINK, DARK_GREEN, MAGENTA, GREY,
SEAFOAM, OLIVE, LIME, DEEPRED, DEEPTEAL, LIGHTBLUE, SLATE, YELLOWORANGE,
HOTPINK
]
TWO_PI = 2.0 * numpy.pi
OBEY_DENSITY = 'density'
OBEY_CLASH = 'clash'
OBEY_BOTH = 'both'
SCAFFOLD_ATOM_PROP = 'b_matsci_scaffold_atom'
OPLS2005 = mm.OPLS_NAME_F14
AMCELL_NO_SYSTEM_OUT = '-amcell.maegz'
INI = 'INI'
TRM = 'TRM'
CAS = 'CAS'
MON = 'A '
WILDCARD = '*'
DIHEDRAL_NUM = 73
CHAIN_ID_PROP = 'i_matsci_polymer_chain_id'
COUPLER_ATOM_PROP = 'b_matsci_polymer_coupler_atom'
GROWER_ATOM_PROP = 'b_matsci_polymer_grower_atom'
# Groups together information needed when joining two fragments
AttachmentAtoms = namedtuple(
'AttachmentAtoms', ['coupler', 'coupler_marker', 'grower', 'grow_marker'])
HEADFIRST = 'headfirst'
TAILFIRST = 'tailfirst'
# Tacticity constants
TACTICITY_ISO = 'Isotactic'
TACTICITY_SYN = 'Syndiotactic'
TACTICITY_ATAC = 'Atactic'
# Atom properties
HEAD_ATOM_PROP = msprops.HEAD_ATOM_PROP
TAIL_ATOM_PROP = msprops.TAIL_ATOM_PROP
HT_ATOM_PROPS = [HEAD_ATOM_PROP, TAIL_ATOM_PROP]
CASCADER_ATOM_PROP = 'b_matsci_polymer_cascader_atom'
CASCADER_MARKER_ATOM_PROP = 'b_matsci_polymer_cascader_marker_atom'
BRANCH_ATOM_PROP = 'b_matsci_polymer_branch_atom'
BRANCH_PCT_ATOM_PROP = 'r_matsci_polymer_branch_atom_pct'
BRANCH_GEN_ATOM_PROP = 'i_matsci_polymer_branch_atom_gen'
BRANCH_MAXGEN_ATOM_PROP = 'r_matsci_polymer_branch_atom_maxgen'
CHIRAL_BB_ATOM_PROP = 'b_matsci_polymer_chiral_bb_atom'
CHIRAL_NONE_MONOMER_PROP = 'b_matsci_polymer_monomer_chiral_none'
CARB_ATOM_IDENTIFIER_PROP = 's_matsci_carbohydrate_atom_identifier'
MARKER_START_PROP = 'b_matsci_polymer_marker_start'
MONOSACC_PROP = 's_matsci_polymer_monosaccharide_type'
# Role properties
MOIETY_PROP = 's_matsci_polymer_gui_moiety'
# Properties for initiators/terminators/cascaders/monomers for communication
# between GUI and backend
# Comma-delimited list of marker atom numbers
MARKER_PROP = 's_matsci_polymer_gui_marker_list'
# Set to one of the tacticity constants
TACTICITY_PROP = 's_matsci_polymer_gui_tacticity'
# The % branching probability for branching monomers
BRANCH_PCT_PROP = 'r_matsci_polymer_gui_branch_pct'
# The max number of branching genertions
BRANCH_MAX_PROP = 'i_matsci_polymer_gui_branch_max'
# Whether the backbone should be all trans
BBTRANS_PROP = 'b_matsci_polymer_gui_bb_trans'
# Number of cascade generations
CASCADE_GEN_PROP = 'i_matsci_polymer_gui_cascade_gen'
MONOMER_ORIG_IDX_PROP2 = 'i_matsci_polymer_monomer_orig_atom_idx2'
INITIATOR = 'initiator'
TERMINATOR = 'terminator'
CASCADER = 'cascader'
MONOMERX = 'monomer_x'
RIGID_BODY_PROP = 'b_matsci_rigid_body'
DEFAULT_VDW_SCALE = 0.5
# Chirality
R_S = [msprops.CHIRAL_R, msprops.CHIRAL_S]
PROP_BY_CHIRALITY = OrderedDict()
PROP_BY_CHIRALITY[msprops.CHIRAL_R] = msprops.CHIRAL_R_MONOMER_PROP
PROP_BY_CHIRALITY[msprops.CHIRAL_S] = msprops.CHIRAL_S_MONOMER_PROP
PROP_BY_CHIRALITY[None] = CHIRAL_NONE_MONOMER_PROP
ATOM_PROP_MARKER = [
HEAD_ATOM_PROP, TAIL_ATOM_PROP, CASCADER_ATOM_PROP, BRANCH_ATOM_PROP
]
UNIQUE_ATOM_PROP = coarsegrain.CG_UNIQUE_PARTICLE_LABEL_KEY
ATOM_PROP_MARKER_IDX = list(zip(ATOM_PROP_MARKER, [0, 1, 2, 2]))
ORIG_ATOM_IDX_PROP = 'i_matsci_orig_atom_idx'
MAX_HIT_NUM = 50
BOLTZMANN_DIS = 'Boltzmann at'
UNIFORM_DIS = 'Uniform'
MAX_VDW_SCALE = 0.5
ConnectionInfo = namedtuple('ConnectionInfo',
['missed_atom_ids', 'connecting_atom_ids'])
IMMERSED = 'immersed'
CONTAINER = 'container'
INTERFACE = 'interface'
# Convert AMU to kg, then kg to g (*1000) and A3 to CM3
AMU_TO_KG = constants.physical_constants['atomic mass constant'][0]
AMU_PER_A3_TO_G_PER_CM3 = AMU_TO_KG * 1000 * (1e8)**3
logger = None
[docs]def log_debug(msg):
"""
Add a message to debug logger file.
:type msg: str
:param msg: The message to debug logging
"""
global logger
if logger is None:
logger = textlogger.create_debug_logger(__name__)
logger.debug(msg)
def _add_head_and_tail_props_to_neighbors(struct, indexes):
"""
Mark the two given atoms with the polymer role HEAD (first atom) and TAIL
(second atom).
:param `schrodinger.structure.Structure` struct: The structure to mark
:param list indexes: [head atom index, tail atom index]
:raises RuntimeError: If len(indexes) != 2
"""
if len(indexes) != 2:
raise RuntimeError('indexes must have length = 2')
for index, role, capper_role in zip(
indexes, (msprops.HEAD, msprops.TAIL),
(msprops.HEAD_CAPPER, msprops.TAIL_CAPPER)):
atom = struct.atom[index]
atom.property[msprops.ROLE_PROP] = capper_role
for neighbor in atom.bonded_atoms:
neighbor.property[msprops.ROLE_PROP] = role
[docs]def has_head_tail_roles_marked(struct):
"""
Check if a monomer is marked with the polymer head and tail role properties
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to check
:rtype: bool
:return: True if head and tail role atoms are marked, False if not
"""
role_names = [
msprops.HEAD, msprops.TAIL, msprops.HEAD_CAPPER, msprops.TAIL_CAPPER
]
asl = ' or '.join([f'(atom.{msprops.ROLE_PROP} {x})' for x in role_names])
role_indexes = analyze.evaluate_asl(struct, asl)
if len(role_indexes) != 4:
return False
roles = [struct.atom[x].property[msprops.ROLE_PROP] for x in role_indexes]
return all(roles.count(x) == 1 for x in role_names)
[docs]def mark_monomer_head_tail_ce_th(struct):
"""
Find monomers marked with the Canvas method of marking head and tail - a Ce
atom marks the head and tha Th atom marks the tail. Mark the Ce atom with
the HEAD role and the Th atom with the TAIL role.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to mark
:rtype: bool
:return: True if head and tail role atoms were marked, False if not
"""
ce_atoms = analyze.evaluate_asl(struct, 'atom.ele Ce and atom.att 1')
th_atoms = analyze.evaluate_asl(struct, 'atom.ele Th and atom.att 1')
if len(ce_atoms) != 1 or len(th_atoms) != 1:
return False
_add_head_and_tail_props_to_neighbors(struct, (ce_atoms[0], th_atoms[0]))
return True
[docs]def mark_monomer_head_tail_dummy(struct):
"""
Find monomers marked with the dummy atom method of marking head and tail -
dummy atoms mark both the head and tail. In this case there is nothing to
distinguish the two position, so mark the first dummy atom with the HEAD
role and the second dummy atom with the TAIL role.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to mark
:rtype: bool
:return: True if head and tail role atoms were marked, False if not
"""
# Note - can't use ASL for dummies, see SHARED-6642
dummies = [
x.index
for x in struct.atom
if x.atomic_number == -2 and x.bond_total == 1
]
if len(dummies) != 2:
return False
_add_head_and_tail_props_to_neighbors(struct, dummies)
return True
[docs]def mark_monomer_head_tail_rx(struct):
"""
Find monomers marked with the polymer builder method of marking head and
tail - the Rx atom property is used to identify atoms. The atom designated
as R1 is marked as the HEAD role and the atom designated as R2 with the TAIL
role.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to mark
:rtype: bool
:return: True if head and tail role atoms were marked, False if not
"""
rx_atoms, max_x_val = buildcomplex.get_marker_atom_indexes_from_structure(
struct)
# There must be exactly two Rx values, 1 and 2
if any((1 not in rx_atoms, 2 not in rx_atoms, len(rx_atoms) != 2)):
return False
# There must be only one atom of each type
for atoms in rx_atoms.values():
if len(atoms) != 1:
return False
indexes = [rx_atoms[x][0] for x in (1, 2)]
_add_head_and_tail_props_to_neighbors(struct, indexes)
return True
[docs]def mark_monomer_head_and_tail(struct):
"""
Find monomers marked with a variety of marking head and tail atoms and mark
them with the polymer role property. Note that the atoms so marked will be
the atoms to remove when building a polymer - the disposable H, dummy or
other atom attached to the heavy atom that remains in the monomer when the
polymer is built.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to mark
:rtype: bool
:return: True if head and tail role atoms were marked, False if not
"""
return (has_head_tail_roles_marked(struct) or
mark_monomer_head_tail_ce_th(struct) or
mark_monomer_head_tail_rx(struct) or
mark_monomer_head_tail_dummy(struct))
[docs]def mark_orig_atom_idx(struct):
"""
Mark the original atom indexes as atom property.
:type struct: 'schrodinger.structure.Structure'
:param struct: each atom in this structure will be marked with atom index
as original atom index property.
"""
for atom in struct.atom:
atom.property[ORIG_ATOM_IDX_PROP] = atom.index
[docs]def get_orig_atom_idx(struct):
"""
Return the original atom indexes.
:type struct: 'schrodinger.structure.Structure'
or any 'schrodinger.structure._AtomCollection'
:param struct: the structure to get original atom indexes.
:rtype: list of int
:return: the original atom indexes of this atom collection.
"""
return [atom.property[ORIG_ATOM_IDX_PROP] for atom in struct.atom]
[docs]def get_missed_atom_id_groups(struct, atom_ids):
"""
Return the missed atom indexed grouped by bond connectivity.
:type struct: 'schrodinger.structure.Structure'
:param struct: the structure to get missed atom indexes groups
:type atom_ids: list of int
:param atom_ids: each int is an atom index of a missing atom in the
polymerfragment.
:rtype: list of list
:return: such sublist contains atom indexes that are bonded.
"""
struct = struct.copy()
mark_orig_atom_idx(struct)
sub_struct = struct.extract(atom_ids)
return [get_orig_atom_idx(mol) for mol in sub_struct.molecule]
[docs]def get_density(struct):
"""
Calculate the density of the struct.
:type struct: `schrodinger.structure.Structure`
:param struct: A structure with chorus_properties
:rtype: float
:return: The density of the struct in unit g/cm^3
"""
chorus_properties = xtal.get_chorus_properties(struct)
params = xtal.get_params_from_chorus(chorus_properties)
volume = xtal.get_volume_from_params(params)
return AMU_PER_A3_TO_G_PER_CM3 * struct.total_weight / volume
[docs]def get_maximum_vdw_radius(struct):
"""
Find the maximum VDW radius of any atom in the structure
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to check
:rtype: float
:return: The maximum VDW radius of any atom
"""
maxrad = 0.0
for atom in struct.atom:
maxrad = max(maxrad, atom.radius)
return maxrad
[docs]def get_color(index):
"""
Get the next color in our color rotation
:type index: int
:param index: The index in the color rotation to get. If index is greater
than the number of colors, we just start over
:rtype: str
:return: A color as understood by the
`schrodinger.structure._StructureAtom.color` property
"""
true_index = index % len(COLORS)
return COLORS[true_index]
[docs]def random_rotation(struct,
centroid=None,
max_rotation=TWO_PI,
no_rotation=False,
rotate_velocities=False):
"""
Randomly rotate struct in 3 dimensions
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to rotate
:type centroid: 3-element numpy array
:param centroid: the rotation center [x, y, z]
:type max_rotation: float
:param max_rotation: The maximum rotation to perform
:param bool no_rotation: If True, do not rotate the structure
:param bool rotate_velocities: If True, besides the structure, also rotate
FFIO atom velocities (if present)
:rtype: list, list
:return: Centroid and rotation matrix
"""
# Make sure the centroid is at the origin
if centroid is None:
centroid = transform.get_centroid(struct)[:3]
struct.setXYZ(struct.getXYZ() - centroid)
if no_rotation:
matrix = numpy.eye(4).tolist()
else:
# Obtain a random vector to rotate around and the amount to rotate
vec = [random.uniform(-0.5, 0.5) for x in range(3)]
nvec = transform.get_normalized_vector(numpy.array(vec))
angle = random.uniform(0.0, max_rotation)
# Get the rotation matrix and perform the rotation
matrix = transform.get_rotation_matrix(nvec, angle)
transform.transform_structure(struct, matrix)
if rotate_velocities:
for atom in struct.atom:
vel = msutils.get_atom_ffio_velocity(atom)
vel = transform.transform_atom_coordinates(vel, matrix)
msutils.set_atom_ffio_velocity(atom, vel)
# Move the structure back to its original centroid
struct.setXYZ(struct.getXYZ() + centroid)
return centroid, matrix
[docs]def prepare_building_block(struct, index, recolor=True, preserve_res=False):
"""
Do some prepwork on a component before adding them to the disordered cell
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to modify
:type index: int
:param index: The index of this structure in the list of components
:type recolor: bool
:param recolor: Whether to color the molecule a single color
:type preserve_res: bool
:param preserve_res: Whether to preserve the current residue information
"""
# Set properties of each structure to make it easy to distinguish
# the components in the full cell
if recolor:
color = get_color(index - 1)
# Ensure we stay within 4 character pdbres values (MATSCI-769)
if preserve_res:
set_pdbres = False
else:
new_pdbres = 'M%d' % index
new_pdbres = new_pdbres.ljust(4)
old_pdbres = struct.atom[1].pdbres.strip()
# Define->Molecule->Molecule type needs single residue (MATSCI-6087)
set_pdbres = not old_pdbres or old_pdbres == 'UNK' or len(
struct.residue) != 1
for atom in struct.atom:
if recolor:
atom.color = color
if set_pdbres:
atom.pdbres = new_pdbres
atom.resnum = 1
[docs]def detect_custom_mmffld_data(structs):
"""
Detect if a custom FEP charge block exists in the structure
:param list structs: List of `Structure` objects to check
:rtype: bool
:return: Whether a custom FEP charge block exists in any of the strucures
"""
has_data = False
for struct in structs:
# I'm not sure the attempt to use get_or_open_additional_data is needed
# in this case - in all tested cases when reading a structure from a
# file like we do here the get_unrequested call works fine, but this is
# the pattern used in other places so I'm keeping it here for safety
# until we better understand this.
try:
handle = mm.mmct_ct_m2io_get_unrequested_handle(struct)
except:
try:
handle = mm.mmct_ct_get_or_open_additional_data(struct, True)
except:
handle = None
if handle:
has_data = bool(
mm.m2io_get_number_blocks(handle, 'mmffld_custom_virt'))
if has_data:
break
return has_data
[docs]def get_generic_end_structure(is_coarse_grain=False):
"""
Get a single atom structure that can be used as a terminal
(initiator/terminator) moiety.
:type is_coarse_grain: bool
:param is_coarse_grain: whether the structure is coarse grain
:rtype: `schrodinger.structure.Structure`
:return: Structure of one single H or C atom with Br marker.
"""
struct = structure.create_new_structure()
struct.addAtom('C' if is_coarse_grain else 'H', 0, 0, 0)
struct.addAtom('Br', 1, 0, 0)
struct.addBond(1, 2, 1)
struct.property[MARKER_PROP] = '2'
# Without this line the H/C gets an "unknown atom type" color
buildcomplex.transmute_atom(struct.atom[1], struct.atom[1].element)
return struct
[docs]def dispersity(structs):
"""
Return dispersity of the structures.
Notes with Einstein notation (duplicated index means summation):
Mn = NiMi/sum(Ni); Mw = NiMiMi/NiMi
Mw/Mn = sum(Ni) * NiMiMi / (NiMi)^2
When Ni = 1, Mw/Mn = len(Mi) * MiMi /(sum(Mi))^2
:param structs: the list of structures to calculate dispersity.
:type structs: list of structure.Structure
:return: dispersity over all structures
:rtype: float
"""
mols = [mol for st in structs for mol in st.molecule]
mws = [mol.extractStructure().total_weight for mol in mols]
mws_squared = numpy.power(mws, 2)
return len(mws) * numpy.sum(mws_squared) / numpy.power(numpy.sum(mws), 2)
[docs]class FlorySchulzDistribution:
"""
Molecular weight distribution in linear step-growth homo polymers (single
monomer type or equimolar quantities for two types of monomers).
Collected and published by Polymer Properties Database (polymerdatabase.com)
Refs:
P. J. Flory, Journal of the American Chemical Society, 58, 1877-1885 (1936)
Wallace H. Carothers, Trans. Faraday Soc., Vol. 32, pp 39-49 (1936)
Paul L. Flory, Principles of Polymer Chemistry, Ithaca, New york, 1953
Paul J. Flory, Chem. Rev., Vol. 39, No. 1, 137 (1946)
Story (paraphrase):
Monomer conversion (p) is reactivated monomers over all initial monomers
An oligomer containing x repeat units must have undergone x-1 reactions
"""
[docs] def __init__(self, conversion=0.5, initial_num=1000, xmer=5):
"""
:param conversion: reactivated monomers over all initial monomers
:type conversion: float in [0, 1)
:param initial_num: the initial number of monomers
:type initial_num: positive int
:param xmer: repeat unit number in a chain
:type xmer: positive int
"""
self.conversion = conversion
self.xmer = xmer
self.initial_num = initial_num
[docs] def polydispersity(self):
"""
Carothers equation in step-growth polymerization, proposed by Wallace
Carothers, who invented nylon in 1935.
Conditions: two monomers in equimolar quantities
Ref: https://en.wikipedia.org/wiki/Carothers_equation
:return: polydispersity according to Carothers equation for two monomers
in equimolar quantities
:rtype: float
"""
return 1 + self.conversion
[docs] def xmerProbability(self):
"""
The number probability of x-mer at the specific conversion.
:return: The probability of a chain is x-mer long
:rtype: float
"""
return (1 - self.conversion) * pow(self.conversion, self.xmer - 1)
[docs] def moleculeNum(self):
"""
The number of molecules (polymer, oligomer, and monomers).
:return: the total number of molecules at the specific conversion.
:rtype: float
"""
return self.initial_num * (1 - self.conversion)
[docs] def degreeOfPolymerization(self):
"""
Degree of polymerization at the specific conversion.
:return: Degree of polymerization at the specific conversion.
:rtype: float
"""
return 1 / (1 - self.conversion)
[docs] def setConversion(self, deg_of_polym=2):
"""
Set the conversion based on degree of polymerization.
:param dp: Degree of polymerization
:type dp: int
"""
self.conversion = 1 - 1 / deg_of_polym
[docs] def xmerNum(self):
"""
The number of molecules of x-mer length at the specific conversion.
:return: number of molecules
:rtype: float
"""
return self.moleculeNum() * self.xmerProbability()
[docs] def numberDistribution(self, mer_lim=None):
"""
The number distribution: monomer length vs number probability.
:param mer_lim: the lower and upper limits of x-mers
:type mer_lim: None or a tuple of two int
:return: monomer length, probability
:rtype: 2d numpy array
"""
if mer_lim is None:
mer_lim = (1, 1000000)
mer_start, mer_end = list(map(int, mer_lim))
mer_num = mer_end - mer_start + 1
xmers = numpy.linspace(mer_start, mer_end, num=mer_num)
probs = (1 - self.conversion) * numpy.power(self.conversion, xmers - 1)
return numpy.vstack((xmers, probs)).T
[docs] def weightDistribution(self, mer_lim=None):
"""
The weight distribution: monomer length vs length-weighted probability.
:param mer_lim: the lower and upper limits of x-mers
:type mer_lim: None or a tuple of two int
:return: monomer length, length-weighted probability
:rtype: 2d numpy array
"""
xmer_prop = self.numberDistribution(mer_lim=mer_lim)
xmer_prop[:, 1] *= xmer_prop[:, 0]
xmer_prop[:, 1] *= (1 - self.conversion)
return xmer_prop
[docs]class Box(object):
"""
Class that defines the region a new structure may be placed in
"""
X, Y, Z = list(range(3))
AXES = [X, Y, Z]
[docs] def __init__(self, vertices):
"""
Create a box object
:type vertices: list
:param vertices: A six item list that defines the min and max value of
X, Y and Z. The six values in order are: Xmin, Xmax, Ymin, Ymax, Zmin,
Zmax
"""
self.vertices = vertices
[docs] def getMinMax(self, axis):
"""
Get the min and max values for axis
:type axis: int
:param axis: One of the class constants X, Y or Z
:rtype: (float, float)
:return: The min and max value for the requested axis
"""
return self.vertices[axis * 2], self.vertices[axis * 2 + 1]
[docs] def getLargestSpan(self):
"""
Get the largest span of the box in the X, Y or Z direction
:rtype: float
:return: The largest span (delta between the min and max value)
"""
spans = []
for axis in self.AXES:
amin, amax = self.getMinMax(axis)
spans.append(amax - amin)
return max(spans)
[docs] def getCentroid(self):
"""
Get the centroid of the box
:rtype: `numpy.array`
:return: The centoid of the box - a numpy array of 3 items (x, y, z)
"""
centroid = []
for axis in self.AXES:
amin, amax = self.getMinMax(axis)
centroid.append(old_div((amin + amax), 2.))
return numpy.array(centroid)
[docs] def getPointInBox(self):
"""
Get a point in a random position in the box
:rtype: list
:return: [x, y, z] coordinates of a random point in the box
"""
return [random.uniform(*self.getMinMax(a)) for a in self.AXES]
[docs] def isPointInBox(self, point):
"""
Is the given point inside the box
:type point: iterable
:param point: An iterable of length 3 floats that gives the x, y and z
values of the point in question
:rtype: bool
:return: Whether the point falls within the box
"""
for axis in self.AXES:
amin, amax = self.getMinMax(axis)
if not amin <= point[axis] < amax:
return False
return True
[docs] def getTranslationToBox(self, point):
"""
Return a vector that translates the mirror image of a point inside the
box
:type point: iterable
:param point: An iterable of length 3 floats that gives the x, y and z
values of the point in question
:rtype: list
:return: A list of 3 floats that gives the x, y and z coordinates of the
mirror image of point that lies within the box
"""
newpoint = []
for axis in self.AXES:
amin, amax = self.getMinMax(axis)
width = amax - amin
value = point[axis]
while value >= amax:
value = value - width
while value < amin:
value = value + width
newpoint.append(value)
return [b - a for a, b in zip(point, newpoint)]
[docs] def isValidPoint(self, point):
"""
Check if point is a valid point - the default implementation just
returns True. Subclasses may use this to weed out points that violate
custom box conditions
"""
return True
[docs]class BoxWithInnerHull(Box):
"""
A cubic Box object that contains an inner non-cubic region that limits the
valid volume for the components to be placed in.
"""
[docs] def __init__(self, hull, *args, **kwargs):
"""
Create a BoxWithInnerHull object
:type hull: `scipy.spatial.Delaunay`
:param hull: The convex hull defining the region of space the components
are allowed to be placed in
See parent class for additional documentation
"""
Box.__init__(self, *args, **kwargs)
self.hull = hull
[docs] def isPointInBox(self, point, *args):
"""
Overrides the parent method to check to see if the given point is also
inside the hull
:type point: iterable
:param point: An iterable of length 3 floats that gives the x, y and z
values of the point in question
:rtype: bool
:return: Whether the point falls within the box
"""
raw_check = Box.isPointInBox(self, point, *args)
return raw_check and self.isPointInHull(point)
[docs] def isPointInHull(self, point):
"""
Check to see if the point is inside the hull
:type point: iterable
:param point: An iterable of length 3 floats that gives the x, y and z
values of the point in question
:rtype: bool
:return: Whether the point falls within the hull
"""
# This is how you check if a point is inside the complex hull
return self.hull.find_simplex(point) >= 0.0
[docs] def getPointInBox(self, max_attempts=1000):
"""
Overrides the parent class to get a point that is inside the hull rather
than just in the box
:type max_attempts: int
:param max_attempts: Maximum number of attempts to try to find a point
that is both inside the box and the hull
:rtype: list
:return: List of three floats that give the xyz coordinates of a point
in the hull
:raise ScaffoldError: If a point can't be found inside the hull
"""
for ptry in range(max_attempts):
point = Box.getPointInBox(self)
if self.isPointInHull(point):
return point
raise ScaffoldError('Unable to find points inside the substrate '
'container')
[docs] def isValidPoint(self, point):
"""
Overrides the parent method to check to make sure the point is inside
the hull
:type point: iterable
:param point: An iterable of length 3 floats that gives the x, y and z
values of the point in question
:rtype: bool
:return: Whether the point is inside the hull
"""
return self.isPointInHull(point)
[docs]class ScaffoldError(ValueError):
"""
Class for errors involving the scaffold input
"""
[docs]class Scaffold(object):
"""
A Scaffold is a structure that occupies space in a cluster of molecules but
is not considered part of the system itself. Scaffolds could be: 1) A
nanoparticle that has an amorphous system surrounding it or a protein
surrounded by a box of water molecules, 2) A container such as a zeolyte or
nanotube that holds an amorphous system within it, 3) a surface upon which a
disordered system is laid down.
"""
[docs] def __init__(self, struct=None, volume=None):
"""
Create a Scaffold object
:type struct: `schrodinger.structure.Structure` or None
:param struct: The scaffold structure, or None if there is no scaffold
for the cell.
"""
self.scaffold = struct
if volume is None:
self.volume = self.approximateVolume(self.scaffold)
else:
self.volume = volume
if self.scaffold:
self.centroid = transform.get_centroid(self.scaffold)[:3]
# Mark scaffold atoms
for atom in self.scaffold.atom:
atom.property[SCAFFOLD_ATOM_PROP] = True
self.has_rings = bool(self.scaffold.find_rings())
else:
self.centroid = [0, 0, 0]
self.has_rings = False
self.box = None
def __bool__(self):
"""
Causes scaffold objects to evaluate to False if the scaffold structure
evaluates to False
:rtype: bool
:return: A boolean cast of the scaffold property
"""
return bool(self.scaffold)
[docs] @staticmethod
def approximateVolume(input_struct,
vdw_scale=1.0,
seed=None,
sphere_radius=None,
sphere_origin=None,
buffer_len=2,
return_ratio=False):
"""
Computes the approximate volume of the scaffold molecule using a Monte
Carlo sampling algorithm. The alogorithm:
1) define a box or sphere that fully encloses the structure
2) randomly pick a point inside the shape
3) check if the point lies inside the VDW radius of an atom
4) iterate over steps 2 & 3 a bunch
5) The volume is shape_volume * fraction of points inside VDW radii
:type vdw_scale: float
:param vdw_scale: The VdW scale factor to apply to VdW radii when
checking to see if a point is "inside" an atom
:type seed: int or None
:param seed: the seed for random, if None then random is not re-seeded
:type sphere_radius: float
:param sphere_radius: specifies to sample points in a sphere of this radius
in Angstrom rather than the default which uses the smallest enclosing
box
:type sphere_origin: numpy.array
:param sphere_origin: the origin in Angstrom of the sphere used if
sampling points in a sphere
:type buffer_len: float
:param buffer_len: a shape buffer lengths in Angstrom
:type return_ratio: bool
:param return_ratio: whether to return the hit ratio as a percent
:rtype: float or float, float
:return: The approximate volume of a molecule in Angstrom^3 followed
by the hit ratio if return_ratio is used
:note: This is expensive, so is only done at class initialization. After
that, call getExcludedVolume to obtain the cached volume.
"""
if not input_struct:
return 0.0
struct = input_struct.copy()
standard_nuclear_orientation.standard_nuclear_orientation(struct)
# see MATSCI-7250 GO custom classes, where during STU tests
# it was found that different results where obtained for different
# numbers of processors, having to do with how the processes
# inherit random, best to use a custom random
if seed is not None:
my_random = random.Random()
my_random.seed(seed)
else:
my_random = random
if sphere_radius is None:
# Determine the box size
mins = [min(struct.getXYZ()[:, x]) - buffer_len for x in range(3)]
maxes = [max(struct.getXYZ()[:, x]) + buffer_len for x in range(3)]
spans = [maxes[x] - mins[x] for x in range(3)]
vol = spans[0] * spans[1] * spans[2]
get_coords = lambda: numpy.array(
[mins[x] + my_random.random() * spans[x] for x in range(3)])
else:
# Determine the sphere size
sphere_radius += buffer_len
vol = 4 * numpy.pi * sphere_radius**3 / 3
mean = 0.
std_dev = 1.
if sphere_origin is None:
sphere_origin = transform.get_centroid(struct)[:3]
def get_coords():
# taken from functionalize_nanoparticle_driver.py
vec = numpy.array(
[my_random.gauss(mean, std_dev) for i in range(3)])
vec = (sphere_radius * my_random.uniform(0, 1)**
(1. / 3.)) * transform.get_normalized_vector(vec)
return vec + sphere_origin
# Create a distance cell to find any atom within the max VDW radius of
# a point
maxrad = get_maximum_vdw_radius(struct)
cell = mm.mmct_create_distance_cell(struct.handle, maxrad * vdw_scale)
hits = 0
numtries = max(300 * struct.atom_total, 20000)
for tries in range(numtries):
coords = get_coords().tolist()
neighbors_mm = mm.mmct_query_distance_cell(cell, *coords)
neighbors = mmlist._mmlist_to_pylist(neighbors_mm)
mm.mmlist_delete(neighbors_mm)
for neighbor in neighbors:
atom = struct.atom[neighbor]
dist2 = sum([(a - b)**2 for a, b in zip(coords, atom.xyz)])
radius = atom.radius * vdw_scale
if dist2 <= radius * radius:
hits += 1
break
mm.mmct_delete_distance_cell(cell)
ratio = hits / numtries
if return_ratio:
return vol * ratio, 100 * ratio
else:
return vol * ratio
[docs] def getNewCell(self):
"""
Return a new structure that contains the scaffold molecule. If there is
scaffold structure, the returned structure is empty
:rtype: `schrodinger.structure.Structure`
:return: A structure object that is either empty or contains the
scaffold structure, depending on whether a scaffold structure exists or
not.
"""
cell = structure.create_new_structure()
if self.scaffold:
cell.extend(self.scaffold)
return cell
[docs] def getExcludedVolume(self):
"""
Get the amount of volume the scaffold occupies
:rtype: bool
:return: The volume computed by approximateVolume when this object was
created.
"""
return self.volume
[docs] def defineBox(self, volume):
"""
The default implementation is to define a cube with all sides equal that
is centered on the scaffold centroid.
:type volume: float
:param volume: The volume of the desired cube
:rtype: `Box`
:return: The Box object for this scaffold
"""
side = volume**(old_div(1, 3.0))
if self.scaffold:
halfside = old_div(side, 2.0)
vertices = []
for coord in self.centroid:
vertices += [coord - halfside, coord + halfside]
else:
vertices = [0, side] * 3
self.box = Box(vertices)
return self.box
[docs] def addPBCProperties(self, struct, volume):
"""
Add periodic boundary condition properties to the structure. This method
overwrites any existing periodic boundary condition properties,
including Desmond chorus and PDB space group properties.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to add the properties to
:type volume: float
:param volume: The volume of the cubic periodic boundary condition
"""
side = volume**(old_div(1, 3.0))
aval = bval = cval = side
self.addDesmondPBC(struct, side)
self.addSpaceGroupPBC(struct, aval, bval, cval)
[docs] def addDesmondPBC(self,
struct,
ax,
ay=0.0,
az=0.0,
bx=0.0,
by=None,
bz=0.0,
cx=0.0,
cy=0.0,
cz=None):
"""
Add properties to the structure that define the periodic boundary
condition in the way Desmond wants it defined.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to add the properties to
:type ax: float
:param ax: The value of the ax box property
:type ay: float
:param ay: The value of the ay box property. Defaults to 0.
:type az: float
:param az: The value of the az box property. Defaults to 0.
:type bx: float
:param bx: The value of the bx box property. Defaults to 0.
:type by: float
:param by: The value of the by box property. If not given, this value is
set the same as ax.
:type bz: float
:param bz: The value of the bz box property. Defaults to 0.
:type cx: float
:param cx: The value of the cx box property. Defaults to 0.
:type cy: float
:param cy: The value of the cy box property. Defaults to 0.
:type cz: float
:param cz: The value of the cz box property. If not given, this value is
set the same as ax.
"""
desmondutils.store_chorus_box_props(struct,
ax,
ay=ay,
az=az,
bx=bx,
by=by,
bz=bz,
cx=cx,
cy=cy,
cz=cz)
[docs] def addSpaceGroupPBC(self,
struct,
aval,
bval,
cval,
alpha=90.,
beta=90.,
gamma=90.,
space_group='P 1'):
"""
Add properties to the structure that define the periodic boundary
condition in the way Crystal wants it defined.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to add the properties to
:type side: float
:param side: The length of one side of the cubic periodic boundary
condition
"""
xcl = xtal.Crystal
struct.property[xcl.SPACE_GROUP_KEY] = space_group
struct.property[xcl.A_KEY] = aval
struct.property[xcl.B_KEY] = bval
struct.property[xcl.C_KEY] = cval
struct.property[xcl.ALPHA_KEY] = alpha
struct.property[xcl.BETA_KEY] = beta
struct.property[xcl.GAMMA_KEY] = gamma
[docs] def getPBCMinMaxes(self):
"""
Get the length of one side of the periodic boundary condition
:type cell: `schrodinger.structure.Structure`
:param cell: The current cell with structures that have already been
placed
:rtype: list
:return: A six item list that defines the min and max value of
X, Y and Z. The six values in order are: Xmin, Ymin, Zmin, Xmax, Ymax,
Zmax. If not PBC box has been created, a list of all 0.0 is returned
"""
if self.box:
mins = []
maxes = []
for axis in self.box.AXES:
amin, amax = self.box.getMinMax(axis)
mins.append(amin)
maxes.append(amax)
return mins + maxes
else:
return [0.0] * 6
[docs] def getPointInBox(self):
"""
Get a point in a random position in the box
:rtype: list
:return: [x, y, z] coordinates of a random point in the box
"""
return self.box.getPointInBox()
[docs]class BuilderWithClashDetection(object):
"""
The base class for the amorphous builder classes
"""
[docs] def __init__(self,
basename='cell',
backend=None,
logger=None,
color=None,
vdw_scale=1.0):
"""
Create a Builder object
:type basename: str
:param basename: The base name for structure files created by this
builder
:type backend: `schrodinger.job.jobcontrol._Backend`
:param backend: The job control backend we are running under, or None if
not running under a backend
:type logger: `logging.Logger`
:param logger: The logger for this builder
:type color: str or None
:param color: Set to module constant COLOR_BY_MOLECULE to color the
structures in the cell by molecule
:type vdw_scale: float
:param vdw_scale: Scale factor for VdW radii during clash detection
"""
self.basename = basename
self.backend = backend
if self.backend is None:
self.backend = jobcontrol.get_backend()
self.logger = logger
self.vdw_scale = vdw_scale
self.color = color
self.has_rings = True
[docs] def log(self, msg, level=logging.INFO):
"""
Log a message
:type msg: str
:param msg: The message to log
:type level: `logging` constant
:param level: A log level constant from the `logging` module such as
INFO, WARNING, ERROR...
"""
textlogger.log(self.logger, msg, level=level)
[docs] def buildingBlocksHaveRings(self):
"""
Override in subclasses to check if any of the building blocks have
rings. If none do, it will be a waste of time to look for them in the
larger structure.
:rtype: bool
:return: If any of the building blocks have rings
"""
return self.has_rings
[docs] def findRings(self, struct):
"""
Find all the rings in the provided structure. Since ring-finding is
expensive, its best to pre-calculate them once. Since the coordinates
of the ring don't matter, we can use these rings over and over as long
as the atom numbering doesn't change.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to find the rings for
:rtype: list
:return: List of `schrodinger.structure._Ring` objects in the given
structure.
"""
if self.buildingBlocksHaveRings():
return list(struct.ring)
else:
return []
[docs] def findRingSpears(self,
ring_struct,
spear_struct=None,
rings=None,
ring_based=True,
pbc=None):
"""
Find all cases where a bond spears a ring
:type ring_struct: `schrodinger.structure.Structure`
:param ring_struct: The structure containing the rings
:type spear_struct: `schrodinger.structure.Structure`
:param spear_struct: The structure containing the atoms that might spear
the rings. If not provided, ring_struct will be used.
:type rings: list
:param rings: Each item of the list is a `schrodinger.structure._Ring`
object. This is the list returned by the findRings() method. If not
provided, they will be calculated on the fly - which takes considerable
time. If findRingSpears will be run more than once on the same structure
(even if the geometry changes), the rings should be precalculated via
findRings and passed in via this parameter.
:type ring_based: bool
:param ring_based: Whether the returned dictionary should contain keys
that are atom indexes of the speared ring (True), or of the bond
spearing the ring (False)
:type pbc: None, infrastructure.PBC, or list
:param pbc: If periodic boundary conditions should be used, provide
either an infrastructure.PBC object or the parameters to construct one.
Allowed constructors:
* a, b, c : box lengths
* a, b, c, alpha, beta, gamma box : box lengths and angles
* ax, ay, az, bx, by, bz, cx, cy, cz : box vectors
:rtype: dict
:return: If ring_based=True, keys are an atom index of one of the atoms
in the speared ring, and values are the atom index of one of the atoms
in the spearing bond. If ring_based=False, the keys/values are flipped.
"""
if not rings:
rings = self.findRings(ring_struct)
if not rings:
# No rings to spear
return {}
spears = ringspear.check_for_spears(ring_struct,
spear_struct=spear_struct,
rings=rings,
first_only=False,
pbc=pbc)
# Convert the spears list to a list of clashes
clashes = {}
for spear in spears:
spear_at = spear.spear_indexes[0]
ring_at = spear.ring_indexes[0]
if ring_based:
clashes.setdefault(ring_at, []).append(spear_at)
else:
clashes.setdefault(spear_at, []).append(ring_at)
return clashes
[docs] def removeIgnoredClashes(self, all_clashes, ignored_clashes):
"""
Get only those clashes that are not ignored.
:type all_clashes: dict
:param all_clashes: All found clashes
:type ignored_clashes: dict
:param ignored_clashes: Clashes that should be ignored
:rtype: dict
:return: Those clashes in all_clashes that are not in ignored_clashes.
Keys are atom indexes, values are all the atom indexes that clash with
that atom
"""
used_clashes = {}
for clashee, clashers in all_clashes.items():
if clashee not in ignored_clashes:
# All of this atom's clashes are not ignored
used_clashes[clashee] = clashers
else:
# Find if any of this atom's clashes are not ignored
raw_clashers = set(ignored_clashes[clashee])
for clasher in clashers:
if clasher not in raw_clashers:
used_clashes.setdefault(clashee, []).append(clasher)
return used_clashes
[docs] def getInfraStructure(self, struct):
"""
Get an infrastructure Structure object and an associated bitset
struct.atom_total long
:type struct: `schrodinger.structure.Structure`
:param struct: The python module Structure object
:rtype: tuple
:return: First item of the tuple is a
`schrodinger.infra.structure.Structure` object. Second item is a bitset
that is struct.atom_total long
"""
infra_struct = infrastructure.Structure_get_structure(struct)
bitset = mmbitset.Bitset(size=struct.atom_total)
bitset.fill()
return infra_struct, bitset
[docs] def getClashes(self,
struct1,
cutoff,
struct2=None,
pbc=None,
check_14=False):
"""
Find clashes - either intrastructure if struct2 is None, or
interstructure if struct2 is not None.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure for intrastructure clashes or the first
structure for interstructure clashes
:type cutoff: float
:param cutoff: The cutoff for finding clashes. This value is multipied
times the sum of the VDW radii of the two atoms. If the distance is less
than this scaled VDW radii sum, a clash exists
:type struct2: `schrodinger.structure.Structure`
:param struct2: The second structure for interstructure clashes
:type pbc: None, infrastructure.PBC, or list
:param pbc: If periodic boundary conditions should be used, provide
either an infrastructure.PBC object or the parameters to construct one.
Allowed constructors:
* a, b, c : box lengths
* a, b, c, alpha, beta, gamma box : box lengths and angles
* ax, ay, az, bx, by, bz, cx, cy, cz : box vectors
:type check_14: bool
:param check_14: If False, the atom pairs separated by 3 covalent bonds
are excluded for clash check.
:rtype: dict
:return: keys are atom indexes in struct1 (or struct2 if defined), and
values are all the atom indexes in struct1 that clash with that atom
"""
# Create any necessary periodic boundary condition
if pbc and not isinstance(pbc, infrastructure.PBC):
pbc = infrastructure.PBC(*pbc)
# Set up the parameters for finding clashes
hbonds = None
salt_bridges = None
cparams = infrastructure.ContactParams()
cparams.setCutoff(cutoff)
cparams.setShow1_4(check_14)
# Convert to infrastructure Structures and find clashes
infra_struct1, bitset1 = self.getInfraStructure(struct1)
if struct2:
infra_struct2, bitset2 = self.getInfraStructure(struct2)
contacts = infrastructure.get_contacts(infra_struct1, bitset1,
infra_struct2, bitset2,
cparams, hbonds,
salt_bridges, pbc)
else:
contacts = infrastructure.get_contacts(infra_struct1, bitset1,
cparams, hbonds,
salt_bridges, pbc)
# Convert the returned clashes to a dict of clashes
all_clashes = {}
for contact in contacts:
atom1 = contact.getAtom1().getIndex()
atom2 = contact.getAtom2().getIndex()
all_clashes.setdefault(atom1, []).append(atom2)
return all_clashes
[docs] def checkForIntraStructureClashes(self,
struct,
scale=None,
pbc=None,
rings=None):
"""
Check for any intrastructure clashes
:type struct: `schrodinger.structure.Structure`
:param struct: The structure for intrastructure clashes
:type scale: float
:param scale: The cutoff for finding clashes. This value is multipied
times the sum of the VDW radii of the two atoms. If the distance is less
than this scaled VDW radii sum, a clash exists
:type pbc: None, infrastructure.PBC, or list
:param pbc: If periodic boundary conditions should be used, provide
either an infrastructure.PBC object or the parameters to construct one.
Allowed constructors:
* a, b, c : box lengths
* a, b, c, alpha, beta, gamma box : box lengths and angles
* ax, ay, az, bx, by, bz, cx, cy, cz : box vectors
:type rings: list
:param rings: The precalculated rings returned by findRings() on the
structure. If not supplied, they will be calculated on the fly.
:rtype: dict
:return: keys are atom indexes, and values are all the atom indexes
that clash with that atom. A clash may come from two atoms too close
together, or a ring that is speared by a bond.
"""
if not scale:
scale = self.vdw_scale
# Get the close atom clashes
clashes = self.getClashes(struct, scale, pbc=pbc)
# Add in the ring spears
ring_clashes = self.findRingSpears(struct, rings=rings, pbc=pbc)
for at1, at2 in ring_clashes.items():
clashes.setdefault(at1, []).extend(at2)
return clashes
[docs] @staticmethod
def countClashes(clashes):
"""
Count the total number of clashes
:type clashes: dict
:param clashes: keys are atom indexes, values are all the atom indexes
that clash with that atom
"""
return sum([len(x) for x in clashes.values()])
[docs] def colorByMolecule(self, struct):
"""
Color each molecule in struct a different color
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to color
"""
for mol in struct.molecule:
color = get_color(mol.number - 1)
for atom in mol.atom:
atom.color = color
[docs]def nearest_rotatable_bonds(struct,
rings,
backbone_atom_id,
side_atom_ids,
pre_atom_prop=BRANCH_ATOM_PROP,
atom_prop=HEAD_ATOM_PROP):
"""
Search the side groups of one atom and find the 'first neighbor'
rotatable bonds.
:type struct: `schrodinger.structure.Structure`
:param struct: structure to search bonds
:type rings: list
:param rings: List of ring atom index lists
:type backbone_atom_id: int
:param backbone_atom_id: the atom id of one backbone atom
:type side_atom_ids: list
:param side_atom_ids: list of atom ids of the side groups for the
backbone_atom
:type pre_atom_prop: str
:param pre_atom_prop: the property to be set True for the atom in the
rotatable bonds that is closer to the backbone_atom_id
:type atom_prop: str
:param atom_prop: the property to be set True for the atom in the
rotatable bonds that is deeper in side group
:rtype: list of tuple
:return: each tuple is two atom indexes, indicating 'first neighbor'
rotatable bonds in side groups for one atoms
"""
bonds_to_break = []
atom_ids_to_walk = [backbone_atom_id]
side_atom_id_set = set(side_atom_ids)
while atom_ids_to_walk:
pre_atom = struct.atom[atom_ids_to_walk.pop(0)]
for atom in pre_atom.bonded_atoms:
if atom.index not in side_atom_id_set:
continue
if analyze.is_bond_rotatable(struct.getBond(pre_atom.index,
atom.index),
rings=rings,
allow_methyl=True):
pre_atom.property[pre_atom_prop] = True
atom.property[atom_prop] = True
bonds_to_break.append(tuple([pre_atom.index, atom.index]))
else:
atom_ids_to_walk.append(atom.index)
side_atom_id_set.remove(atom.index)
return bonds_to_break
[docs]class BuilderGrowInCellMixin(object):
"""
A mixin for classes that uses in-cell polymer grow method.
"""
[docs] def setRingsForOneMol(self, new_init_frag, ringnum):
"""
Create `_Ring` objects for each fragment in one molecule.
:type new_init_frag: `PolymerFragment`
:param new_init_frag: the first fragment containing initiator
:type ringnum: int
:param ringnum: the total number of rings of all molecules in cell
"""
for frag in new_init_frag.fragments():
if frag.rings_atom_ids:
for ring_ids in frag.rings_atom_ids:
one_ring_struct = structure._Ring(self.cell, ringnum,
ring_ids, self.all_rings)
self.all_rings.append(one_ring_struct)
frag.rings.append(one_ring_struct)
[docs] def placeOneIniInCell(self, frag, new_chain=True):
"""
Place a new or dead chain fragment containing initiator into the cell.
If new_chain=True, randomly pick a position in cell; make sure the position
is away from pre-existing initiators; check contact of the newly added
fragments against all pre-existing atoms. If contact exists, randomly pick
anothor position.
If new_chain=False, find a random position in the largest void in cell;
check contact; if contact exists, loop over all the gridded positions in the
largest void.
:type frag: `PolymerFragment`
:param frag: the first fragment containing initiator
:type new_chain: bool
:param new_chain: if True, the fragment is from a new chain; else, the
fragment is from a dead chain.
:rtype: bool
:return: True, if the initiator fragment is successfully placed in cell
"""
all_xyz = self.cell.getXYZ()
self.cur_frag = frag
self.cur_bitset = mmbitset.Bitset.from_list(self.cell.atom_total,
self.cur_frag.atom_ids)
self.setDistanceCell()
for new_xyz in self.getPosition(new_chain):
copy_struct = frag.template_polymer.copy()
random_rotation(copy_struct, centroid=numpy.array([0, 0, 0]))
mol_xyz = copy_struct.getXYZ() + new_xyz
all_xyz[frag.atom_gids_in_mol, :] = mol_xyz
self.cell.setXYZ(all_xyz)
if not self.hasClashes():
# succeed to place the INI frag into cell
return True
# Centroids of INI frags are aways, but newly added INI
# frag has clashes with pre-existing atoms
# Shouldn't hit this at the beginning of grow_together,
# unless very short polymer with very large INI
else:
# reach the max trials to place INI frag into cell
return False
[docs] def initiateMultiMols(self, frags, grid_mids, margins, indexes):
"""
Place multiple initiators of the fragments into the cell.
:type frags: list of `PolymerFragment`
:param frags: the first fragments containing initiators
:param grid_mids: middle points on a grid layout
:type grid_mids: list
:param margins: the margin between two middle points
:type margins: list
:param indexes: the available grid indexes
:type indexes: list
:return: the fragment failed to be placed
:rtype: list of `PolymerFragment`
"""
self.distance_cell = infrastructure.DistanceCell(
self.cell.handle, self.max_distance, self.pbc)
if any(x.rings for x in frags):
self.ringspear_distance_cell = infrastructure.DistanceCell(
self.cell.handle, ringspear.ROUGH_CUT_DISTANCE, self.pbc)
# Rotate and move the molecules so that the initiators fit into the grids,
# and then filter out the ones clashing with the previous structure.
all_xyz = self.cell.getXYZ()
xyzs = self.getPositions(len(frags), grid_mids, margins, indexes)
failed_frags = []
for frag, xyz in zip(frags, xyzs):
copy_struct = frag.template_polymer.copy()
random_rotation(copy_struct, centroid=numpy.array([0, 0, 0]))
mol_xyz = copy_struct.getXYZ() + xyz
all_xyz[frag.atom_gids_in_mol, :] = mol_xyz
self.cell.setXYZ(all_xyz)
self.cur_bitset = mmbitset.Bitset.from_list(self.cell.atom_total,
frag.atom_ids)
self.cur_frag = frag
if self.hasClashes():
failed_frags.append(frag)
continue
self.bitSetOn(frag)
for nfrag in frag.next_frags:
nfrag.continue_grow = True
if frag.next_frags:
self.frags_by_mol.append([frag.next_frags[:], 0])
continue
self.cur_struct_in_cell += 1
return failed_frags + frags[len(xyzs)::]
[docs] def getGrids(self, diameter):
"""
Get the grid layout related things.
:param diameter: the diameter of the largest initiator
:type diameter: float
:return: grid points, margins in three dims, available grid indexes
:rtype: dict, list, list
"""
if self.scaffold.scaffold_mode == IMMERSED:
# Without substrate, the scaffold box defines the space
vecs = self.pbc.getBoxVectors()
vrt = self.scaffold.box.vertices
origin = numpy.array([vrt[0], vrt[2], vrt[4]])
atoms_on = list(self.existing_atom_bitset)
if atoms_on:
# The align the grid centroid with the existing atom centroid
# bitset starts from index 1, and getXYZ() needs indexes from 0
gids = [i - 1 for i in atoms_on]
origin += self.cell.getXYZ()[gids, :].mean(axis=0)
elif self.scaffold.scaffold_mode == INTERFACE:
# The edges of the hull parallel to the pbc.getBoxVectors() are
# used to grid the void. The hull has vectors of the proper length but we
# don't know which one corresponds to which PBC vector. So we find the
# three hull vectors that are most closely parallel to the PBC vectors.
pbc_vecs = [
x / numpy.linalg.norm(x) for x in self.pbc.getBoxVectors()
]
points = self.scaffold.box.hull.points
hull_vecs = numpy.array(
[points[i] - points[0] for i in range(1, len(points))])
# Find the angles between each PBC vector and all hull vectors
angles = [[
abs(transform.get_angle_between_vectors(y, x))
for y in hull_vecs
]
for x in pbc_vecs]
# Find the hull vector that is most nearly parallel to each PBC vector
mangles = [min(x) for x in angles]
# Indexes of the hull vectors that are parallel to the PBC vectors
mangle_idxs = [x.index(y) for x, y in zip(angles, mangles)]
# Each vector has the length of the corresponding Hull vector and
# the direction of the PBC vector
vecs = [
x * numpy.linalg.norm(hull_vecs[y])
for x, y in zip(pbc_vecs, mangle_idxs)
]
origin = points[0]
lens = [numpy.linalg.norm(vec) for vec in vecs]
grid_num = [math.floor(x / diameter) for x in lens]
num = [i if i <= 2 else i - 1 for i in grid_num]
indexes = tuple(
itertools.product(*[[y + 0.5 for y in range(x)] for x in num]))
pts = {
x: origin + numpy.dot(numpy.array(x) / num, vecs) for x in indexes
}
margins = [(x - y * diameter) / y for x, y in zip(lens, num)]
return pts, margins, indexes
[docs] def getPositions(self, num, grid_mids, margins, indexes):
"""
Get positions based on grid with randomness
:param num: the number of position requested
:type num: int
:param grid_mids: the grid middle points
:type grid_mids: dict
:param margins: the margins for grid points in each dimension.
Intentionally design this room so that initiators can translationally
move away from the grid mid points.
:type margins: list of float
:param indexes: the available grid indexes
:type indexes: list
:return: list of random points
:rtype: list
"""
try:
sel_indexes = random.sample(indexes, num)
except ValueError:
# The requested sample number exceeds the pool
sel_indexes = indexes
xyzs = []
for index in sel_indexes:
xyz = grid_mids[index]
xyz += numpy.array([x * random.random() for x in margins])
xyzs.append(xyz)
return xyzs
[docs] def growFragmentCell(self, density, avdw_scale):
"""
Make a single attempt at building the cell by growing polymer fragments.
:type density: float
:param density: The density of the cell
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
:rtype: `schrodinger.structure.Structure` or None
:return: The built cell, or None if an error occurred
"""
self.initTangledMethod(density)
self.setCellsForTangledChain()
self.copyAllStructsIntoCell()
self.preparePbcAndParams(avdw_scale)
self.setSubstrateSpearRings()
self.setVolGraphAndBitset()
components = self.getAllComponents()
multi_comp = len(components) > 1
for ini_frags in components:
if multi_comp and self.scaffold.scaffold_mode != CONTAINER:
try:
self.setIniFrags(ini_frags)
except InitiationError as err:
self.log(str(err))
return None
else:
try:
self.setIniFragsOnebyOne(ini_frags)
except InitiationError as err:
self.log(str(err))
return None
if self.grow_together and ini_frags != components[-1]:
# Grow all molecules simultaneously:
# continue to place INI frags into the cell until None left
continue
# Adjust structure of rest fragments in a molecule
while self.frags_by_mol:
# FIX ME: multi threads should help here due to
# 1. Before chains meet each other, grow each chain in one thread,
# and then check clashes only between newly added atoms on sync
# 2. When only a few chains cannot are left, grow the same single
# chain with different random numbers in each thread. If succeed
# by one thread, abandon other attempts in different threads
for frags_and_trial_num in self.frags_by_mol:
(frags_in_one_mol, tried_per_mol) = frags_and_trial_num
self.cur_frag = frags_in_one_mol.pop(0)
self.cur_bitset = mmbitset.Bitset.from_list(
self.cell.atom_total, self.cur_frag.atom_ids)
self.cur_frag.hitted_num += 1
dihe_values = self.cur_frag.getDiheValues()
if len(dihe_values):
self.setDistanceCell()
for dihe_value in dihe_values:
self.cell.adjust(dihe_value, *self.cur_frag.dihedral)
if self.hasClashes():
# Continue to try another dihedral value
continue
self.cur_frag.dihe_value = dihe_value
break
else:
# Back move, as all dihedrals in frag lead to clashes
pre_frag = self.cur_frag.pre_frag
if pre_frag.is_branching:
# Track other branches and delete from pre-existing
self.deleteOtherBranches(pre_frag, frags_in_one_mol)
if not pre_frag.dihe_value:
# Dead chain: pre_frag is INI frag due to back move
tried_per_mol += 1
if self.relocateFailedMol(pre_frag,
frags_in_one_mol,
tried_per_mol):
frags_and_trial_num[1] = tried_per_mol
# Continue to try fragment in next molecule
continue
else:
self.log(
f'Only {self.cur_struct_in_cell} molecules '
f'succeeded. ({self.num_structs} requested)'
)
return None
# The previous dihedral of pre_frag won't be considered again
# why need if statement:
# 1. one dihe deleted from pre_frag pool due to first time back move
# 2. starting from the parent of pre_frag, pre_frag.dihe_value is set randomly
# 3. the second time performing back move on pre_frag, the randomly set dihe
# may not be in the pool
if pre_frag.dihe_value in pre_frag.dihe_pool:
pre_frag.dihe_pool.remove(pre_frag.dihe_value)
pre_frag.continue_grow = False
self.bitSetOff(self.existing_atom_bitset, pre_frag)
frags_in_one_mol.insert(0, pre_frag)
continue
self.bitSetOn(self.cur_frag)
for frag_of_same_generation in self.cur_frag.next_frags:
frag_of_same_generation.continue_grow = True
if frag_of_same_generation.in_sidegroup:
# insert as the first frag to be grown next time
frags_in_one_mol.insert(0, frag_of_same_generation)
else:
frags_in_one_mol.append(frag_of_same_generation)
self.removeFinishedPolymer()
self.log('Successfully grew %d structures' % self.num_structs)
return self.cell
[docs] def initTangledMethod(self, density):
"""
Initialize some attributes for the tangled method.
:param density: the density of the cell
:type density: float
"""
self.total_tried = 0
self.cur_struct_in_cell = 0
self.existing_rings = []
self.existing_spear_rings = []
self.pre_atom_ids = set()
self.frags_by_mol = []
num_per_chain = [x.num_struct for x in self.all_type_polymer_and_frags]
self.num_structs = sum(num_per_chain)
polymers = [x.template_polymer for x in self.all_type_polymer_and_frags]
self.volume = self.getDesiredVolume(polymers,
density,
num_per_chain=num_per_chain)
# Estimate the distance between initiators
# Note: container may have inaccurate volume and self.r_per_ini_frag,
# but self.r_per_ini_frag decreases automatically to solve the issue
self.r_per_ini_frag = (self.volume / self.num_structs / 4 * 3 /
math.pi)**(1. / 3.)
[docs] def setCellsForTangledChain(self):
"""
Set cells for tangle chain method.
"""
self.scaffold.defineBox(self.volume)
self.cell = self.getNewCellStructure(self.volume)
self.cell_template = self.cell.copy()
self.ini_cell = self.cell_template.copy()
[docs] def copyAllStructsIntoCell(self):
"""
Copy all structures into the cell, but the bitsets are off.
Deepcopy fragments.
"""
self.all_mol_fragments = collections.defaultdict(list)
self.existing_rings += [x for x in self.cell.ring]
self.all_rings = self.existing_rings[:]
total_ringnum = sum([
one_type_polymer_and_frag.ringnum
for one_type_polymer_and_frag in self.all_type_polymer_and_frags
])
molnum = 0
for atype_polym_frag in self.all_type_polymer_and_frags:
for index in range(atype_polym_frag.num_struct):
molnum += 1
new_init_frag = atype_polym_frag.first_frag.deepCopy(molnum)
new_init_frag.adjustAtomIds(self.cell.atom_total)
self.all_mol_fragments[atype_polym_frag].append(new_init_frag)
struct = atype_polym_frag.template_polymer.copy()
self.cell.extend(struct)
self.setRingsForOneMol(new_init_frag, total_ringnum)
[docs] def preparePbcAndParams(self, avdw_scale):
"""
Prepare pbc, contact parameters, max distance for distance cell.
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
"""
self.pbc = clusterstruct.create_pbc(self.cell)
log_debug(f"PBC: {','.join(map(str, self.pbc.getBoxLengths()))}")
self.cparams = infrastructure.ContactParams()
self.cparams.setShow1_4(False)
self.cparams.setCutoff(avdw_scale)
max_radius = max(x.radius for x in self.cell.atom)
self.max_distance = avdw_scale * max_radius * 2
log_debug(f"Minimum Clash Distance: {self.max_distance}")
[docs] def setSubstrateSpearRings(self):
"""
Set Spear Rings for the substrate.
"""
self.existing_spear_rings += [
ringspear.SpearRing(self.cell, ring.getAtomIndices(), pbc=self.pbc)
for ring in self.existing_rings
]
self.is_cg = msutils.is_coarse_grain(self.cell, by_atom=True)
[docs] def setVolGraphAndBitset(self):
"""
Set volume graph and bitset.
"""
self.vol_graph = XYZVolumeGraph(self.cell,
spacing=2.0,
scaffold=self.scaffold)
self.existing_atom_bitset = mmbitset.Bitset(size=self.cell.atom_total)
if not self.scaffold.scaffold:
return
for atom_id in range(1, self.scaffold.scaffold.atom_total + 1):
self.existing_atom_bitset.set(atom_id)
[docs] def setIniFragsOnebyOne(self, ini_frags):
"""
Set the initiator fragments one by one.
:param ini_frags: the initiators of some components to be placed into
the cell and grow later.
:type ini_frags: list of 'PolymerFragments'
"""
if not ini_frags:
return
self.log(f"Placing {len(ini_frags)} initiators...")
for ini_frag in ini_frags:
if not self.placeOneIniInCell(ini_frag):
raise InitiationError(
f'Only {self.cur_struct_in_cell} molecules '
'are successfully placed in cell, but '
f'{self.num_structs} are requested.')
# Turn on the initiator bitset, set fragment state, and register to
# the fragment pool to grow
self.bitSetOn(ini_frag)
for frag in ini_frag.next_frags:
frag.continue_grow = True
if ini_frag.next_frags:
self.frags_by_mol.append([ini_frag.next_frags[:], 0])
continue
# Rigid body components finish growth after initiation
self.cur_struct_in_cell += 1
self.log('failed molecule attempt: %i finished molecules: %i' %
(self.total_tried, self.cur_struct_in_cell))
[docs] def setIniFrags(self, ini_frags):
"""
Set the initiator fragments in batch mode.
:param ini_frags: the initiators of some components to be placed into
the cell and grow later.
:type ini_frags: list of 'PolymerFragments'
"""
if not ini_frags:
return
self.log(f"Placing {len(ini_frags)} initiators...")
diameter = self.getIniDiameter(ini_frags)
grid_mids, margins, indexes = self.getGrids(diameter)
tries_per_mol = 0
while ini_frags:
failed_frags = self.initiateMultiMols(ini_frags, grid_mids, margins,
indexes)
success_num = len(ini_frags) - len(failed_frags)
if not success_num:
tries_per_mol += 1
if tries_per_mol > self.tries_per_mol:
raise InitiationError(
f'Only {self.cur_struct_in_cell} molecules '
'are successfully placed in cell, but '
f'{self.num_structs} are requested.')
# Initiators placed into the cells, but some are not finished if
# they need to grow but haven't
self.log('failed molecule attempt: %i finished molecules: %i' %
(tries_per_mol, self.cur_struct_in_cell))
ini_frags = failed_frags
[docs] def getIniDiameter(self, frags):
"""
Get the largest diameters of the initiators by computing the longest
span between any pairs of atoms.
:param frags: list for fragments to be placed in
:type frags: list of 'PolymerFragments'
:return: the largest diameters of the initiators
:rtype: float
"""
dists = []
for frag in frags:
atoms = [self.cell.atom[x] for x in frag.atom_ids]
atom_xyz = [x.xyz for x in atoms]
dist = pdist(atom_xyz)
sdist = squareform(dist)
atom_id_pair = numpy.unravel_index(numpy.argmax(sdist), sdist.shape)
radius_sum = sum([atoms[i].radius for i in atom_id_pair])
max_dist = numpy.nanmax(dist) if len(dist) else 0.0
mdist = self.cparams.getCutoff() * radius_sum + max_dist
dists.append(mdist)
return max(dists)
[docs] def getAllComponents(self):
"""
Get all components to be placed in the cell. Volume, fragment number,
and replica number for each component are used to divide all component
into several groups.
:return: Large, soft, and fewer fragments first
:rtype: list of 'PolymerFragment' lists
"""
fragments = self.all_mol_fragments.values()
if self.grow_together:
return [list(itertools.chain.from_iterable(fragments)), []]
counts = [len(x) for x in fragments]
by_count = self.indexChanges(counts, reverse=True)
mols = [x[0].template_polymer for x in fragments]
radius_cubic = [sum([x.radius**3 for x in x.atom]) for x in mols]
by_size = self.indexChanges(radius_cubic)
frag_nums = [len(list(x[0].fragments())) for x in fragments]
by_flex = self.indexChanges(frag_nums, threshold=20)
by_all = [min(list(x)) for x in zip(by_count, by_size, by_flex)]
components = [[] for x in range(max(by_all) + 1)]
for index, fragments in zip(by_all, fragments):
components[index].extend(fragments)
return components
[docs] @staticmethod
def indexChanges(values, threshold=10., reverse=False):
"""
Use the threshold as the ratio criterion to index the sudden change
in the sorted sequence.
:param values: input values to divide
:type values: list of floats
:param threshold: the ratio criterion to identify the sudden change
:type threshold: float
:param reverse: If True, smaller values are grouped with smaller indexes.
Otherwise larger values are grouped with smaller indexes.
:type reverse: bool
:return: indexes are the group tokens
:rtype: list of int
"""
sorted_vals = sorted(values, reverse=True)
ratios = [1.]
ratios += [x / y for x, y in zip(sorted_vals[:-1], sorted_vals[1:])]
index, ratio_indexes = 0, []
for ratio in ratios:
if ratio >= threshold:
index += 1
ratio_indexes.append(index)
indexes = [ratio_indexes[sorted_vals.index(x)] for x in values]
if not reverse:
return indexes
max_index = max(indexes)
return [max_index - x for x in indexes]
[docs] def setDistanceCell(self):
"""
Set DistanceCell for the clash check of current fragment.
"""
# Pre-existing atoms in distance_cell don't change position during
# clash check for one certain fragment.
self.distance_cell = infrastructure.DistanceCell(
self.cell.handle, self.max_distance, self.pbc)
if self.cur_frag.rings:
# Ring spear uses a different cut distance
self.ringspear_distance_cell = infrastructure.DistanceCell(
self.cell.handle, ringspear.ROUGH_CUT_DISTANCE, self.pbc)
[docs] def writeDihedralDis(self, file_name='tmp_dis.txt'):
"""
Write the dihedral angle distribution of the structures in cell.
"""
# TEST ONLY
interval = old_div(360., (DIHEDRAL_NUM - 1))
my_array = numpy.zeros((DIHEDRAL_NUM, 2))
for idx in range(DIHEDRAL_NUM):
my_array[idx, 0] = idx * interval
for ini_frags in self.all_mol_fragments.values():
for ini_frag in ini_frags:
for frag in ini_frag.fragments(include_self=False):
dihe = self.cell.measure(*frag.dihedral) % 360
my_array[int(round(dihe, interval)), 1] += 1
numpy.savetxt(file_name, my_array)
[docs] def relocateFailedMol(self,
pre_frag,
frags_in_one_mol,
tried_per_mol,
undo_crystal_links=False):
"""
Place one failed molecule back into the cell.
:type pre_frag: {PolymerFragment}
:param pre_frag: the ini fragment of one polymer
:type frags_in_one_mol: list of {PolymerFragment}
:param frags_in_one_mol: polymer fragment of this polymer
within one polymer
:type tried_per_mol: int
:param tried_per_mol: failed trial number
:type undo_crystal_links: bool
:param undo_crystal_links: whether to unlink the chain from the crystals
:rtype: bool
:return: True, if successfully placed the ini fragment back
"""
if tried_per_mol == self.tries_per_mol:
# Reach the max trials for replacing one INT. Abort this cell
self.log(
'The growth of one molecule has failed %s times, reaching the '
'the maximum attempts per molecule set by -tries_per_mol.' %
self.tries_per_mol)
return False
self.total_tried += 1
self.log('failed molecule attempt: %i finished molecules: %i' %
(self.total_tried, self.cur_struct_in_cell))
# temporarily turn off INT frag and prepare voids search
self.bitSetOff(self.existing_atom_bitset, pre_frag)
try:
self.vol_graph.locateVoids(set(self.existing_atom_bitset))
except VolumeMemoryError as msg:
self.log(str(msg))
sys.exit()
if undo_crystal_links:
self.undoCrystalLinks(pre_frag)
self.resetOrigDihed(pre_frag)
# Try to add the dead chain back to cell by rotation and relocate
if self.placeOneIniInCell(pre_frag, new_chain=False):
pre_frag.resetFragDihedralPool()
for frag_of_same_generation in pre_frag.next_frags:
frag_of_same_generation.continue_grow = True
frags_in_one_mol.append(frag_of_same_generation)
# Successfully relocate one dead molecule and continue
self.bitSetOn(pre_frag)
return True
else:
self.log('Failed to relocate dead chains.')
return False
[docs] def hasClashes(self):
"""
Check whether current fragment has clashes and speared rings with
pre-existing structure.
:rtype: bool
:return: True, if has clashes
"""
if infrastructure.get_contacts(self.cell, self.cur_bitset,
self.distance_cell,
self.existing_atom_bitset, self.cparams):
# Atoms in the fragment clash with pre-exsiting atoms in cell
return True
if self.cur_frag.rings:
if ringspear.check_for_spears(self.cell,
atoms=self.pre_atom_ids,
rings=self.cur_frag.rings,
pbc=self.pbc,
cell=self.ringspear_distance_cell,
return_bool=True,
distorted=self.is_cg):
# Pre-exsiting atoms spear rings in the fragment
return True
if self.existing_rings:
# FIXME: shouldn't rebuild distance cell from scratch
# Temporary fix: extract substructure
# Add the bonds between fragments for ring spear check
if self.cur_frag.dihedral:
spear_struct = self.cell.extract(self.cur_frag.atom_ids +
[self.cur_frag.dihedral[2]])
else:
spear_struct = structure.Structure(
mm.mmct_ct_extract_atoms(self.cell.handle, self.cur_bitset),
True)
if ringspear.check_for_spears(self.cell,
spear_struct=spear_struct,
rings=self.existing_rings,
pbc=self.pbc,
spear_rings=self.existing_spear_rings,
return_bool=True,
distorted=self.is_cg):
# Atoms in the fragment spear rings in pre-exsiting atoms
return True
return False
[docs] def removeFinishedPolymer(self):
"""
Remove frags_by_mols that have no polymer fragments and update finished
polymer number.
"""
for frags_and_trial_num in reversed(self.frags_by_mol):
if not frags_and_trial_num[0]:
self.cur_struct_in_cell += 1
self.log(
'failed molecule attempt: %i finished molecules: %i' %
(self.total_tried, self.cur_struct_in_cell))
self.frags_by_mol.remove(frags_and_trial_num)
[docs] def bitSetOn(self, frag):
"""
Set on the pre-existing atom bitset according to atom ids in the fragment;
record rings and spear_rings according to rings in the fragment.
:type frag: `PolymerFragment`
:param frag: the polymer fragment providing the atom ids
"""
for atom_id in frag.atom_ids:
self.existing_atom_bitset.set(atom_id)
if self.has_rings:
self.pre_atom_ids.add(atom_id)
self.existing_rings += frag.rings[:]
frag.spear_rings = [
ringspear.SpearRing(self.cell, ring.getAtomIndices(), pbc=self.pbc)
for ring in frag.rings
]
self.existing_spear_rings += frag.spear_rings[:]
[docs] def bitSetOff(self, bitset, frag):
"""
Set the bitset off according to the atom ids in the fragment;
remove rings and spear_rings according to rings in the fragment.
:type bitset: `Bitset`
:param bitset: the bitset to set
:type frag: `PolymerFragment`
:param frag: the polymer fragment providing the atom ids
"""
for atom_id in frag.atom_ids:
bitset.unset(atom_id)
if self.has_rings:
self.pre_atom_ids.remove(atom_id)
for ring in frag.rings:
self.existing_rings.remove(ring)
for spear_ring in frag.spear_rings:
self.existing_spear_rings.remove(spear_ring)
[docs] def getPosition(self, new_chain):
"""
Find random positions in the cell according to the following rules.
If new_chain=True, randomly pick a position in cell that is away from
pre-existing initiators.
If new_chain=False, randomly loop over all the gridded positions in the
largest void in cell.
:type new_chain: bool
:param new_chain: if True, the fragment is from a new chain; else, the
fragment is from a dead chain.
:rtype: list
:return: [x, y, z], a random position in cell
"""
if new_chain:
return self.getRandXYZ()
else:
return self.getRandGridXYZ()
[docs] def getRandGridXYZ(self):
"""
Get xyz iterator of random grid points in the largest void.
:rtype: list
:return: [x, y, z], a random position in cell
"""
for xyz in self.vol_graph.getBuriedXYZ(self.vol_graph.getLargestVoid()):
# Volume graph and the scaffold box define different origin positions
# Shift xyz from volume graph into the scaffold box
shift_vector = self.scaffold.box.getTranslationToBox(xyz)
yield [b + a for a, b in zip(xyz, shift_vector)]
[docs] def getRandXYZ(self, max_num=2000, max_failed_num=2000):
"""
Return a random point in the space and no other initiators and scaffold
within a pre-defined raduis.
:type max_num: int
:param max_num: max number of xyz position returned
:type max_failed_num: int
:param max_failed_num: max number of attemps when finding points
self.r_per_ini_frag away from scaffold and other initiator atoms.
:rtype: iterator of list
:return: iterator of [x, y, z], a random position in cell
"""
self.ini_cell.addAtom('Br', 0, 0, 0)
added_atom_id = self.ini_cell.atom_total
point_num = 0
while point_num < max_num:
near_point = True
failed_num = 0
while near_point:
random_point = self.scaffold.getPointInBox()
self.ini_cell.atom[added_atom_id].xyz = random_point
near_point = False
for atom_id in range(1, self.ini_cell.atom_total):
distance = self.ini_cell.measure(added_atom_id, atom_id)
if distance < self.r_per_ini_frag:
near_point = True
failed_num += 1
if failed_num > max_failed_num:
failed_num = 0
self.r_per_ini_frag *= 0.9
break
point_num += 1
yield random_point
[docs] def deleteOtherBranches(self, pre_frag, frags_in_one_mol):
"""
Remove the fragments from the next_frags pool and
set off the bitset of moved fragments.
:type pre_frag: `PolymerFragment`
:param pre_frag: the branching parent polymer fragment
:type frags_in_one_mol: list of `PolymerFragment`
:param frags_in_one_mol: the pool of PolymerFragment to be grown
"""
frags_to_remove = pre_frag.next_frags[:]
frags_to_remove.remove(self.cur_frag)
while frags_to_remove:
frag_to_remove = frags_to_remove.pop(0)
if self.existing_atom_bitset.get(frag_to_remove.atom_ids[0]):
self.bitSetOff(self.existing_atom_bitset, frag_to_remove)
frags_to_remove += frag_to_remove.next_frags[:]
else:
frags_in_one_mol.remove(frag_to_remove)
[docs] def setPolymerFragments(self, vdw_scale):
"""
Set polymer fragments for tangled chain method.
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
"""
# PolymerFragments order in this list is the order, according to which
# the initiators of the structures are placed into the cell.
# Rigid bodies are treated as huge initiators, and are first placed into
# the cell as they can be large and won't grow bit by bit.
self.all_type_polymer_and_frags = []
self.all_type_polymer_and_frags += self.getPolymerFragments(
vdw_scale, get_rigid=True)
self.all_type_polymer_and_frags += self.getPolymerFragments(
vdw_scale, get_rigid=False)
[docs] def getPolymerFragments(self,
vdw_scale,
get_rigid=False,
restore_template=None):
"""
Break polymers (if needed), prepare them, and return the corresponding
PolymerFragments objects.
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
:type get_rigid: bool
:param get_rigid: If True, pick those marked as rigid bodies. If False,
skip those marked as rigid bodies.
:type restore_template: bool or None
:param restore_template: restore the template polymer to the original one.
If None, debug mode determines
:rtype: list of 'PolymerFragments'
:return: Each item contains fragment information for one molecule.
"""
polymer_and_frags = []
for index, (struct,
num_struct) in enumerate(zip(self.structs,
self.composition)):
is_rigid = bool(struct.property.get(RIGID_BODY_PROP))
if get_rigid != is_rigid:
# Request rigid body, but the struct is not marked as rigid, or
# Doesn't Request rigid body, but the struct is marked as rigid
continue
# multiple molecules may present in one ws or pt entry
for mol_idx, molecule in enumerate(struct.molecule):
mol_struct = molecule.extractStructure(copy_props=True)
indexing = '_'.join(map(str, [index, mol_idx]))
polymFrags = PolymerFragments(mol_struct,
num_struct,
forcefield=self.forcefield,
vdw_scale=vdw_scale,
temperature=self.dihe_temperature,
logger=self.logger,
indexing=indexing)
restore_template = not schrodinger.in_dev_env(
) if restore_template is None else restore_template
polymFrags.breakIntoFragments(is_rigid=is_rigid,
restore_template=restore_template)
prepare_building_block(polymFrags.template_polymer,
index,
recolor=self.recolor,
preserve_res=self.preserve_res)
polymer_and_frags.append(polymFrags)
return polymer_and_frags
[docs]class AmorphousCellBuilder(BuilderWithClashDetection, BuilderGrowInCellMixin):
"""
Builder for an amorphous cell
"""
[docs] def __init__(self,
scaffold=None,
system=True,
population=1,
composition=None,
density=0.5,
amorphous_vdw_scale=1.0,
obey=OBEY_DENSITY,
tries_per_dc=5,
tries_per_mol=5,
title="",
allow_increased_density=True,
forcefield=OPLS2005,
grow=False,
dihe_temperature=None,
grow_together=False,
recolor=False,
preserve_res=True,
split_components=False,
wam_type=None,
**kwargs):
"""
Create an AmorphousCellBuilder object
:type Scaffold: `Scaffold`
:param Scaffold: The Scaffold object for the cell to use that defines
the molecule the cell should be built around (or inside or on top of)
:type system: bool
:param system: Whether a Desmond system should be built with the final
amorphous cell
:type population: int
:param population: The number of molecules to include in the amorphous
cell
:type composition: list
:param composition: A list of the same length and order as structs which
gives the number of structures of each component
:type density: float
:param density: The initial target density for building the cell. Does
not include the scaffold volume or mass
:type amorphous_vdw_scale: float
:param amorphous_vdw_scale: The initial VdW scale factor to use when
looking for clashes in the amorphous cell
:type obey: str
:param obey: A module constant indicating which initial target to keep
constant when trying to build an amorphous cell - either OBEY_DENSITY to
keep the density constant, OBEY_CLASH to keep the VdW scale factor
constant, or OBEY_BOTH to keep both constant. The latter will prevent
the builder from relaxing one of these constraints to find a cell
without clashes.
:type tries_per_dc: int
:param tries_per_dc: The number of tries to make a cell at a given
density and VdW clash scale factor before relaxing the parameter that
is not specified by obey
:type tries_per_mol: int
:param tries_per_mol: The number of times to try placing a
molecule in the cell without clases before scrapping the cell and
starting over
:type title: str
:param title: The title to give the final cell structure
:type allow_increased_density: bool
:param allow_increased_density: If True and obey=OBEY_CLASH, allow
attempts to build cells at increasing density as long as a cell was
built successfully
:type forcefield: str
:param forcefield: The name of the force field to use. The default is
'OPLS_2005'.
:type grow: bool
:param grow: whether grow polymers via self-avoiding random walk
:type dihe_temperature: float, or None
:param dihe_temperature: the temperature to calculate dihedral angle distribution
:type grow_together: bool
:param grow_together: Grow all the molecules together if True
:type recolor: bool
:param recolor: Whether to recolor the components so that each molecule
of the same component is the same color
:type preserve_res: bool
:param preserve_res: Whether to preserve the current residue information
:type split_components: bool
:param split_components: Whether to split system in components in the
system build
:type wam_type: int or None
:param wam_type: One of the enums defined in workflow_action_menu.h if
the results should have a Workflow Action Menu in Maestro
See the parent class for additional keyword documentation
"""
BuilderWithClashDetection.__init__(self, **kwargs)
if scaffold is None:
self.scaffold = Scaffold()
else:
self.scaffold = scaffold
self.system = system
self.population = population
self.composition = composition
self.density = density
self.avdw_scale = amorphous_vdw_scale
self.obey = obey
self.tries_per_dc = tries_per_dc
self.tries_per_mol = tries_per_mol
self.title = title
self.allow_increased_density = allow_increased_density
self.forcefield = forcefield
self.grow = grow
self.dihe_temperature = dihe_temperature
self.grow_together = grow_together
self.recolor = recolor
self.preserve_res = preserve_res
self.split_components = split_components
self.wam_type = wam_type
self.debug = schrodinger.in_dev_env()
[docs] def setDensityIncreaseAllowed(self, is_allowed):
"""
Set whether increased cell densities can be attempted after a successful
cell is built using OBEY_CLASH
:type is_allowed: bool
:param is_allowed: Whether increased density attempts can be tried
"""
self.allow_increased_density = is_allowed
[docs] def build(self, generate_only=False, center=True):
"""
Build the cell and do all the post-processing
:type generate_only: bool
:param generate_only: Only generate the cell, do not write it to a file,
write final density, or attempt to create a Desmond system with it
:type center: bool
:param center: Center the structure in the PBC unit cell, assuming the
cell has its origin at 0, 0, 0
:rtype: `schrodinger.structure.Structure` or None
:return: The created structure or None if no structure was found
"""
self.log('Creating amorphous cell')
# Actually build the cell
cell = self.buildCellDriver()
if not cell:
return
if not generate_only:
self.log('Final cell density = %.3f' % get_density(cell))
# Allow custom post-treatment of the raw structure before the system is
# built
cell = self.postTreatBuiltCell(cell)
# Color if necessary
if self.color == COLOR_BY_MOLECULE:
self.log('Coloring the molecules...')
self.colorByMolecule(cell)
if center:
self.log('Translating the cell in space...')
# Center the structures in the unit cell (unit cell has one corner
# at the origin and all other vertices in the +xyz directions)
centroid = transform.get_centroid(cell)
half = old_div(self.getPBCSide(cell), 2.0)
center = [half, half, half]
move = [x - y for x, y in zip(center, centroid)]
transform.translate_structure(cell, *move)
if msutils.is_coarse_grain(cell, by_atom=True):
coarsegrain.set_as_coarse_grain(cell)
if generate_only:
return cell
# Create a Desmond system or just write out this structure
if self.system:
self.log('Creating the Desmond system...')
cms_name = desmondutils.run_system_builder(
cell,
self.basename,
rezero_system=True,
forcefield=self.forcefield,
split_components=self.split_components,
wam_type=self.wam_type)
if cms_name:
if self.backend:
self.backend.addOutputFile(cms_name)
self.backend.setStructureOutputFile(cms_name)
else:
self.log('Unable to create the Desmond system')
else:
amcell_file = self.basename + AMCELL_NO_SYSTEM_OUT
self.log('Writing final cell to %s' % amcell_file)
cell.write(amcell_file)
if self.backend:
self.backend.addOutputFile(amcell_file)
self.backend.setStructureOutputFile(amcell_file)
return cell
[docs] def postTreatBuiltCell(self, cell):
"""
The default implementation of this does nothing, but subclasses can use
this method to modify the raw cell immediately after the last structure
is added to it but before any other processing (such as centering,
coloring, writing out or building a Desmond system)
:type cell: `schrodinger.structure.Structure`
:param cell: The built cell
:rtype: `schrodinger.structure.Structure`
:return: The modified cell
"""
return cell
[docs] def buildCell(self, density, avdw_scale):
"""
Make a single attempt at building the cell.
:type density: float
:param density: The density of the cell
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
:rtype: `schrodinger.structure.Structure` or None
:return: The built cell, or None if an error occurred
"""
try:
if self.grow:
cell = self.growFragmentCell(density, avdw_scale)
else:
cell = self.rotateRigidBodyCell(density, avdw_scale)
except ScaffoldError as msg:
self.log(str(msg))
return None
return cell
[docs] def buildCellDriver(self):
"""
Driver function for cell building. Tries multiple cells until one
succeeds. Adjust initial parameters to try to create more dense or
tighter class cells as needed.
:rtype: `schrodinger.structure.Structure` or None
:return: The built cell, or None if an error occurred
"""
num_structs = self.population
density = self.density
avdw_scale = self.avdw_scale
to_density = self.obey == OBEY_DENSITY
to_clash = self.obey == OBEY_CLASH
to_both = not to_density and not to_clash
if to_density:
# Adjust the clash vdw scale to build to the specific density
value = avdw_scale
parameter_adjuster = self.adjustACellClashVDWScale
parameter_string = 'clash vdw scale'
minimum_value = 0.03
elif to_clash:
# Adjust the density to build to the specific clash vdw scale
value = density
parameter_adjuster = self.adjustACellDensity
parameter_string = 'density'
minimum_value = 0.001
else:
# Only attempt the given density and clash value
value = density
parameter_adjuster = parameter_string = minimum_value = None
original_value = value
if self.structs is None:
# PolymerAmorphousCellBuilder doesn't have self.structs as input
self.structs, self.composition = self.getBuildingBlocks(num_structs)
if not self.structs:
# Failed to build single polymers
return None
if self.grow:
# Prepare building blocks for tangled chain method
vdw_min = minimum_value if to_density else None
vdw_scale = self.prepareTangledChainBlocks(avdw_scale, vdw_min)
if vdw_scale is None:
return None
if to_density:
value = vdw_scale
else:
avdw_scale = vdw_scale
def attempt_cell():
"""
Attempt to create the cell with the current parameters
:rtype: `schrodinger.structure.Structure` or None
:return: The built cell, or None if an error occurred
"""
cell = None
msg = ('Attempt %d at a cell with density = %.3f and clash '
'scale = %.3f')
for atry in range(self.tries_per_dc):
if to_density:
self.log(msg % (atry + 1, density, value))
cell = self.buildCell(density, value)
else:
self.log(msg % (atry + 1, value, avdw_scale))
cell = self.buildCell(value, avdw_scale)
if cell:
self.log('Amorphous cell populated')
self.log('')
return cell
return cell
# Try at ever decreasing parameter value until we succeed in packing all
# molecules
self.log('Creating the amorphous cell...')
cell = None
while not cell:
cell = attempt_cell()
if not cell:
# No successful cell was found - try making the value smaller
# (either less dense cell, or smaller clash vdw scale)
if to_both:
self.log('Unable to find cell with the given density and '
'VDW clash values')
return
value = parameter_adjuster(value, down=True)
if value < minimum_value:
self.log('Unable to find cell with %s > %.3f' %
(parameter_string, minimum_value))
return
successful_value = value
best_cell = cell
# If we succeeded at the initial values and density is the adjustable
# parameter, try to increase the density value as much as possible up to
# a point.
if (to_clash and successful_value == original_value and
self.allow_increased_density):
while cell:
value = parameter_adjuster(value, down=False)
self.log('Increasing %s to %.2f' % (parameter_string, value))
cell = attempt_cell()
if cell:
best_cell = cell
successful_value = value
if value > 2:
break
cell = best_cell
if to_density:
# Monte Carlo and steric pack steps may change density
self.log('Final %s = %.3f' % (parameter_string, successful_value))
return cell
[docs] def prepareTangledChainBlocks(self, vdw_scale, vdw_min):
"""
Break structures into polymer fragments and handle vdw scale adjustment.
:type vdw_scale: float
:param vdw_scale: the vdw scale to check clashes.
:type vdw_min: float or None
:param vdw_min: If float, this is the minimum allowable vdw scale after
adjustment. If None, don't allow vdw adjustment.
:rtype: float or None
:return: If float, this is the vdw scale.
"""
while True:
try:
self.setPolymerFragments(vdw_scale)
except RotationError as msg:
if vdw_min is None:
self.log(str(msg))
return None
elif vdw_scale < vdw_min:
self.log('Unable to find cell with clash vdw scale > %.3f' %
vdw_scale)
return None
vdw_scale = self.adjustACellClashVDWScale(vdw_scale, down=True)
self.log(
'Decreasing vdw scale to %.2f due to unvoidable clashes' %
vdw_scale)
else:
return vdw_scale
[docs] def getBuildingBlocks(self, number):
"""
Get the structures to place into the amorphous cell. Must be overridden
in the subclass to produce structures
:type number: int
:param number: The number of structures to return
:rtype: list
:return: list of `schrodinger.structure.Structure` objects to place
into the cell
"""
return []
[docs] def rotateRigidBodyCell(self, density, avdw_scale):
"""
Make a single attempt at building the cell by random placing and rotating
rigid structures.
:type density: float
:param density: The density of the cell
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
:rtype: `schrodinger.structure.Structure` or None
:return: The built cell, or None if an error occurred
"""
volume = self.getDesiredVolume(self.structs,
density,
num_per_chain=self.composition)
self.scaffold.defineBox(volume)
cell = self.getNewCellStructure(volume)
cell_rand = cell.copy()
scaffold_mol_num = len(cell_rand.molecule)
has_custom_mmffld = detect_custom_mmffld_data(self.structs)
# A list of structures in the order the population is randomly sampled
all_structs = []
for struct, num_struct in zip(self.structs, self.composition):
all_structs += [struct.copy() for num in range(num_struct)]
struct_order = [x for x, y in enumerate(all_structs)]
# Randomize the structure order when placing them into the cell
random.shuffle(struct_order)
# Place the structures in the cell one by one
for index, pick_idx in enumerate(struct_order):
# Let the user know that provided (monomer) structure has
# intra-structure clashes
raw_clashes = self.checkForIntraStructureClashes(
all_structs[pick_idx], scale=avdw_scale)
if raw_clashes:
msg = 'WARNING: molecule "%s" has intra-structure clashes.'
self.log(msg % struct.title)
success = self.placeStructureInCell(cell_rand,
all_structs[pick_idx],
avdw_scale)
if not success:
break
if not success:
self.log('Successfully placed only %d structures' % index)
return None
# To be consistent with the log info: "Chain 1: AAB; Chain 2: BCA",
# Restore the structure order to the polymer chain build order
for orig_idx, struct in enumerate(all_structs):
mol_id = struct_order.index(orig_idx) + scaffold_mol_num + 1
new_xyz = cell_rand.molecule[mol_id].extractStructure().getXYZ()
struct.setXYZ(new_xyz)
if has_custom_mmffld:
# This call is expensive, so we only do it if we have to
# preserve mmffld custom charge blocks (MATSCI-8373)
cell = structure.Structure(
mm.mmffld_extendCTwithCustomCharges(cell, struct))
else:
cell.extend(struct)
return cell
[docs] def placeStructureInCell(self,
cell,
struct,
avdw_scale,
no_rotation=False,
rotate_velocities=False):
"""
Place a structure in the cell. Make multiple rotations and translations
to try to find a spot that the structure fits in with no clashes
:type cell: `schrodinger.structure.Structure`
:param cell: The current cell with structures that have already been
placed
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to attempt to place into the cell
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
:param bool no_rotation: If True, do not rotate the structure
:param bool rotate_velocities: If True, besides the structure, also
rotate FFIO atom velocities (if present)
:rtype: bool
:return: Whether the structure was placed in the cell successfully
"""
transform.translate_centroid_to_origin(struct)
clashes = True
attempts = 0
cell_rings = self.findRings(cell)
struct_rings = self.findRings(struct)
# Find all existing clashes in the structure before the PBC and before
# adding to the cell - we'll ignore these as they are the result of
# building the structure to the user's specifications and might have
# been unavoidable
raw_clashes = self.checkForIntraStructureClashes(struct,
scale=avdw_scale)
# Keep trying different rotations and translations until one works with
# no clashes
while clashes and attempts < self.tries_per_mol:
attempts += 1
candidate = self.locateAndRotate(
struct,
no_rotation=no_rotation,
rotate_velocities=rotate_velocities)
candidate_rings = self.duplicateRings(struct_rings, candidate)
try:
clashes = self.checkForClashes(cell, candidate, avdw_scale,
raw_clashes, cell_rings,
candidate_rings)
# Can be raised by self.getClashes (MATSCI-2620, MATSCI-4296)
except ValueError as msg:
self.log('Cannot detect clashes: ' + str(msg))
continue
if not clashes:
# Success! Add the structure to the cell
cell.extend(candidate)
return not clashes
[docs] def duplicateRings(self, original_rings, new_struct):
"""
Take a list of _Ring objects referenced to one structure and convert to
a list of _Ring objects referenced to a new structure. This is much
faster than finding rings in the new structure. It is implied that the
new and old structure have identical bonding.
:type original_rings: iterable of _Ring objects
:param original_rings: The _Ring objects to duplicate
:param new_struct: `schrodinger.structure.Structure`
:param new_struct: The new structure the ring objects should reference
:rtype: list
:return: List of `schrodinger.structure._Ring` objects that reference
atoms in new_structure
"""
new_rings = []
for ring in original_rings:
atoms = ring.getAtomIndices()[:]
new_ring = structure._Ring(new_struct, ring._ringnum, atoms, None)
new_rings.append(new_ring)
return new_rings
[docs] def locateAndRotate(self,
struct,
no_rotation=False,
rotate_velocities=False):
"""
Randomly rotate a copy of struct and translate it to a position in
the cell
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to attempt to place into the cell
:param bool no_rotation: If True, do not rotate the structure
:param bool rotate_velocities: If True, besides the structure, also
rotate FFIO atom velocities (if present)
:rtype: `schrodinger.structure.Structure`
:return: A copy of struct that has been randomly rotated and translated
"""
scopy = struct.copy()
# Randomly rotate the structure
centroid, rmatrix = random_rotation(scopy,
no_rotation=no_rotation,
rotate_velocities=rotate_velocities)
# Only dump the rotation matrix in DEBUG
textlogger.log(self.logger,
'Rotation matrix:',
level=textlogger.logging.DEBUG)
textlogger.log(self.logger,
str(rmatrix),
level=textlogger.logging.DEBUG)
# Move to random location
xyz = self.scaffold.getPointInBox()
new_xyz = scopy.getXYZ() + xyz
scopy.setXYZ(new_xyz)
return scopy
[docs] def adjustACellDensity(self, density, down=True):
"""
Adjust the cell density up or down - the amount it is adjusted depends
on its current value and the direction it is being adjusted
:type density: float
:param density: The current cell density
:type down: bool
:param down: Whether to adjust the density down (or up if False)
:rtype: float
:return: The adjusted density
"""
if down:
if density > 0.15:
density = density - 0.1
else:
density = density * 0.5
else:
if density < 0.1:
density = density * 1.5
else:
density = density + 0.1
return density
[docs] def adjustACellClashVDWScale(self, avdw_scale, down=True):
"""
Adjust the cell clash scaling up or down - the amount it is adjusted
depends on its current value and the direction it is being adjusted
:type avdw_scale: float
:param avdw_scale: The current clash vdw scale factor
:type down: bool
:param down: Whether to adjust the scale factor down (or up if False)
:rtype: float
:return: The adjusted scale factor
"""
if down:
if avdw_scale > 1:
avdw_scale = avdw_scale * 0.9
elif avdw_scale <= 0.11:
avdw_scale = avdw_scale - 0.03
else:
avdw_scale = avdw_scale - 0.1
else:
if avdw_scale < 1:
avdw_scale = avdw_scale + 0.1
else:
avdw_scale = avdw_scale * 1.1
return avdw_scale
[docs] def checkForClashes(self, cell, candidate, avdw_scale, raw_clashes,
cell_rings, candidate_rings):
"""
Check for clashes between the candidate structure and structures already
within the cell, and also intrastructure clashes within the candiate
that are the result of the periodic boundary condition. If
intrastructure clashes are found, interstructure clashes are not checked
for.
:type cell: `schrodinger.structure.Structure`
:param cell: The current cell with structures that have already been
placed
:type candidate: `schrodinger.structure.Structure`
:param candidate: The structure to attempt to place into the cell
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
:type raw_clashes: dict
:param raw_clashes: Those clashes that exist in the candidate without
accounting for the PBC
:type cell_rings: list
:param cell_rings: Each item of the list is a
`schrodinger.structure._Ring` object returned by the findRings()
method.
:type candidate_rings: list
:param candidate_rings: Each item of the list is a
`schrodinger.structure._Ring` object returned by the findRings()
method.
:rtype: dict
:return: Any clashes found. Keys are atom indexes values are all the
atom indexes that clash with that atom
"""
pbc = []
prop = desmondutils.CHORUS_PROP_PREFIX + '%s%s'
for letter in 'abc':
for m in 'xyz':
pbc.append(cell.property[prop % (letter, m)])
# Intrastructure clashes
clashes = self.checkForIntraStructureClashes(candidate,
scale=avdw_scale,
pbc=pbc)
if clashes:
# We only care about those caused by the PBC
pbc_only_clashes = self.removeIgnoredClashes(clashes, raw_clashes)
if pbc_only_clashes:
return pbc_only_clashes
# Interstructure clashes
clashes = self.checkForInterStructureClashes(cell, candidate,
avdw_scale, pbc,
cell_rings,
candidate_rings)
return clashes
[docs] def checkForInterStructureClashes(self, cell, candidate, avdw_scale, pbc,
cell_rings, candidate_rings):
"""
Check for clashes of the structure with other structures in the cell,
including the periodic boundary condition. Also check for ring spears.
The first check that finds clashes will return those clashes and no more
checks will be made.
:type cell: `schrodinger.structure.Structure`
:param cell: The current cell with structures that have already been
placed
:type candidate: `schrodinger.structure.Structure`
:param candidate: The structure to attempt to place into the cell
:type avdw_scale: float
:param avdw_scale: The VDW scale factor for clash cutoffs
:type pbc: None, infrastructure.PBC, or list
:param pbc: If periodic boundary conditions should be used, provide
either an infrastructure.PBC object or the parameters to construct one.
Allowed constructors:
* a, b, c : box lengths
* a, b, c, alpha, beta, gamma box : box lengths and angles
* ax, ay, az, bx, by, bz, cx, cy, cz : box vectors
:type cell_rings: list
:param cell_rings: Each item of the list is a
`schrodinger.structure._Ring` object returned by the findRings()
method.
:type candidate_rings: list
:param candidate_rings: Each item of the list is a
`schrodinger.structure._Ring` object returned by the findRings()
method.
:rtype: dict
:return: Any clashes found. Keys are atom indexes values are all the
atom indexes that clash with that atom
"""
if cell.atom_total == 0:
# No interstructure clashes possible
return {}
# Note: Timings indicate that for systems on the order of 5000 atoms,
# this get_contacts approach is competitive with using
# DistanceCellIterator and
# creating the distance cell once and then using that cell for each
# candidate. For systems of 100,000 atoms, the DistanceCellIterator
# appears to be about 25X faster (depending on how many candidates are
# checked for each unchanged cell). But DistanceCellIterator doesn't use
# PBC currently, so we are sticking with get_contacts for now.
clashes = self.getClashes(cell, avdw_scale, struct2=candidate, pbc=pbc)
if clashes:
return clashes
# Find ring spears where the candidate spears a cell ring
clashes = self.findRingSpears(cell,
spear_struct=candidate,
rings=cell_rings,
ring_based=False,
pbc=pbc)
if clashes:
return clashes
# Find ring spears where the cell spears a candidate ring
clashes = self.findRingSpears(candidate,
spear_struct=cell,
rings=candidate_rings,
pbc=pbc)
return clashes
[docs] def getPBCSide(self, cell):
"""
Get the length of one side of the periodic boundary condition
:type cell: `schrodinger.structure.Structure`
:param cell: The current cell with structures that have already been
placed
:rtype: float
:return: The length of one side of the periodic boundary condition. 0 is
returned if no PBC has been set on the cell
"""
return cell.property.get('r_pdb_PDB_CRYST1_a', 0.0)
[docs] def getNewCellStructure(self, volume):
"""
Get a new, empty structure with the periodic boundary condition set
:type volume: float
:param volume: The desired volume of the periodic boundary condition
:rtype: `schrodinger.structure.Structure`
:return: The structure to add the properties to
"""
cell = self.scaffold.getNewCell()
self.scaffold.addPBCProperties(cell, volume)
cell.title = 'amorphous %s' % self.title
return cell
[docs] def getDesiredVolume(self, structures, density, num_per_chain=None):
"""
Get the volume of a cube that will have the given density when the
given structures are placed entirely inside it
:type structures: list
:param structures: The list of structures to be placed into the
box
:type density: float
:param density: The desired density (g/cm3)
:type num_per_chain: list of int
:param num_per_chain: The number of each structure. If provided,
this list must be of the same length as the above structure list
:rtype: float
:return: The desired box side
"""
if num_per_chain:
mass = sum([
struct.total_weight * num
for struct, num in zip(structures, num_per_chain)
])
else:
# It's possible that not all structures have the same mass
mass = sum([x.total_weight for x in structures])
volume = AMU_PER_A3_TO_G_PER_CM3 * mass / density
volume = volume + self.scaffold.getExcludedVolume()
return volume
[docs]class EnergySurface(object):
"""
Calculate energy surface, and dihedral angle probability distrubtion.
"""
[docs] def __init__(self, orig_dihe_values):
"""
Save original dihedral values and initialize energy.
:type orig_dihe_values: list of float
:param orig_dihe_values: The original dihedral values
"""
self.dihe_num = len(orig_dihe_values)
self.dihe_values = numpy.array(orig_dihe_values)
self.energy = numpy.zeros(self.dihe_num)
[docs] def setBoltzmannDistribution(self, struct, dihedral, forcefield_num,
force_constant, beta):
"""
Calculate torsion energy surface. If any energies on the surface are invalid,
set uniform distribution. If all are valid, set Boltzmann distribution
:type struct: {schrodinger.Structure.structure}
:param struct: the local structure for dihedral angle energy surface
:type dihedral: list of int
:param dihedral: the 4 ints are the atom ids of target dihedral angle
:type forcefield_num: int
:param forcefield_num: the mmffld integer for the given forcefield
:type force_constant: float
:param force_constant: the energy constant for torsion restraints
:type beta: float
:param beta: 1/kT to get Boltzmann factor (E / kT)
"""
# minimize, rotate, restrained minimize... as L{BondRotator}
# Canonicalize positions so that Minimizer works on reasonable initial
# struct
try:
canonicalizer = fast3d.Canonicalizer()
canonicalizer.canonicalizePositions(struct)
except RuntimeError:
# 'fragmentation failed' may happen as in canonicalizer_test.py
self.setUniformDistribution()
return
with minimize.minimizer_context(
struct=struct, no_zob_restrain=True,
ffld_version=forcefield_num) as minimizr:
for idx, dihe_value in enumerate(self.dihe_values):
struct.adjust(dihe_value, *dihedral)
minimizr.addTorsionRestraint(*(dihedral +
[force_constant, dihe_value]))
min_res = minimizr.minimize()
self.dihe_values[idx] = minimizr.getStructure().measure(
*dihedral)
self.energy[idx] = min_res.potential_energy
minimizr.deleteAllRestraints()
if any(numpy.isinf(self.energy)):
# After minimization, E = inf
self.setUniformDistribution()
return
# Calculate Boltzmann distribution
# Shift the energy minimum to zero so that all energy values are positive
# and the max probability value is 1
self.energy -= min(self.energy)
self.torsion_probability = numpy.exp(-self.energy * beta)
# No zero probability values, and thus numpy.random.choice always generate
# a full list of dihedral angle values
self.torsion_probability[self.torsion_probability < 10**-10] = 10**-10
p_sum = numpy.sum(self.torsion_probability)
self.torsion_probability /= p_sum
[docs]class RotationError(Exception):
pass
[docs]class PolymerFragments(BuilderWithClashDetection):
"""
Save the struct and break it into fragments.
"""
LETTER_NUM = 26
[docs] def __init__(self,
struct,
num_struct,
dihedral_min=0.,
dihedral_max=360.,
dihedral_num=DIHEDRAL_NUM,
dihedral_exclude=False,
only_backbone=False,
vdw_scale=1.,
forcefield=OPLS2005,
temperature=None,
pop_local_clash=True,
logger=None,
indexing='0'):
"""
Prepare infomation for in-cell grow of one single polymer chain.
:type struct: `schrodinger.structure.Structure`
:param struct: the structure to be breaked into fragments
:type num_struct: int
:param num_struct: copy the struct by num_struct times when
placing them into the cell
:type dihedral_min: float
:param dihedral_min: lower limit of dihedral angle
:type dihedral_max: float
:param dihedral_max: upper limit of dihedral angle
:type dihedral_num: int
:param dihedral_num: the number of dihedral values
:type dihedral_exclude: bool
:param dihedral_exclude: (0, dihedral_min) and (dihedral_max, 360)
are used as dihedral angle range, if True.
:type only_backbone: bool
:param only_backbone: only the rotatable bonds in backbone is considered
:type vdw_scale: float
:param vdw_scale: VDW scale factor to use
:type forcefield: str
:param forcefield: The name of the force field to use. The default is
'OPLS_2005'.
:type temperature: float, or None
:param temperature: the temperature to calculate dihedral angle distribution
using boltzmann factor. If None, a uniform distribution is used.
:type pop_local_clash: bool
:param pop_local_clash: whether remove dihedral angle values that lead to
local clashes
:type logger: `logging.Logger`
:param logger: logger for output info
:type indexing: `str`
:param indexing: the indexing based on compositions and molecules
"""
self.original_polymer = struct
self.num_struct = num_struct
self.dihedral_min = dihedral_min
self.dihedral_max = dihedral_max
self.dihedral_num = dihedral_num
self.dihedral_exclude = dihedral_exclude
self.only_backbone = only_backbone
self.vdw_scale = vdw_scale
self.forcefield = forcefield
self.temperature = temperature
self.logger = logger
self.debug = schrodinger.in_dev_env()
self.backend = jobcontrol.get_backend()
super(PolymerFragments, self).__init__(vdw_scale=vdw_scale,
logger=self.logger)
self.template_polymer = struct.copy()
self.moieties = None
# template_polymer will be broken into fragments, and some of them are
# further instantiated as Monomer objects. setupTacticity method in
# Monomer constructor throws warnings when get_chiral_atoms method
# analyzes struct with 's_st_Chirality_' property
msutils.remove_properties(self.template_polymer,
matches=['s_st_Chirality_'])
self.template_polymer_rings = self.template_polymer.find_rings(
sort=True)
self.ringnum = len(self.template_polymer_rings)
self.has_rings = bool(self.ringnum)
self.torsion_energy_surf = None
# Only enable pop_local_clash for long chains with large vdw scale
self.pop_local_clash = (pop_local_clash and
self.template_polymer.atom_total > 1000 and
self.vdw_scale > 0.6)
self.setDiheInit()
self.debug_file = indexing + '.mae'
[docs] def setDiheInit(self):
"""
Set descrete dihedral values.
"""
if self.dihedral_exclude:
self.dihe_init = []
for dihe in numpy.linspace(self.dihedral_max - 360,
self.dihedral_min, self.dihedral_num):
if dihe <= 0:
dihe += 360.0
self.dihe_init.append(dihe)
else:
self.dihe_init = list(
numpy.linspace(self.dihedral_min, self.dihedral_max,
self.dihedral_num))
if self.dihedral_min == 0.0:
# Since dihedral range is (0, 360], convert 0 to 360
self.dihe_init.remove(self.dihedral_min)
if self.dihedral_max != 360.0:
self.dihe_init.append(360.0)
[docs] def breakIntoFragments(self, is_rigid=False, restore_template=True):
"""
Break polymer into fragments.
:type is_rigid: bool
:param is_rigid: set the molecule as one fragment.
:type restore_template: bool
:param restore_template: restore the template polymer to the original one.
"""
if not self.template_polymer:
return
if not is_rigid and self.checkAndmarkPolymerProp():
self.setAllfragments()
else:
# Rigid body found
self.setAsOneFragment()
return
if self.pop_local_clash or self.temperature:
self.setLocalStructs()
if self.pop_local_clash:
self.popLocalClashes()
if self.temperature:
self.torsionEnergySurf(temperature=self.temperature)
# Overwrite any changes to template_polymer beyond xyz
self.original_polymer.setXYZ(self.template_polymer.getXYZ())
if restore_template:
self.template_polymer = self.original_polymer
self.first_frag.template_polymer = self.template_polymer
for atom in self.template_polymer.atom:
# MATSCI-11822: remove alt_xyz so that no bonds look broken
atom.alt_xyz = None
atom.property.pop('r_m_pdb_occupancy', None)
[docs] def checkAndmarkPolymerProp(self):
"""
Check the struct and mark with necessary polymer properties.
:rtype: bool
:return: True, if the structure is successfully marked with polymer
properties.
"""
# Coarsegrain structures check s_m_atom_name instead
if msutils.is_coarse_grain(self.template_polymer):
is_polymer = self.isCgPolymer()
else:
is_polymer = self.isAllAtomPolymer()
if not is_polymer:
# Polymer residue information is assigned to non-polymer struct,
# if the non-polymer struct has rotatable bonds.
is_polymer = self.setAndMarkResidues()
if is_polymer:
self.extractMoieties()
self.updateOrigAtomIdx()
self.markMoietiesProps()
try:
self.moieties = Moieties('',
structs=self.moiety_structs.values(),
one_letter=False)
except MoietyError:
return False
return is_polymer
[docs] def setLocalStructs(self):
"""
Set the local structures.
"""
self.findNeighborFrags()
self.setLocalStructAtomIds()
self.saveLocalStructs()
[docs] def setAsOneFragment(self):
"""
Set the whole molecule as one fragment.
"""
self.first_frag = PolymerFragment(
0, 0, template_polymer=self.template_polymer)
self.first_frag.atom_ids = list(
range(1, self.template_polymer.atom_total + 1))
# MATSCI-5503: rigid molecules should be centered
self.structToOriginByFrag()
[docs] def setAllfragments(self):
"""
Set all fragments.
"""
if not self.only_backbone:
# Break the monomer moieties into more small moieties
# Update monomer, all_moieties and residue (template polymer)
for moiety in list(self.moieties.monomers.values()):
moiety.setAsFragment()
moiety.template_polymer = self.template_polymer
for sub_moiety in moiety.breakTemplatePolymer():
moiety_pdbre = sub_moiety.pdbres.rstrip()
self.moieties.monomers[moiety_pdbre] = sub_moiety
self.moieties.all_moieties.append(sub_moiety)
for moiety in self.moieties.all_moieties:
moiety.setAsFragment()
# Renumber resnum so that no two residues use the same resnum
self.renumberResGetM2PMap()
self.updateAllAtomIndexMaps()
try:
self.createFragForInitiator()
except KeyError:
# MATSCI-8369: renumberResGetM2PMap() modified the self.atom_idx_map_m2p
# in a way that createFragForInitiator() gives a traceback when residue_num = 3.
# The issue is narrowed down to the conflict between MONOMER_ORIG_ATOM_IDX_PROP
# modified by breakTemplatePolymer() and empty return from getDelNextResnumByResnum()
# However, I am not able to identify the exact bug and this is only a workaround
# to ignore some rotatable bonds and make the builder job successful
self.renumberResGetM2PMap(
monomer_orig_idx_prop=MONOMER_ORIG_IDX_PROP2)
self.updateAllAtomIndexMaps()
self.createFragForInitiator()
self.finishPolymerFrags()
self.appendMissedFrags()
self.assignRingAtomIds()
self.first_frag.markBranchingFrag()
self.first_frag.setDihedrals(self.dihe_init)
self.first_frag.markSidegroupFrag()
self.structToOriginByFrag()
[docs] def saveLocalStructs(self):
"""
Save local structs and update dihedrals according to the atom ids in
newly saved local structs.
"""
self.local_structs = {}
is_coarse_grain = msutils.is_coarse_grain(self.template_polymer)
capper_terminator = self.getCapperTerminator(is_coarse_grain)
smiles_generator = smiles.SmilesGenerator(wantAllH=not is_coarse_grain)
for frag in self.first_frag.fragments(include_self=False):
# atom ids change due to struct extraction
struct, atom_id_map = self.extractWithMap(
self.template_polymer, frag.local_struct_atom_ids)
grow_markers = []
for atom in struct.atom:
atom.property[ORIG_ATOM_IDX_PROP] = atom.index
# cap atom in broken bonds with H or C atoms
for end_atom_id in frag.end_atom_ids:
grow_marker = atom_id_map[end_atom_id]
grow_markers.append(grow_marker)
grower = next(struct.atom[grow_marker].bonded_atoms).index
capper_terminator.addToChain(struct,
grower,
grow_marker,
orientation=HEADFIRST,
remove_marker=False)
# copy marker ORIG_ATOM_IDX_PROP to H/C capper
struct.atom[struct.atom_total].property[ORIG_ATOM_IDX_PROP] = \
struct.atom[grow_marker].property[ORIG_ATOM_IDX_PROP]
struct.deleteAtoms(grow_markers)
# atom ids change due to adding capper atoms
atom_id_map2 = {}
for atom in struct.atom:
orig_atom_id = atom.property.get(ORIG_ATOM_IDX_PROP)
atom_id_map2[orig_atom_id] = atom.index
# atom ids change due to reorder_atoms
smile_str, smile_map = smiles_generator.getSmilesAndMap(struct)
if smile_str not in self.local_structs:
try:
struct = build.reorder_atoms(struct, smile_map)
self.local_structs[smile_str] = struct
except:
# Some invalid structs (e.g. delete some H atoms from a valid struct)
# cause Exception: The 'new_order' list must have xx elements.
self.local_structs[smile_str] = None
frag.local_struct_smiles = smile_str
continue
# three-step map: struct extraction, capping with H/C; smile_map reorder_atoms
dihe_atom_ids = [
smile_map.index(atom_id_map2[atom_id_map[x]]) + 1
for x in frag.dihedral
]
mapped_local_struct_side_dihedrals = []
for local_struct_side_dihedral in frag.local_struct_side_dihedrals:
mapped_local_struct_side_dihedrals.append([
smile_map.index(atom_id_map2[atom_id_map[x]]) + 1
for x in local_struct_side_dihedral
])
frag.local_struct_dihedral = dihe_atom_ids
frag.local_struct_side_dihedrals = mapped_local_struct_side_dihedrals
frag.local_struct_smiles = smile_str
frag.torsion_pattern = ' '.join(
[smile_str, ','.join(map(str, dihe_atom_ids))])
[docs] def popLocalClashes(self):
"""
Try dihedral value one by one for the target torsion in local structs,
and eliminate those leading to inevitable local clashes by incomplete
conformer search of local structs.
"""
no_clash_dihe = {}
for frag in self.first_frag.fragments(include_self=False):
if frag.torsion_pattern in no_clash_dihe:
# This dihedral type has been calculated previously
frag.dihe_rand = no_clash_dihe[frag.torsion_pattern][:]
frag.pool = frag.dihe_rand[:]
continue
# Calculate new no-clash dihedral candidates for new type of dihedral
local_struct = self.local_structs[frag.local_struct_smiles]
if not local_struct:
no_clash_dihe[frag.torsion_pattern] = frag.dihe_rand[:]
continue
dihedral = frag.local_struct_dihedral
dihe_range = frag.dihe_rand[:]
side_dihe_num = len(frag.local_struct_side_dihedrals)
rings = local_struct.ring
self.has_rings = bool(rings)
# Remove dihedral values leading to unavoidable local clashes
# 1. Copy the local structure around one center dihedral
# 2. Set the center dihedral to one possible candidate value
# 3. Randomly rotate side dihedrals in the local structure to
# generate MAX_HIT_NUM conformers
# 4. If all of the conformers have intra clashes, remove that
# dihedral value from the center dihedral candinates
for dihe_value in dihe_range:
struct = local_struct.copy()
struct.adjust(dihe_value, *dihedral)
local_clash = self.checkForIntraStructureClashes(struct,
rings=rings)
if not local_clash:
continue
# FIXME: this functions is to slow to do full conformational search
# If local struct has 5 side rotatable bonds, 73^5 generates too many
# conformers, and the checkForIntraStructureClashes is too slow.
# Currently randomly pick some out of all conformers
for conformer_idx in range(MAX_HIT_NUM * side_dihe_num):
for side_dihe_idx, side_dihe_value in enumerate(
numpy.random.choice(dihe_range, side_dihe_num)):
struct.adjust(
side_dihe_value,
*frag.local_struct_side_dihedrals[side_dihe_idx])
if not self.checkForIntraStructureClashes(struct,
rings=rings):
local_clash = False
break
if local_clash:
# If MAX_HIT_NUM * side_dihe_num conformers all have clashes,
# remove this value from dihedral angle candidates
frag.dihe_rand.remove(dihe_value)
no_clash_dihe[frag.torsion_pattern] = frag.dihe_rand[:]
frag.dihe_pool = frag.dihe_rand[:]
if not frag.dihe_rand:
msg = ("No clash-free rotation can be found for atoms %s, "
"please use a smaller VDW scale factor." %
'-'.join(map(str, frag.dihedral)))
raise RotationError(msg)
[docs] @common.mmffld_environment()
def torsionEnergySurf(self, force_constant=10., temperature=300.):
"""
Sample torsion energy surface for local structs, and initialize the
dihedral angle distribution using Boltzmann distribution.
:type force_constant: float
:param force_constant: the energy constant for torsion restraints
:type temperature: float, or None
:param temperature: the temperature to calculate dihedral angle distribution
"""
self.torsion_energy_surf = {}
beta = old_div(1.0, (BOLTZMANN * temperature))
forcefield_num = mm.opls_name_to_version(self.forcefield)
for frag in self.first_frag.fragments(include_self=False):
if frag.torsion_pattern in self.torsion_energy_surf:
# This torsion energy surf has been calculated previously
frag.torsion_probability = self.torsion_energy_surf[
frag.torsion_pattern].torsion_probability.copy()
# randomly sample according to the Boltzmann distribution
frag.dihe_pool = list(
numpy.random.choice(frag.dihe_rand,
len(frag.dihe_rand),
replace=False,
p=frag.torsion_probability))
continue
# generate energy surface for this new torsion_pattern
local_struct = self.local_structs[frag.local_struct_smiles]
energy_surf = EnergySurface(frag.dihe_rand)
# if invalid struct, assign uniform distribution
if not local_struct or desmondutils.find_forcefield_invalid_structures(
[local_struct], forcefield_num):
energy_surf.setUniformDistribution()
else:
struct = local_struct.copy()
dihedral = frag.local_struct_dihedral
energy_surf.setBoltzmannDistribution(struct, dihedral,
forcefield_num,
force_constant, beta)
self.torsion_energy_surf[frag.torsion_pattern] = energy_surf
frag.torsion_probability = energy_surf.torsion_probability.copy(
)
# randomly sample according to the Boltzmann distribution
frag.dihe_pool = list(
numpy.random.choice(frag.dihe_rand,
len(frag.dihe_rand),
replace=False,
p=frag.torsion_probability))
[docs] def getCapperTerminator(self, is_coarse_grain):
"""
Get a terminator moiety to cap the atom in a broken bond.
:type is_coarse_grain: bool
:param is_coarse_grain: whether the structure is coarse grain
:rtype: `Terminator`
:return: terminator moiety of one single H or C atom with Br marker.
"""
struct = get_generic_end_structure(is_coarse_grain=is_coarse_grain)
return Terminator(struct)
[docs] def findNeighborFrags(self, separated_bond_num=2):
"""
Search for and save neighboring fragments for each fragment.
For any fragments, the bond between dihe 2nd and 3rd atoms is rotatable.
If any other fragments has any atoms that are within separated_bond_num bonds
away from the dihe 2nd and 3rd atoms of one certain fragment, they are
considered neighbor fragments of that fragment.
:type separated_bond_num: int
:param separated_bond_num: criteria for defining neighbor fragments
"""
graph = analyze.create_nx_graph(self.template_polymer)
for cur_frag in self.first_frag.fragments(include_self=False):
pre_frag = cur_frag.pre_frag
# Search for the most faraway parent fragment
while pre_frag.pre_frag:
if networkx.shortest_path_length(
graph,
source=cur_frag.dihedral[2],
target=pre_frag.dihedral[2]) <= separated_bond_num:
pre_frag = pre_frag.pre_frag
else:
break
# Add the the most faraway parent fragment whose dihe 3rd is outside the
# searching range, but dihe 4th is inside the searching range
cur_frag.neighbor_frags.append(pre_frag)
next_frags = pre_frag.next_frags[:]
# search and save other parent fragments, itself, and child fragments
while next_frags:
frag = next_frags.pop()
bond_num_to_dihe_2nd = networkx.shortest_path_length(
graph, source=cur_frag.dihedral[1], target=frag.dihedral[2])
bond_num_to_dihe_3rd = networkx.shortest_path_length(
graph, source=cur_frag.dihedral[2], target=frag.dihedral[2])
if min([bond_num_to_dihe_3rd, bond_num_to_dihe_2nd
]) <= separated_bond_num:
cur_frag.neighbor_frags.append(frag)
next_frags += frag.next_frags[:]
[docs] def setLocalStructAtomIds(self):
"""
Set the local struct atom ids and end atom ids defined by neighbor fragments.
"""
for cur_frag in self.first_frag.fragments(include_self=False):
ini_as_neighbor = False
# add atom ids in neighbor_frags to local_struct_atom_ids
for frag in cur_frag.neighbor_frags:
cur_frag.local_struct_atom_ids.update(frag.atom_ids)
if not frag.pre_frag:
ini_as_neighbor = True
skip_ini_end_atom_id = set()
# fine tune local_struct_atom_ids for those atoms in rotatable bonds
excluded_side_frags = {cur_frag.fragnum}
neighbor_frags = set(cur_frag.neighbor_frags)
for frag in cur_frag.neighbor_frags:
if not frag.pre_frag:
for next_frag in frag.next_frags:
if next_frag in neighbor_frags and \
next_frag.fragnum not in excluded_side_frags:
# Bond between INI frag and next_frag is not broken
# save side rotatable bonds for conformer search
cur_frag.local_struct_side_dihedrals.append(
next_frag.dihedral[:])
excluded_side_frags.add(next_frag.fragnum)
continue
# this is not INI frag
if frag.dihedral[2] not in cur_frag.local_struct_atom_ids:
# cur_frag is the most faraway parent fragment other than INI
cur_frag.local_struct_atom_ids.update(frag.dihedral[1:3])
cur_frag.end_atom_ids.append(frag.dihedral[1])
# useful when INI has multiple R groups
if not frag.pre_frag.pre_frag:
# This is the dihe 3rd atom saved in a frag connected to INI
skip_ini_end_atom_id.add(frag.dihedral[2])
for next_frag in frag.next_frags:
if next_frag.dihedral[
3] not in cur_frag.local_struct_atom_ids:
# frag is one of the most faraway child fragments
cur_frag.end_atom_ids.append(next_frag.dihedral[2])
elif next_frag.fragnum not in excluded_side_frags:
# Bond between frag and next_frag is not broken
# save side rotatable bonds for conformer search
cur_frag.local_struct_side_dihedrals.append(
next_frag.dihedral[:])
excluded_side_frags.add(next_frag.fragnum)
if ini_as_neighbor:
for atom_id in self.first_frag.bonded_atom_ids:
if atom_id not in skip_ini_end_atom_id:
cur_frag.end_atom_ids.append(atom_id)
[docs] def isCgPolymer(self):
"""
Coarse grain particles of one type are considerred as one type
of polymer residue. If one cg structure has INI, TRM, and others
as residues, consider it built from polymer builder, assuming
all bonds between particles are rotatable.
:rtype: bool
:return: True, if the cg structure is originally from polymer builder
and prepared properly.
"""
int_trm_set = {INI, TRM}
residue_name = {}
residue_number = 0
monomer_idx = 0
for atom in self.template_polymer.atom:
residue_number += 1
if atom.name not in residue_name:
if atom.name in int_trm_set:
# INT and TRM do not change pdbres
residue_name[atom.name] = atom.name
else:
# Monomers have A-Z as pdbres, which means 0-25 for
# monomer_idx.
if monomer_idx == 26:
# This is not a Cg polymer as the number of monomer
# types exceeds 26
return False
atom_name = string.ascii_uppercase[monomer_idx].ljust(4)
residue_name[atom.name] = atom_name
monomer_idx += 1
atom.pdbres = residue_name[atom.name]
atom.resnum = residue_number
atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = 1
atom.property[HEAD_ATOM_PROP] = True
# Original Monomers have TAIL_ATOM_PROP
# For newly broken ones from side groups, extra markers cause no trouble
atom.property[TAIL_ATOM_PROP] = True
if atom.bond_total > 2:
atom.property[BRANCH_ATOM_PROP] = True
return len(residue_name) > 2 and (INI
in residue_name) and (TRM
in residue_name)
[docs] def renumberResGetM2PMap(
self, monomer_orig_idx_prop=msprops.MONOMER_ORIG_ATOM_IDX_PROP):
"""
Loop over all the residues and create mapping from atom id in residue
to that in polymer.
:param monomer_orig_idx_prop: The atom property to define the original
atom index
:type monomer_orig_idx_prop: str
"""
self.connected_resnums = {}
self.atom_idx_map_m2p = {}
for residue_num, residue in enumerate(self.template_polymer.residue, 1):
residue.resnum = residue_num
self.atom_idx_map_m2p[residue_num] = {}
for atom in residue.atom:
# Create mapping between atom index to connected resnum
self.connected_resnums.setdefault(atom.index, [])
# Create map from atom id in monomer to that in polymer
# xx_atom_id_monomer refers to the atom id in monomer
self.atom_idx_map_m2p[residue_num][
atom.property[monomer_orig_idx_prop]] = atom.index
[docs] def updateAllAtomIndexMaps(self):
"""
Get information from previous polymer build for polymer grow in cell.
"""
self.residue_connectivity = {}
self.backbone_side_atoms = {}
self.backbone = {}
self.backbone_to_marked_end = {}
self.rotatable_bonds = {}
self.setInitiatorResidue()
for residue in self.template_polymer.residue:
residue_type = residue.pdbres.rstrip()
moiety = self.moieties.getMoiety(residue_type)
residue_num = residue.resnum
self.setResidueConnectivity(moiety, residue_num)
self.setBackboneAndSideGroup(moiety, residue_num)
if residue_type == INI or residue_type == TRM:
continue
self.setPathtoBranchingAtom(moiety.backbone_to_marked_end,
residue_num)
self.setRotatableBond(moiety.rotatable_bonds, residue_num)
[docs] def setInitiatorResidue(self):
"""
Save the residue containing the polymer initiator.
"""
for residue in self.template_polymer.residue:
if residue.pdbres.rstrip() == INI:
self.initiator_res = residue
return
[docs] def setResidueConnectivity(self, moiety, residue_num):
"""
Create mapping from connected resnum pair to connected atom pair.
:type moiety: `schrodinger.structure.Structure`
:param moiety: one moiety used to construct part of the polymer
:type residue_num: int
:param residue_num: residue num
"""
# residue_connectivity[residue num][connected resnum] =
# [atom id in residue, connected atom in connected resnum]
self.residue_connectivity[residue_num] = {}
for marker_id in moiety.markers:
# atom in current residue bonded to another residue
atom_id = self.atom_idx_map_m2p[residue_num][next(
moiety.raw_struct.atom[marker_id].bonded_atoms).index]
for neighbor in self.template_polymer.atom[atom_id].bonded_atoms:
if neighbor.resnum != residue_num:
self.residue_connectivity[residue_num][neighbor.resnum] = [
atom_id, neighbor.index
]
if neighbor.resnum not in self.connected_resnums[atom_id]:
self.connected_resnums[atom_id].append(neighbor.resnum)
[docs] def setBackboneAndSideGroup(self, moiety, residue_num):
"""
Update backbone and side group maps.
:type moiety: `schrodinger.structure.Structure`
:param moiety: one moiety used to construct part of the polymer
:type residue_num: int
:param residue_num: residue num
"""
self.backbone_side_atoms[residue_num] = OrderedDict()
# Backbone atom id in monomer (bb_atm_id_m)
for bb_atm_id_m in moiety.backbone_path_indexes:
# Backbone atom id in polymer (bb_atm_id_p)
bb_atm_id_p = self.atom_idx_map_m2p[residue_num][bb_atm_id_m]
self.backbone_side_atoms[residue_num][bb_atm_id_p] = []
# Side atom id in monomer (side_atm_id_m)
for side_atm_id_m in moiety.backbone_side_atoms[bb_atm_id_m]:
side_atm_id_p = self.atom_idx_map_m2p[residue_num][
side_atm_id_m]
# [residue_num][backbone_atom_id] = [side_group_atom_ids]
self.backbone_side_atoms[residue_num][bb_atm_id_p].append(
side_atm_id_p)
[docs] def setPathtoBranchingAtom(self, moiety_bb_to_marked_end, residue_num):
"""
Update maps for the path between backbone atom to atom branching point.
:type moiety_bb_to_marked_end: dict
:param moiety_bb_to_marked_end: Keys are backbone atom indexes, values are
dictionaries with keys being the branching atom and values being the path
between backbone atom and branching atom
:type residue_num: int
:param residue_num: residue num
"""
for backbone_atom_id, branching_iterm in moiety_bb_to_marked_end.items(
):
polymer_backbone_atom_id = self.atom_idx_map_m2p[residue_num][
backbone_atom_id]
self.backbone_to_marked_end[polymer_backbone_atom_id] = {}
for branching_atom_id, path_with_ends in branching_iterm.items():
new_path_with_ends = []
for atom_id in path_with_ends:
new_path_with_ends.append(
self.atom_idx_map_m2p[residue_num][atom_id])
bonded_to_marker = self.atom_idx_map_m2p[residue_num][
branching_atom_id]
self.backbone_to_marked_end[polymer_backbone_atom_id][
bonded_to_marker] = new_path_with_ends
[docs] def setRotatableBond(self, moiety_rotatable_bonds, residue_num):
"""
Update maps for the path between backbone atom to atom branching point.
:type moiety_rotatable_bonds: list of tuple
:param moiety_rotatable_bonds: (atom 1, atom 2) between which bond is rotatable
:type residue_num: int
:param residue_num: residue num
"""
rotatable_bonds = []
for atom0, atom1 in moiety_rotatable_bonds:
rotatable_bonds.append({
self.atom_idx_map_m2p[residue_num][atom0],
self.atom_idx_map_m2p[residue_num][atom1]
})
# Rotatable_bonds[residue_type] = (set(atom1, atom2)...)
# and the bond between atom1 and atom2 is rotatable
self.rotatable_bonds[residue_num] = tuple(rotatable_bonds)
[docs] def createFragForInitiator(self):
"""
Create the the first polymer fragment from the initiator and
append the following children of it.
"""
cur_resnum = self.initiator_res.resnum
self.first_frag = PolymerFragment(
cur_resnum, 0, template_polymer=self.template_polymer)
for atom in self.initiator_res.atom:
self.first_frag.atom_ids.append(atom.index)
# Multiple Rx groups in INI
next_resnum = list(self.getDelNextResnumByResnum(cur_resnum))
for next_resnum, [atom_id, next_atom_id] in next_resnum:
sub_backbone = []
# Try to grab another atom in INI as dihedral[0]
atom_id_ini0 = self.bondedAtomIdInSameRes(atom_id)
if atom_id_ini0:
# Add the atom connected to the terminating atom in INI
sub_backbone.append(atom_id_ini0)
self.first_frag.pre_backbone.append(atom_id_ini0)
self.backbone_to_marked_end[atom_id_ini0] = {}
# Add terminating atom in INI to sub_backbone and pre_backbone
sub_backbone.append(atom_id)
self.first_frag.pre_backbone.append(atom_id)
backbone_atom_ids = self.resBackbone(next_resnum, next_atom_id)
for idx, backbone_atom_id in enumerate(backbone_atom_ids):
# Add first atom in first monomer
sub_backbone.append(backbone_atom_id)
self.first_frag.atom_ids.append(backbone_atom_id)
self.first_frag.pre_backbone.append(backbone_atom_id)
if len(sub_backbone) > 2 and (set(
sub_backbone[-2::]) in self.rotatable_bonds[next_resnum]
or idx == 0):
# dihe 2nd-3rd is found rotatable
try:
dihe_4th_atom_id = backbone_atom_ids[idx + 1]
except IndexError:
# dihe 3rd is the last atom in monomer
dihe_4th_atom_id = None
self.first_frag.resnum = next_resnum
# Rotatable bond found before M-M bond
self.createEndSubFragment(self.first_frag, next_resnum,
sub_backbone, next_resnum,
dihe_4th_atom_id)
break
else:
# dihe 1st-2rd or non-rotatable dihe 2nd-3rd
# Add side atoms and continue to add more backbone atom
self.first_frag.atom_ids += self.backbone_side_atoms[
next_resnum][backbone_atom_id]
continue
else:
# When no rotatable bonds in M or struct H-C (H-M as polymer)
cur_resnum = next_resnum
# This for loop is to tackle branching at dihe 2nd
for next_resnum, [atom_id, next_atom_id
] in self.getDelNextResnumByResnum(cur_resnum):
self.first_frag.atom_ids.append(next_atom_id)
backbone_atom_ids = self.resBackbone(
next_resnum, next_atom_id)
dihe_4th_atom_id = None if len(
backbone_atom_ids) == 1 else backbone_atom_ids[1]
self.first_frag.resnum = next_resnum
# Rotatable M-M bond
ordered_backbone = self.getInitiatorSubBackbone(
sub_backbone, next_atom_id)
self.createEndSubFragment(self.first_frag, next_resnum,
ordered_backbone + [next_atom_id],
next_resnum, dihe_4th_atom_id)
for frag in self.first_frag.next_frags:
self.addBranchingAtoms(frag)
# save the end atom ids for ini fragment
self.first_frag.bonded_atom_ids = []
for frag in self.first_frag.next_frags:
self.first_frag.bonded_atom_ids.append(frag.dihedral[2])
[docs] def getInitiatorSubBackbone(self, sub_backbone, dihe_3rd):
"""
Return a list of atom ids: the last two atoms are bonded and the last
bonds with the dihe 3rd atom.
:type sub_backbone: list
:param sub_backbone: the last two atoms are bound and the last one binds
with the dihe 3rd atom.
:type dihe_3rd: int
:param dihe_3rd: the 3rd atom in a dihedral
"""
# Dihedral angle atoms are bonded: dihe_1st - dihe_2nd - dihe_3rd
# By default sub_backbone[-2:] = [dihe_1st, dihe_2nd]
if self.template_polymer.areBound(sub_backbone[-1], dihe_3rd):
# No modification needed, since dihe_2nd and dihe_3rd are bonded
return sub_backbone
# Redefine dihe_2nd by searching the atoms in first_frag
for dihe_2nd in self.first_frag.atom_ids:
if self.template_polymer.areBound(dihe_2nd, dihe_3rd):
# The dihe_2nd atom is in first_frag and bonds with dihe_3rd
break
for dihe_1st_atom in self.template_polymer.atom[dihe_2nd].bonded_atoms:
if dihe_1st_atom.index != dihe_3rd:
# The dihe_1st_atom binds with dihe_2nd and is not dihe_3rd
break
return [dihe_1st_atom.index, dihe_2nd]
[docs] def finishPolymerFrags(self):
"""
Generate all polymer fragments from the first generation fragment
containing INT and the following generation fragments.
"""
fragments = [self.first_frag] + self.first_frag.next_frags[:]
while fragments:
frag = fragments.pop(0)
# dihe_4th_atom_ids saves extra branching sites
for dihe_4th_atom_id, dihe_4th_resnum, sub_backbone in frag.dihe_4th_atom_ids:
backbone_atom_ids = self.continueResBackbone(dihe_4th_atom_id)
new_frag = None
# Along backbone_atom_ids to find rotatable a bond in each branch
for idx, backbone_atom_id in enumerate(backbone_atom_ids):
sub_backbone.append(backbone_atom_id)
if backbone_atom_id not in frag.atom_ids:
frag.atom_ids.append(backbone_atom_id)
rotatable = set(
sub_backbone[-2::]) in self.rotatable_bonds[frag.resnum]
if rotatable or dihe_4th_resnum != frag.resnum:
dihe_4th_atom_id = None if backbone_atom_id == backbone_atom_ids[
-1] else backbone_atom_ids[idx + 1]
new_frag = self.createEndSubFragment(
frag, dihe_4th_resnum, sub_backbone,
dihe_4th_resnum, dihe_4th_atom_id)
if new_frag:
self.addBranchingAtoms(new_frag)
fragments.append(new_frag)
break
else:
# The dihe 2-3rd bond is not a rotatable bond within
# residue and is not a cross-residue bond either
frag.atom_ids += self.backbone_side_atoms[
dihe_4th_resnum][backbone_atom_id]
frag.pre_backbone.append(backbone_atom_id)
continue
else:
# No 2-3 rotatable bonds found, but reach the end of the residue
for next_resnum, [
atom_id, next_atom_id
] in self.getDelNextResnumByAtomID(backbone_atom_id):
connected_backbone_atom_ids = self.resBackbone(
next_resnum, next_atom_id)
if len(connected_backbone_atom_ids) > 1:
dihe_4th_atom_id = connected_backbone_atom_ids[1]
else:
dihe_4th_atom_id = None
frag.atom_ids.append(next_atom_id)
new_frag = self.createEndSubFragment(
frag, next_resnum, sub_backbone + [next_atom_id],
next_resnum, dihe_4th_atom_id)
if new_frag:
self.addBranchingAtoms(new_frag)
fragments.append(new_frag)
resnum = self.template_polymer.atom[
backbone_atom_id].resnum
if not self.residue_connectivity.get(resnum):
# MATSCI-10158: the in-place modification on self.residue_connectivity
# empties the content, which causes tracebacks in the next loop.
break
else:
self.addBranchingAtoms(frag)
[docs] def appendMissedFrags(self):
"""
When polymer fragments don't find all the atoms in the original structure,
this methods first groups them by bond connections and appends each group
to a found fragment.
"""
# This is a temporary workaround that misses the rotatable bonds between
# the missed atom groups and the found fragments.
# This also ignores all rotatable bond within the missed atom group.
# Collect the edge cases for a general fix all-togather: MATSCI-7009
all_atom_ids = set(self.template_polymer.getAtomIndices())
self.first_frag.adjustAtomIds(0)
all_missed_atom_ids = all_atom_ids.difference(
set(self.first_frag.atom_ids_in_mol))
if not all_missed_atom_ids:
return
missed_atom_id_groups = get_missed_atom_id_groups(
self.template_polymer, all_missed_atom_ids)
connection_infos = self.getConnectionInfos(missed_atom_id_groups,
all_missed_atom_ids)
msg = (
f"{connection_infos[0].connecting_atom_ids} in found fragments connect "
f"missed_atom_id_groups {connection_infos[0].missed_atom_ids}")
if self.logger:
self.logger.debug(msg)
for connection_info in connection_infos:
for frag in self.first_frag.fragments():
intersection_ids = connection_info.connecting_atom_ids.intersection(
frag.atom_ids)
if intersection_ids:
# This assumes that the linking between the missed frag
# and the found one is not rotatable and all bonds within
# the missed atoms are not rotatable
frag.atom_ids += connection_info.missed_atom_ids
[docs] def getConnectionInfos(self, missed_atom_id_groups, all_missed_atom_ids):
"""
Return the connecting atoms, which belong to the atoms in the found
fragment and connect to the missed atoms.
:type missed_atom_id_groups: list of list
:param missed_atom_id_groups: each sublist has missed atoms connected by
covalent bonds.
:type all_missed_atom_ids: list of int
:param all_missed_atom_ids: a flat list of all missed atom indexes
:rtype: list of set
:return: each set has the connecting atoms, which belong to the found
atoms in the fragment and are connected to the missed atoms.
"""
connection_infos = []
for missed_atom_id_group in missed_atom_id_groups:
connecting_atom_group = []
for atom_id in missed_atom_id_group:
connecting_atom_group += [
at.index
for at in self.template_polymer.atom[atom_id].bonded_atoms
]
connecting_atom_group = set(connecting_atom_group)
connecting_atom_group = connecting_atom_group.difference(
all_missed_atom_ids)
connection_info = ConnectionInfo(
missed_atom_ids=missed_atom_id_group,
connecting_atom_ids=connecting_atom_group)
connection_infos.append(connection_info)
return connection_infos
[docs] def assignRingAtomIds(self):
"""
Assign ring atom indexes to each fragment.
"""
# FIXME: rings_atom_ids shoud be searched in monomers and mapped to polymer
for frag in self.first_frag.fragments():
for ring in self.template_polymer.ring:
rings_atom_ids = ring.getAtomIndices()
shared_atom_ids = set(
frag.atom_ids).intersection(rings_atom_ids)
if len(shared_atom_ids) > 1:
frag.rings_atom_ids.append(rings_atom_ids[:])
[docs] def structToOriginByFrag(self):
"""
Move the structure so the centroid of the fragment
is at the origin.
"""
center = self.getFragCentroid(self.template_polymer, self.first_frag)
self.template_polymer.setXYZ(self.template_polymer.getXYZ() - center)
[docs] def getFragCentroid(self, struct, frag):
"""
Calculate the centroid of polymer fragment.
:type frag: `schrodinger.structure.Structure`
:param frag: polymer structure to get atom positions from
:type frag: `Polymerfragment`
:param frag: The polymer fragment to get atom ids from
:rtype: list
:return: the centroid of polymer fragment
"""
return transform.get_centroid(struct, atom_list=frag.atom_ids)[:3]
[docs] def getDelNextResnumByResnum(self, cur_resnum):
"""
Given resnum, find all the connected residues and atoms.
Delete the connectivity from current residue to next residue
and the connectivity from next residue to current residue.
:type cur_resnum: int
:param cur_resnum: the residue number which the atom id belongs to
:type del_connection: bool
:param del_connection: del the connection from current atom and residue
to the connected atom and residue
:rtype: iterator to generate int, [int, int]
:return: the connected residue number, [atom id, connected atom id]
"""
for next_resnum, [atom_id, next_atom_id] in list(
self.residue_connectivity[cur_resnum].items()):
self.delConnectivity(cur_resnum, next_resnum, atom_id, next_atom_id)
yield next_resnum, [atom_id, next_atom_id]
[docs] def delConnectivity(self, cur_resnum, next_resnum, atom_id, next_atom_id):
"""
Delete the connectivity from current residue to next residue
and the connectivity from next residue to current residue.
:type cur_resnum: int
:param cur_resnum: the residue number which the current atom id belongs to
:type next_resnum: int
:param next_resnum: the residue number which the connected atom id belongs to
:type atom_id: int
:param atom_id: the atom id from which other connections are searched
:type next_atom_id: int
:param next_atom_id: one connected atom id
"""
self.residue_connectivity[next_resnum].pop(cur_resnum)
self.connected_resnums[next_atom_id].remove(cur_resnum)
self.residue_connectivity[cur_resnum].pop(next_resnum)
self.connected_resnums[atom_id].remove(next_resnum)
[docs] def createEndSubFragment(self,
cur_frag,
dihe_3rd_resnum,
sub_backbone,
dihe_4th_resnum,
dihe_4th_atom_id,
append_to_frag=None,
last_created_frag=False):
"""
Create one child PolymerFragment or append atoms to append_to_frag.
:type cur_frag: `PolymerFragment`
:param cur_frag: current polymer fragment
:type dihe_3rd_resnum: int
:param dihe_3rd_resnum: the resnum of the 3rd atom in dihedral
:type sub_backbone: list of int
:param sub_backbone: the backbone whose last atom (dihedral 3rd) will
be connected to new fragment
:type dihe_4th_resnum: int
:param dihe_4th_resnum: a guess of the resnum of the 4th atom in res
:type dihe_4th_atom_id: int or None
:param dihe_4th_atom_id: the 4th atom of dihedral, if int; None, if
the 4th atom is in the adjacent res
:type append_to_frag: PolymerFragment or None
:param append_to_frag: if None, the found 'branching' is new polymer
fragment, and new PolymerFragment is created with dihedral_4th_atom.
if not None, the found 'branching' is part of the old polymer fragment
and no new PolymerFragment is created. Add dihedral_4th_atom to old
polymer fragment.
:type last_created_frag: bool
:param last_created_frag: if True, will not return fragment
:rtype: `PolymerFragment` or None
:return: the newly created `PolymerFragment`, or None if atoms are
appended to append_to_frag fragment
"""
dihe_3rd_atom_id = sub_backbone[-1]
if not dihe_4th_atom_id:
# Dihedral 3rd and 4th atoms in the different residues
# in other words, bond connects 2 residues
# dihe_4th_resnum and dihe_4th_atom_id are assigned by for loop
for dihe_4th_resnum, [
atom_id, dihe_4th_atom_id
] in self.getDelNextResnumByAtomID(dihe_3rd_atom_id):
# At the end of the residue, dihedral 3rd may connect to
# multiple residues, get one as the connecting residue
break
else:
# dihedral 3rd is the termination atom of current moiety
dihe_4th_atom_id = self.bondedAtomIdInSameRes(dihe_3rd_atom_id)
# FIXME: moiety broken from side group chain has no attached TRM
if dihe_4th_atom_id and self.template_polymer.atom[
dihe_4th_atom_id].pdbres.rstrip() == TRM:
# The bond between TRM and the rest of a polymer is considered
# rotatable, as long as TRM has more than one atom.
last_created_frag = True
else:
cur_frag.atom_ids += self.backbone_side_atoms[
dihe_3rd_resnum][dihe_3rd_atom_id]
return None
if not append_to_frag:
new_frag = PolymerFragment(dihe_3rd_resnum,
cur_frag.generation + 1,
pre_frag=cur_frag,
pre_backbone=sub_backbone)
new_frag.dihedral = sub_backbone[-3::] + [dihe_4th_atom_id]
new_frag.dihe_4th_resnum = dihe_4th_resnum
new_frag.atom_ids += self.backbone_side_atoms[dihe_3rd_resnum][
dihe_3rd_atom_id]
cur_frag.next_frags.append(new_frag)
else:
new_frag = append_to_frag
# Save branching dihe 4th atoms and related pre_backbone
new_frag.dihe_4th_atom_ids.append(
tuple([dihe_4th_atom_id, dihe_4th_resnum, sub_backbone[-2::]]))
return None if (append_to_frag or last_created_frag) else new_frag
[docs] def addBranchingAtoms(self, cur_frag):
"""
Append extra dihedral 4th atoms for branching. Add to the parent of cur_frag
instead of cur_frag, if the current fragment and new branch share the 1st,
2nd, and 3rd atoms in a dihedral angle.
:type cur_frag: `PolymerFragment`
:param cur_frag: branching points will be added to the this fragment
"""
# cur_frag.dihedral[2] is the dihe 3rd atom in cur_frag
dihe_3rd_and_all = [cur_frag.dihedral[2]] + cur_frag.atom_ids
for idx, atom_id in enumerate(cur_frag.pre_backbone):
if atom_id not in self.backbone_to_marked_end:
# 1 atom in INI could hit this
continue
for bonded_to_marker, path_with_ends in self.backbone_to_marked_end[
atom_id].items():
branching_atom_id = path_with_ends[-1]
sub_backbone = cur_frag.pre_backbone[:idx] + path_with_ends
for dihe_4th_resnum, [
cur_atom_id, dihe_4th_atom_id
] in self.getDelNextResnumByAtomID(branching_atom_id):
if branching_atom_id not in dihe_3rd_and_all:
frag = cur_frag.pre_frag
else:
frag = cur_frag
frag.dihe_4th_atom_ids.append(
tuple([
dihe_4th_atom_id, dihe_4th_resnum,
sub_backbone[-2::]
]))
[docs] def resBackbone(self, resnum, atom_id):
"""
Given residue number and starting atom id, return the short
backbone path (from the starting atom id to the end) and set
head/tail using dictionary.
:type resnum: int
:param resnum: the residue number
:type atom_id: int
:param atom_id: backbone path starts from this atom id
:rtype: list of int
:return: the atom ids of backbone from atom_id to the other end
"""
backbone = list(self.backbone_side_atoms[resnum])
# Monomer may be connected as headfirst or tailfirst
is_headfirst = backbone[0] == atom_id
if is_headfirst:
self.backbone[resnum] = backbone
else:
self.backbone[resnum] = backbone[::-1]
return self.backbone[resnum]
[docs] def continueResBackbone(self, atom_id):
"""
Return rest of the backbone path continuing from the atom_id.
:type atom_id: int
:param atom_id: atom id of the starting point
:rtype: list
:return: the atom ids of backbone from atom_id to the other end
"""
resnum = self.template_polymer.atom[atom_id].resnum
try:
backbone = self.backbone[resnum]
except KeyError:
# set backbone by head-tail and return backbone
return self.resBackbone(resnum, atom_id)
index = backbone.index(atom_id)
return backbone[index:]
[docs] def bondedAtomIdInSameRes(self, atom_id):
"""
Find the atom id of a neighbor atom within the same residue.
:type atom_id: int
:param atom_id: atom id
:rtype: int or None
:return: the atom id of a neighbor atom in the same residue
"""
atom = self.template_polymer.atom[atom_id]
atom_resnum = atom.resnum
for neighbor in atom.bonded_atoms:
if neighbor.resnum == atom_resnum:
return neighbor.index
return None
[docs] def getDelNextResnumByAtomID(self, atom_id):
"""
Given atom id, find all the connected residues and atoms.
Delete the connectivity from current residue to next residue
and the connectivity from next residue to current residue.
:type atom_id: int
:param atom_id: the atom id from which all connections are returned
:type del_connection: bool
:param del_connection: del the connection from current atom and residue
to the connected atom and residue
:rtype: iterator to generate int, [int, int]
:return: the connected residue number, [atom id, connected atom id]
"""
cur_resnum = self.template_polymer.atom[atom_id].resnum
for next_resnum in self.connected_resnums[atom_id][:]:
next_atom_id = self.residue_connectivity[cur_resnum][next_resnum][1]
self.delConnectivity(cur_resnum, next_resnum, atom_id, next_atom_id)
yield next_resnum, [atom_id, next_atom_id]
[docs] def isAllAtomPolymer(self):
"""
Each polymer built by polymer has one INI, TRM, and at least one MOMONOMER;
each atom in polymer has MONOMER_ORIG_ATOM_IDX_PROP property; residues from
the same moiety should have same number of atoms.
:rtype: bool
:return: True, if the structure is built using polymer builder.
"""
residue_names = {}
for residue in self.template_polymer.residue:
for atom in residue.atom:
if not atom.property.get(msprops.MONOMER_ORIG_ATOM_IDX_PROP):
# manually added atoms do not have MONOMER_ORIG_ATOM_IDX_PROP
# when users add some atoms to one polymer
return False
residue_type = residue.pdbres.rstrip()
res_appeared_before = residue_type in residue_names
if residue_type == INI:
if res_appeared_before:
# one single INI for each polymer
return False
elif res_appeared_before:
if len(residue.getAtomIndices()) != residue_names[residue_type]:
# residues from the same moiety should have same number of atoms
# when users del some atoms from one polymer
return False
continue
residue_names[residue_type] = len(residue.getAtomIndices())
return (INI in residue_names) and (TRM in residue_names) and (
len(residue_names) > 2)
[docs] def breakIntoMols(self):
"""
Make a copy of the template polymer and break the structure into
molecules serving as INI, TRM, and Monomer.
:rtype: {schrodinger.Structure.structure}, int
:return: the structure copy broken into molecules serving as a moiety,
atom id of one atom in INI
"""
head_atom_id, _ = self.findHeadTail(self.template_polymer)
side_atom_ids = self.template_polymer.getAtomIndices()
side_atom_ids.remove(head_atom_id)
nr_bonds = nearest_rotatable_bonds(self.template_polymer,
self.template_polymer_rings,
head_atom_id,
side_atom_ids,
pre_atom_prop=HEAD_ATOM_PROP,
atom_prop=HEAD_ATOM_PROP)
log_debug(f"breakIntoMols: head atom id: {head_atom_id}")
log_debug(f"breakIntoMols: nearest rotatable bonds: {nr_bonds}")
if self.debug:
# Mark the bond to be break for debugging mode
for atom1, atom2 in nr_bonds:
bond = self.template_polymer.getBond(atom2, atom1)
bond.setStyle(structure.ATOM_BALLNSTICK)
struct_copy = self.template_polymer.copy()
for atom_ids in nr_bonds:
struct_copy.deleteBond(*atom_ids)
head_atom_ids = [atom_ids[1] for atom_ids in nr_bonds]
for mol in struct_copy.molecule:
mol_atom_ids = set(mol.getAtomIndices())
if head_atom_id in mol_atom_ids:
# this mol is INI
continue
# Longest simple path with end marked as tail atom
mono_head_atom_id = mol_atom_ids.intersection(head_atom_ids).pop()
# Mark the backbone for debugging mode
_, mono_tail_atom_id = self.findHeadTail(self.template_polymer,
atom_ids=mol_atom_ids,
source=mono_head_atom_id,
set_style=self.debug)
mono_tail_atom = self.template_polymer.atom[mono_tail_atom_id]
mono_tail_atom.property[TAIL_ATOM_PROP] = True
if self.debug:
# Mark the rings and write out intermediate st for debugging mode
for idx, rings in enumerate(self.template_polymer_rings):
color = get_color(idx)
for atom_id in rings:
self.template_polymer.atom[atom_id].color = color
filename = 'break_into_mols_' + self.debug_file
self.template_polymer.write(filename)
jobutils.add_outfile_to_backend(filename, backend=self.backend)
log_debug(f"breakIntoMols: {filename} has nearest rotatable bonds"
" and atoms in rings marked.")
return struct_copy, head_atom_id
[docs] @staticmethod
def findHeadTail(struct, atom_ids=None, source=None, set_style=False):
"""
Find the head and tail atoms of the longest path. If multiple longest
paths are found, rank those longest paths by the head atom ids and then
tail atom ids. Choose the longest path with the smallest head/tail atom
ids.
:param struct: the structure to find head and tail
:type struct: "schrodinger.structure.Structure"
:param atom_ids: list of int or None
:type atom_ids: atom ids of the target molecule. If None, all the atoms
in the struct are used.
:param source: node (head atom index), Starting node for path.
If not specified, compute shortest path lengths using all nodes as
source nodes.
:type source: int
:param set_style: Mark the head, tail, and backbone atoms
:type source: bool
:rtype: int, int
:return: atom ids for head and tail atoms
"""
if not atom_ids:
atom_ids = struct.getAtomIndices()
graph = analyze.create_nx_graph(struct, atoms=atom_ids)
smallest_atom_id = min(atom_ids)
head_tail_pairs = collections.defaultdict(list)
# In case there is only one atom
head_tail_pairs[smallest_atom_id].append(smallest_atom_id)
max_path_length = 0
shortest_path_length = networkx.shortest_path_length(graph,
source=source)
if source:
shortest_path_length = [(source, shortest_path_length)]
for tmp_head_atom_id, tail_and_path_length in shortest_path_length:
for tmp_tail_atom_id, path_length in tail_and_path_length.items():
if max_path_length < path_length:
# A longer path found: clear the stored head / tail pairs
max_path_length = path_length
head_tail_pairs = collections.defaultdict(list)
if max_path_length == path_length:
# Multiple longest paths found: append new head / tail pair
head_tail_pairs[tmp_head_atom_id].append(tmp_tail_atom_id)
# Get the minimum of the head and tail atom id sets so that the pair is
# unique for one structure
head_atom_id = min(head_tail_pairs.keys())
tail_atom_id = min(head_tail_pairs[head_atom_id])
log_debug(
f"shortest_path: {networkx.shortest_path(graph, source=head_atom_id, target=tail_atom_id)}"
)
if set_style:
spath = networkx.shortest_path(graph,
source=head_atom_id,
target=tail_atom_id)
for atom1, atom2 in zip(spath[:-1], spath[1:]):
bond12 = struct.getBond(atom1, atom2)
bond12.setStyle(structure.ATOM_CPK)
bond21 = struct.getBond(atom2, atom1)
bond21.setStyle(structure.ATOM_CPK)
h_atom = struct.atom[spath[0]]
h_atom.style = structure.ATOM_BALLNSTICK
t_atom = struct.atom[spath[-1]]
t_atom.style = structure.ATOM_BALLNSTICK
return head_atom_id, tail_atom_id
[docs] def setAndMarkResidues(self):
"""
Break the molecule into fragments by rotatable bonds and set each fragment
as one residue.
:rtype: bool
:return: True, if struct_copy has more than one molecule, serving as
INI, TRM, and Monomers
"""
if self.template_polymer.atom_total < 4:
# Structures with less than 4 atoms don't have rotatable bonds
return False
# non-polymer struct may be obtained by modifying polymer struct
for atom in self.template_polymer.atom:
for prop in ATOM_PROP_MARKER:
atom.property[prop] = False
struct_copy, head_atom_id = self.breakIntoMols()
if len(struct_copy.molecule) == 1:
return False
if len(struct_copy.molecule) > self.LETTER_NUM**2:
return False
for residue_number, mol in enumerate(struct_copy.molecule, 1):
atom_indexes = set(mol.getAtomIndices())
if head_atom_id in atom_indexes:
residue_name = INI
else:
floor_div = residue_number // self.LETTER_NUM
residue_name = chr(65 + floor_div) if floor_div else ''
residue_name += chr(65 + residue_number % 26)
residue_name = residue_name.ljust(4)
self.setResProperties(residue_name, atom_indexes, residue_number)
if self.debug:
for residue_number, mol in enumerate(struct_copy.molecule, 1):
color = get_color(residue_number)
atom_indexes = set(mol.getAtomIndices())
for atom_index in atom_indexes:
self.template_polymer.atom[atom_index].color = color
# Residues of different colors and additional validation for debug mode
filename = 'set_and_mark_residues_' + self.debug_file
self.template_polymer.write(filename)
jobutils.add_outfile_to_backend(filename, backend=self.backend)
log_debug(
f"setAndMarkResidues: {filename} has residues in different colors."
)
assert len(struct_copy.molecule) == len(
self.template_polymer.residue)
return True
[docs] def setResProperties(self, residue_name, atom_indexes, residue_number):
"""
Reassign atom properties so that atoms of atom_indexes are treated
as one residue with proper MONOMER_ORIG_ATOM_IDX_PROP.
NOTE: MONOMER_ORIG_ATOM_IDX_PROP MODIFIED BASED ON ATOMS IN RESIDUE
:type residue_name: str
:param residue_name: the residue name for atoms in this residue
:type atom_indexes: list of int
:param atom_indexes: the atom ids for atoms in one residue
:type residue_number: int
:param residue_number: the residue number for atom_indexes
"""
for orig_atom_id, atom_id in enumerate(atom_indexes, 1):
atom = self.template_polymer.atom[atom_id]
# All atoms in the same residue must have the same residue number,
# residue name, chain and insertion code
atom.resnum = residue_number
atom.pdbres = residue_name
atom.chain = " "
atom.inscode = " "
atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = orig_atom_id
atom.property[MONOMER_ORIG_IDX_PROP2] = orig_atom_id
atom.property.pop(msprops.BACKBONE_ADJOINING_ATOM_PROP, None)
atom.property.pop(msprops.BACKBONE_ATOM_PROP, None)
atom.property.pop(msprops.SIDE_GROUP_ATOM_PROP, None)
[docs] def updateOrigAtomIdx(self):
"""
Update the MONOMER_ORIG_ATOM_IDX_PROP property in each polymer atom
with the atom index of atoms in moiety_structs set by setResProperties().
"""
orig_prop = msprops.MONOMER_ORIG_ATOM_IDX_PROP
if self.debug:
ids_moiety = []
for pdbres, struct in self.moiety_structs.items():
orig_ids = [x.property.get(orig_prop) for x in struct.atom]
ids_moiety += [x for x in orig_ids if x is not None]
# moiety_structs has one INI and multiple monomers (CAS and TRM
# are optional)
if pdbres == TRM:
# TRM is directly connected to one monomer
assert 1 == orig_ids.count(None)
elif pdbres == INI:
# INI is not directly connected to TRM and CAS
snum = 1
if TRM in self.moiety_structs:
snum += 1
if CAS in self.moiety_structs:
snum += 1
none_num = len(self.moiety_structs) - snum
# '<' for Multi-site, '=' for single-site INI
assert none_num <= orig_ids.count(None)
elif pdbres == CAS:
# CAS is directly connected to multiple moieties
assert 1 < orig_ids.count(None)
else:
marker_num = [[x.property.get(y)
for y in ATOM_PROP_MARKER]
for x in struct.atom]
none_num = sum([x.count(True) for x in marker_num])
# Monomer may be connected to one type of moiety multiple times
assert none_num <= orig_ids.count(None)
ids = [
x.property.get(orig_prop) for x in self.template_polymer.atom
]
# polymer may have multiple residues with the same pdbres name
assert set(ids) == set(ids_moiety)
for residue in self.template_polymer.residue:
residue_type = residue.pdbres.rstrip()
for res_atom in residue.atom:
atoms = [
x for x in self.moiety_structs[residue_type].atom
if x.property.get(orig_prop) == res_atom.property[orig_prop]
]
res_atom.property[orig_prop] = atoms[0].index
[docs] def markMoietiesProps(self):
"""
Copy and add additional properties to moiety_structs to pass
the validations in `Moieties`. However, these properties are
not used, since polymer is already built.
:type polymer: `schrodinger.structure.Structure`
:param polymer: structure of a polymer
"""
for residue_struct in self.moiety_structs.values():
for key, value in self.template_polymer.property.items():
if not residue_struct.property.get(MOIETY_PROP):
residue_struct.property[key] = value
if residue_struct.property[MOIETY_PROP].startswith('Monomer_'):
residue_struct.property[TACTICITY_PROP] = TACTICITY_ISO
residue_struct.property[BRANCH_PCT_PROP] = 0.5
residue_struct.property[BRANCH_MAX_PROP] = 0.8
residue_struct.property[BBTRANS_PROP] = True
elif residue_struct.property[MOIETY_PROP].startswith('Cascader'):
residue_struct.property[CASCADE_GEN_PROP] = 2
if self.debug:
# Write out all moieties with Br atoms
filename = 'mark_moieties_props_' + self.debug_file
residue_struct.append(filename)
jobutils.add_outfile_to_backend(filename, backend=self.backend)
log_debug(f"markMoietiesProps: {filename} has all moieties.")
[docs]class PolymerFragment(object):
[docs] def __init__(self,
resnum,
generation,
pre_frag=None,
is_branching=False,
pre_backbone=None,
template_frag=None,
molnum=None,
template_polymer=None):
"""
PolymerFragment class stores the connectivity, dihedral angle, and
anthoer other information for tangled chain method.
:type resnum: int
:param resnum: the resnum of the 3rd atom in dihedral angle
:type generation: int
:param generation: the generation of the fragment
:type pre_frag: `PolymerFragment`
:param pre_frag: The parent fragment
:type is_branching: bool
:param is_branching: True, if current fragment has more than children
:type pre_backbone: list of int
:param pre_backbone: the last atom in pre_backbone is the 3rd dihedral atom
:type template_frag: `PolymerFragment` or None
:param template_frag: the polymer fragment to be copied
:type molnum: int
:param molnum: molecule number
:type template_polymer: `schrodinger.structure.Structure`
:param template_polymer: the structure of one polymer chain
"""
self.resnum = resnum
self.generation = generation
self.in_sidegroup = False
self.atom_ids = []
self.dihedral = []
self.dihe_4th_atom_ids = []
self.orig_dihedral = []
self.next_frags = []
self.pre_backbone = pre_backbone or []
self.pre_frag = pre_frag
self.is_branching = is_branching
self.dihe_pool = []
self.dihe_rand = []
self.dihe_value = None
self.hitted_num = 0
self.max_hit_num = MAX_HIT_NUM
self.molnum = None
self.rings = []
self.rings_atom_ids = []
self.spear_rings = []
self.template_polymer = template_polymer
self.neighbor_frags = []
self.local_struct_atom_ids = set()
self.end_atom_ids = []
self.local_struct_side_dihedrals = []
self.torsion_pattern = None
self.torsion_probability = None
self.tail_atom = None
self.debug = schrodinger.in_dev_env()
if template_frag:
self.preDeepCopy(template_frag, molnum)
[docs] def preDeepCopy(self, original_frag, molnum):
"""
Deep copy the fragment attributes except those linking
different fragments.
:type original_frag: `PolymerFragment`
:param original_frag: Polymerfragment object to be copied
:type molnum: int
:param molnum: molecule number
"""
self.molnum = molnum
self.generation = original_frag.generation
self.in_sidegroup = original_frag.in_sidegroup
self.is_branching = original_frag.is_branching
self.torsion_pattern = original_frag.torsion_pattern
self.atom_ids = original_frag.atom_ids[:]
self.dihedral = original_frag.dihedral[:]
self.dihe_pool = original_frag.dihe_pool[:]
self.dihe_rand = original_frag.dihe_rand[:]
# [[atom ids in first ring],[atom ids in second ring],..]
self.rings_atom_ids = copy.deepcopy(original_frag.rings_atom_ids)
self.next_frags = original_frag.next_frags[:]
self.template_polymer = original_frag.template_polymer
self.torsion_probability = original_frag.torsion_probability
self.tail_atom = original_frag.tail_atom
[docs] def deepCopy(self, molnum):
"""
Deep copy the fragment class from the INI fragment.
:type molnum: int
:param molnum: molecule number
:rtype: `PolymerFragment`
:return: new INI fragment directing to all copied fragments
"""
first_frag = PolymerFragment(self.resnum,
self.generation,
template_frag=self)
first_frag.molnum = molnum
next_frags = [first_frag]
while next_frags:
frag = next_frags.pop(0)
for next_original_frag in frag.next_frags[:]:
frag_copied = PolymerFragment(next_original_frag.resnum,
next_original_frag.generation,
pre_frag=frag,
template_frag=next_original_frag,
molnum=molnum)
next_frags.append(frag_copied)
frag.next_frags.remove(next_original_frag)
frag.next_frags.append(frag_copied)
return first_frag
[docs] def setDihedrals(self, dihe_init):
"""
Set the dihedral angle pool and rand for each fragment.
dihedral angle pool and rand are lists of possible dihedral
angles. When the polymer chain grows, move to next polymer fragment
and set the dihedral angle using a value randomly picked from rand;
When the polymer growing site dies, move to previous polymer fragment,
pop up the used dihedral angle from the pool, and randomly
pick a new one from the left dihedral values in pool.
:type dihe_init: list of floats
:param dihe_init: list of possible dihedral angles
"""
self.dihe_init = dihe_init
fragnum = 1
self.fragnum = fragnum
for frag in self.fragments(include_self=False):
fragnum += 1
frag.fragnum = fragnum
frag.dihe_rand = dihe_init[:]
frag.dihe_pool = dihe_init[:]
random.shuffle(frag.dihe_pool)
frag.max_hit_num = MAX_HIT_NUM
[docs] def markSidegroupFrag(self):
"""
Mark the side group polymer fragment, if the 3rd atom in the dihedral
angle is in side group (not backbone atom).
"""
for frag in self.fragments(include_self=False):
atom_id = frag.dihedral[2]
if self.template_polymer.atom[atom_id].pdbres.rstrip()[-1] != '0':
frag.in_sidegroup = True
[docs] def resetFragDihedralPool(self):
"""
Reset the dihedral angle pool of all following fragment according to
rand in each fragment.
"""
for frag in self.fragments(include_self=False):
frag.dihe_pool = frag.dihe_rand[:]
random.shuffle(frag.dihe_pool)
frag.hitted_num = 0
[docs] def fragments(self, include_self=True):
"""
Yield the first fragment of next_frags and append the children of
that fragment to next_frags for recursion.
:type include_self: bool
:param include_self: If True, loop over itself and all child fragments.
If False, only loop over its child polymer fragments.
:rtype: iterator to generate `PolymerFragment`
:return: one child polymer fragment
"""
next_frags = [self] if include_self else self.next_frags[:]
while next_frags:
this_frag = next_frags.pop(0)
yield this_frag
next_frags += this_frag.next_frags[:]
[docs] def adjustAtomIds(self, pre_atomnum):
"""
Adjust atom ids in PolymerFragment when multipe chains are
added into the cell.
:type pre_atomnum: int
:param pre_atomnum: number of atoms before adding current molecule
"""
mol_atom_ids = []
for frag in self.fragments():
frag.atom_ids = [x + pre_atomnum for x in frag.atom_ids]
mol_atom_ids += frag.atom_ids
for findex, ring_indexes in enumerate(frag.rings_atom_ids):
frag.rings_atom_ids[findex] = [
x + pre_atomnum for x in ring_indexes
]
frag.dihedral = [x + pre_atomnum for x in frag.dihedral]
if frag.tail_atom:
frag.tail_atom += pre_atomnum
self.atom_ids_in_mol = sorted(mol_atom_ids)
self.atom_gids_in_mol = [
atom_ids - 1 for atom_ids in self.atom_ids_in_mol
]
[docs] def markBranchingFrag(self):
"""
Mark the branching fragments.
"""
for frag in self.fragments():
if len(frag.next_frags) > 1:
frag.is_branching = True
[docs] def getDiheValues(self):
"""
Get the possbile dihedral values of current fragment.
:rtype: list
:return: all polymer dihedral vaules
"""
if self.continue_grow:
if self.torsion_probability is not None:
return numpy.random.choice(self.dihe_rand,
len(self.dihe_rand),
replace=False,
p=self.torsion_probability)
dihe_values = self.dihe_rand[:]
random.shuffle(dihe_values)
elif self.hitted_num < self.max_hit_num:
dihe_values = self.dihe_pool[:]
else:
dihe_values = []
return dihe_values
[docs]def assign_markers(residue_struct, polym=None):
"""
Find and add marker atoms to the extracted structures.
:type residue_struct: `schrodinger.structure.Structure`
:param residue_struct: The structure to add the markers to
:type polym: `schrodinger.structure.Structure`
:param polym: If provided, the corresponding head/tail atoms are located in
this reference structure, and the position of Br is determined by bonding
and residue informations
"""
marker_ids = {0: [], 1: [], 2: []}
for atom in residue_struct.atom:
m_types = [y for x, y in ATOM_PROP_MARKER_IDX if atom.property.get(x)]
if not m_types:
continue
if polym:
patoms = [x for x in polym.atom if x.pdbres == atom.pdbres]
patoms = [
x for x in patoms
if x.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] ==
atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP]
]
patom = patoms[0]
bonded_atoms = [x for x in patom.bonded_atoms]
batoms = [x for x in bonded_atoms if x.pdbres != atom.pdbres]
#
m_types += [m_types[-1]] * (len(batoms) - len(m_types))
for index, marker_type in enumerate(m_types):
# Br atoms are used to mark head, tail, branch, and cas atoms
# As the polymer chain is already built, the Br position is
# not used. Putting them near the head and tail atoms helps
# to visualize which atoms are marked.
if polym:
try:
batom = batoms[index]
except IndexError:
# Br placed away from the residue (no bonds between residues)
batom = bonded_atoms[0]
xyz = numpy.array(patom.xyz) * 2 - numpy.array(batom.xyz)
else:
xyz = (numpy.array(patom.xyz) + numpy.array(batom.xyz)) / 2
residue_struct.addAtom('Br', *xyz)
else:
residue_struct.addAtom('Br', atom.x + 1., atom.y + 1.,
atom.z + 1.)
residue_struct.addBond(atom.index, residue_struct.atom_total, 1)
marker_ids[marker_type].append(residue_struct.atom_total)
marker_id_list = []
for marker_type, atom_ids in marker_ids.items():
marker_id_list += atom_ids
residue_struct.property[MARKER_PROP] = ','.join(map(str, marker_id_list))
[docs]class VolumeMemoryError(Exception):
pass
[docs]class LowMemoryVolGraph(object):
"""
This class mimics a networkx graph for the XYZVolumeGraph class. It has only
minimal networkx functionality. It is necessary because networkx graphs
are giant memory hogs and can't be used for even moderately-sized
sparse grid graphs. This class takes a tiny fraction - a few percent or so -
of the memory for an equivalent networkx graph.
"""
[docs] def __init__(self):
"""
Create a LowMemoryVolGraph instance
"""
self._all_nodes = set()
# Keys of _all_edges are nodes, values are a set of nodes connected to
# that node
self._all_edges = defaultdict(set)
# Blobs are groups of connected nodes - the equivalent of
# networkx.connected_components. Blobs are somewhat expensive timewise
# to find, so they are cached. Each time a change is made to the number
# of nodes or connectivity of the nodes the blob cache is erased.
self._all_blobs = []
self.machine_memory_mb = psutil.virtual_memory().total / float(2**20)
[docs] def __len__(self):
"""
The length of the graph is just the number of nodes
:rtype: int
:return: The number of nodes in the graph
"""
return len(self._all_nodes)
[docs] def checkMemoryLimit(self):
"""
Check that the amount of memory used is not approaching the total memory
available
:raise VolumeMemoryError: If the amount of memory used is too large
compared to the total system memory
"""
mymemory = jobutils.memory_usage_psutil()
# Use 0.80 as some jobs fail at 84% memory usage
if mymemory / self.machine_memory_mb > 0.80:
raise VolumeMemoryError('The size of the system combined with the '
'free space in the system is too large to '
'fit in memory on this machine.')
[docs] def addNode(self, node):
"""
Add a new node to the graph.
:type node: tuple (or hashable, sortable object)
:param node: The node to add
"""
self._all_nodes.add(node)
self._all_blobs = []
[docs] def addEdge(self, node1, node2):
"""
Add a new edge (a connection between two nodes) to the graph
:type node1: tuple (or hashable, sortable object)
:param node1: One of the nodes to create the edge between. Node order
does not matter.
:type node2: tuple (or hashable, sortable object)
:param node2: The other node to create the edge between. Node order
does not matter.
:raise KeyError: If one of the nodes is not found in the graph
"""
for anode in (node1, node2):
if anode not in self._all_nodes:
raise KeyError('Node %s is not in graph' % str(anode))
self._all_edges[node1].add(node2)
self._all_edges[node2].add(node1)
self._all_blobs = []
[docs] def removeNode(self, node):
"""
Remove a node from the graph. Also removes all edges attached to this
node
:type node: tuple (or hashable, sortable object)
:param node: The node to remove
:raise KeyError: If one of the nodes is not found in the graph
"""
self._all_nodes.remove(node)
for bonded in self._all_edges[node]:
self._all_edges[bonded].remove(node)
del self._all_edges[node]
self._all_blobs = []
[docs] def removeEdge(self, node1, node2):
"""
Remove an edge (a connection between two nodes) from the graph. The
nodes are not removed.
:type node1: tuple (or hashable, sortable object)
:param node1: One of the nodes for the existing edge. Node order
does not matter.
:type node2: tuple (or hashable, sortable object)
:param node2: The other node to for the existing edge. Node order
does not matter.
:raise KeyError: If one of the nodes is not found in the graph
"""
self._all_edges[node1].remove(node2)
self._all_edges[node2].remove(node1)
self._all_blobs = []
[docs] def nodes(self):
"""
A generator over all the nodes in the graph
:rtype: node type
:return: Iterates over each node in the graph
"""
for node in self._all_nodes:
yield node
[docs] def edges(self):
"""
A generator over all edges in the graph
:rtype: (node, node)
:return: Iterates over each pair of connected nodes. Nodes are returned
in sort order.
"""
for node, bonded in self._all_edges.items():
for bonder in bonded:
if bonder > node:
yield (node, bonder)
def _gatherBlobs(self, force=False):
"""
Find all the blobs in the system. A blob is a group of connected nodes.
:type force: bool
:param force: Recomputed the blobs even if the blob cache exists
:raise VolumeMemoryError: If memory usage grows too large
"""
if self._all_blobs:
if force:
self._all_blobs = []
else:
return
# We start with a random node. We iterate through all its connected
# neighbors, adding them to the blob. Then we iterate through the
# connected neighbors of each neighbor, etc. recursive, etc.
unblobbed_nodes = self._all_nodes.copy()
self.checkMemoryLimit()
count = 0
while unblobbed_nodes:
count += 1
node = unblobbed_nodes.pop()
# "added" contains all nodes added to the blob that we haven't yet
# checked the neighbors of
added = [node]
this_blob = set(added)
while added:
# Grab a new node to check
check_node = added.pop()
for next_node in self._all_edges[check_node]:
# Add any neighbors that we haven't already added
if next_node not in this_blob:
count += 1
# Add to the list of nodes whose neighbors we must check
added.append(next_node)
# Add to the blob
this_blob.add(next_node)
# Remove from the set of unblobbed nodes
unblobbed_nodes.remove(next_node)
if not count % 10000:
self.checkMemoryLimit()
self._all_blobs.append(this_blob)
# Sort the blob cache so that the largest blob is first
self._all_blobs.sort(key=len, reverse=True)
[docs] def blobs(self):
"""
A generator for all the blobs (groups of connected nodes) in the graph
:rtype: set
:return: Iterates over blobs. Blobs are returned in order of largest to
smallest
"""
if not self._all_blobs:
self._gatherBlobs()
for blob in self._all_blobs:
yield blob
[docs] def degree(self, node):
"""
Return the number of edges for the node
:type node: tuple (or hashable, sortable object)
:param node: The node to check
:rtype: int
:return: The number of edges for a node
:raise KeyError: If the node is not found in the graph
"""
if node not in self._all_edges:
# The edges dict is a defaultdict so must raise error manually
raise KeyError('Node %s not found in graph' % str(node))
return len(self._all_edges[node])
[docs]class XYZVolumeGraph(object):
"""
Create a networkx graph to search the voids in structure.
"""
[docs] def __init__(self, struct, spacing=2.0, scaffold=None):
"""
Create networkx graph based on the structure PBC or coordinates.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to compute the graph over
:type spacing: float
:param spacing: The approximate spacing (Angstroms) between graph nodes.
The actual grid spacing will be adjusted in each direction to ensure
uniform grid point distribution, and the actual spacing used in each
direction can be found in the self.xyz_spacings list
:type scaffold: `Scaffold` or None
:param scaffold: a structure that occupies space as a cluster of
molecules.
"""
self.struct = struct
self.origin = [0.0, 0.0, 0.0]
self.box_lengths = [0.0, 0.0, 0.0]
try:
# May find incorrect largest void for interface scaffold
self.pbc = clusterstruct.create_pbc(self.struct)
except ValueError:
# no PBC
self.pbc = None
# Determine X, Y and Z lengths; origin may be redefined
if scaffold and scaffold.scaffold:
self.defineBoxFromScaffold(scaffold)
elif self.pbc:
self.defineBoxFromPBC()
else:
self.defineBoxFromNonPBCStruct()
self.num_xyz = [
int(math.ceil(old_div(size, spacing))) for size in self.box_lengths
]
self.xyz_spacings = numpy.array([
old_div(length, num)
for num, length in zip(self.num_xyz, self.box_lengths)
])
self.shifted_origin = self.origin + self.xyz_spacings * 0.5
self.total_points = 0
[docs] def defineBoxFromScaffold(self, scaffold):
"""
Define the grid box as the scaffold box size
:type scaffold: `Scaffold`
:param scaffold: a structure that occupies space as a cluster of
molecules.
"""
for axis in range(3):
[region_min, region_max] = scaffold.box.getMinMax(axis)
self.box_lengths[axis] = region_max - region_min
self.origin[axis] = region_min
self.hull = None
[docs] def defineBoxFromPBC(self):
"""
Define the box from the PBC. This method works for triclinic cells by
virtue of creating a hull that can be used to eliminate grid points
outside the arbitrary-shaped PBC region.
"""
# Create the 8 vertices of the PBC
vecs = self.pbc.getBoxVectors()
# The first two for statements loop over every combination of 0, 1, 2 or
# 3 vectors - 8 in total
vertices = []
# Use anywhere from 0 (gives origin) to 3 vectors
for numvecs in range(0, 4):
# Select all possible combinations of numvecs vectors - the [0, 1,
# 2] list provides the indexes for the x, y and z vectors
for selection in itertools.combinations([0, 1, 2], numvecs):
vertices.append(numpy.zeros(3))
for vecdex in selection:
vertices[-1] += vecs[vecdex]
vertices = numpy.array(vertices)
self.box_lengths = vertices.ptp(0)
# Determine the origin by finding the center of the structure and then
# going "back" -1/2 each box length. This will center the box on the
# structure.
coords = self.struct.getXYZ()
mins = numpy.min(coords, 0)
maxes = numpy.max(coords, 0)
center = old_div((mins + maxes), 2.0)
self.origin = [
center[a] - old_div(self.box_lengths[a], 2) for a in range(3)
]
# Create a convex hull that encloses the 8 vertices
vertices += self.origin
self.hull = spatial.Delaunay(vertices)
[docs] def defineBoxFromNonPBCStruct(self):
"""
Define a box that encompasses the structure, including the VDW radii of
the extreme atoms
"""
xyz = self.struct.getXYZ()
min_gids = numpy.argmin(xyz, axis=0)
for dimens, gid in enumerate(min_gids):
self.origin[dimens] = (xyz[gid][dimens] -
self.struct.atom[gid + 1].radius)
max_gids = numpy.argmax(xyz, axis=0)
for dimens, gid in enumerate(max_gids):
self.box_lengths[dimens] = (xyz[gid][dimens] +
self.struct.atom[gid + 1].radius -
self.origin[dimens])
self.hull = None
[docs] def getNodeXYZ(self, node):
"""
Convert networkx node (tuple of X, Y, Z index) to XYZ coordinates
:type node: tuple
:param node: the node to convert
:rtype: tuple
:return: A tuple of x, y z coordinates in Angstroms
"""
return tuple(self.shifted_origin +
self.xyz_spacings * numpy.array(node))
[docs] @staticmethod
def getDistanceCellDistance(struct, probe):
"""
Get the distance that will be used as the cutoff for atoms close to a
point
:type struct: `schrodinger.structure.Structure`
:param struct: The structure that will be checked
:type probe: float
:param probe: The probe radius that will be used
:rtype: float
:return: The distance cutoff that will be used
"""
maxrad = max(x.radius for x in struct.atom)
return maxrad + 0.1 + probe
[docs] def locateVoids(self, atom_ids=None, vdw_scale=1.0, probe=0.0, logger=None):
"""
Remove all nodes that overlap a bitset-on atom (all atoms by default)
in current structure. And connect all grid points that are part of the
same void.
:type atom_ids: set or None
:param atom_ids: if not None, only check clashes for atoms in this set
:type vdw_scale: float
:param vdw_scale: VDW scale factor to use
:type probe: float
:param probe: A radius to add to each atom's vdw radius in order to
determine whether the atom encompasses a point or not. This is the
thought equivalent of rolling a ball around the vdw surface of the
atoms and tracing out the center of the ball.
:type logger: `Logger`
:param logger: If given, progress will be logged using this object
:raise VolumeMemoryError: If memory usage grows too large
"""
# Create an empty graph with no nodes
self.graph = LowMemoryVolGraph()
dcell_distance = self.getDistanceCellDistance(self.struct, probe)
# Distance cells are expensive, only create it once
self.distance_cell, self.distance_pbc = \
clusterstruct.create_distance_cell(
self.struct, dcell_distance, pbc=self.pbc)
# For each potential point in the graph, we only add it if a) it is
# inside the box hull (important for triclinic cells that are not cubic
# like the grid is) and b) it is not encompassed by an atom. It is much
# better memory-wise to only put valid points in the graph than it is to
# put all points in the graph and then remove the invalid ones.
total_points = self.num_xyz[0] * self.num_xyz[1] * self.num_xyz[2]
per_ten = old_div(total_points, 10)
since_last_log = 0
for xval in range(self.num_xyz[0]):
for yval in range(self.num_xyz[1]):
for zval in range(self.num_xyz[2]):
node = (xval, yval, zval)
xyz = self.getNodeXYZ(node)
if self.hull and self.hull.find_simplex(xyz) < 0.0:
# Point is outside the PBC box
continue
# Track the total number of possible points in the box not
# counting those outside the hull
self.total_points += 1
# Get list of atoms close to xyz point using distance cell
all_matches = self.distance_cell.query_atoms(*xyz)
overlap = False
for match in all_matches:
atom_id = match.getIndex()
# Use vdw_scale & VDW radius of each atom to determine
# if it overlaps
if atom_ids and atom_id not in atom_ids:
continue
vdw = self.struct.atom[atom_id].radius
size = vdw * vdw_scale + probe
if match.getDistanceSquared() <= (size)**2:
overlap = True
break
if overlap:
continue
# This is a node inside the box but outside any atom
self.graph.addNode(node)
if not self.total_points % 10000:
self.graph.checkMemoryLimit()
if logger:
# Log progress about every 10% of steps
since_last_log += self.num_xyz[1] * self.num_xyz[2]
if since_last_log > per_ten:
now = (xval + 1) * self.num_xyz[1] * self.num_xyz[2]
pct = 100.0 * now / total_points
logger.info('Processed %.1f%% of %d gridpoints' %
(pct, total_points))
since_last_log = 0
# Now create edges between neighboring nodes
self.bondRemainingNodes()
[docs] def bondRemainingNodes(self):
"""
Connect each node with its neighbor nodes that remain in the graph.
Nodes that differ by 1 in 1 coordinate are considered neighbors. Any PBC
is also accounted for so that neighboring nodes on the opposite side of
the cell are also bonded.
:raise VolumeMemoryError: If memory usage grows too large
"""
# FIXME: Does not work for neighbors on the boundary of triclinic cells
for count, node in enumerate(self.graph.nodes()):
for neighbor in self.gridNeighbors(node, self.num_xyz, self.pbc):
try:
self.graph.addEdge(node, neighbor)
except KeyError:
# Neighbor is not in graph, that's fine
pass
# Check memory every 10000 nodes
if not count % 10000:
self.graph.checkMemoryLimit()
[docs] @staticmethod
def gridNeighbors(node, num_xyz, pbc):
"""
A generator over all potential node values for the neighbors of the
given node. The nodes may or may not exist in the graph. Neighbors are
considered to have one of x, y or z values that differs by exactly 1 and
both other values are the same. The PBC is used to connect neighbors on
opposite edges of the cell.
:type node: tuple
:param node: the node to get the neighbors for
:type num_xyz: list
:param num_xyz: The number of grid points in the x, y and z directions
:type pbc: `schrodinger.infra.structure.PBC` or None
:param pbc: The PBC if one exists
:rtype: tuple
:return: Iterates over potential (x, y, z) neighbors of the given node.
"""
# FIXME: Does not work for neighbors on the boundary of triclinic cells
for coord in range(3):
for delta in (-1, 1):
newval = XYZVolumeGraph.getNeighborCoordVal(
node, delta, coord, num_xyz[coord], pbc)
if newval is None:
continue
other = list(node)
other[coord] = newval
other = tuple(other)
yield other
[docs] @staticmethod
def getNeighborCoordVal(node, delta, coord, numpts, pbc):
"""
Get the (x, y, z) tuple for a neighbor of node that differs by delta in
coord. (x, y, z) is modified based on the PBC if necessary
:type node: tuple
:param node: the node to convert
:type delta: int
:param delta: How much the coord value differs from the given node.
Typically 1 or -1 to give a neighboring node.
:type coord: int
:param coord: 0, 1 or 2 for the x, y or z direction
:type numpts: int
:param numpts: The number of grid points in the coord direction
:type pbc: `schrodinger.infra.structure.PBC` or None
:param pbc: The PBC if one exists
:rtype: tuple or None
:return: The (x, y, z) tuple formed by modifying coord of node by delta,
or None if there is no pbc and the modified coord would be outside
0:numpts-1
"""
val = node[coord] + delta
if val < 0:
if pbc:
return numpts - 1
else:
return None
elif val == numpts:
if pbc:
return 0
else:
return None
return val
[docs] def voids(self):
"""
A generator that returns groups of connected nodes that define voids in
the structure.
:rtype: tuple
:return: Each returned value is a set of nodes that are all connected
and define a void blob. The sets are returned in order of size, largest
to smallest
"""
for blob in self.graph.blobs():
yield blob
[docs] def getLargestVoid(self):
"""
Get the largest void.
:rtype: tuple
:return: The set of nodes that form the largest void in the structure
"""
for blob in self.graph.blobs():
return blob
[docs] def getSurfaceNodes(self, blob):
"""
Get a list of nodes in blob that have fewer than 6 connections
:type blob: set
:param blob: The set of nodes to search for surface nodes in
:rtype: list
:return: Each item of the list is a node that has fewer than 6
connections
"""
# FIXME: May not work for triclinic cells
return [node for node in blob if self.graph.degree(node) != 6]
[docs] def getBuriedNode(self, blob):
"""
A generator that returns nodes that is buried inside the blob of
connected nodes. Buried nodes have lots of connections to other nodes.
:type blob: a set of tuples
:param blob: Each item of the set is a node
:rtype: tuple
:return: The buried node
"""
nodes = [node for node in blob]
# Shuffle to get a randomized order
random.shuffle(nodes)
# Sort shuffled nodes by number of connections
for node in sorted(nodes, key=self.graph.degree, reverse=True):
yield node
[docs] def getBuriedXYZ(self, blob):
"""
Like getBuriedNode but return the xyz coordinates.
:type blob: a set of tuples
:param blob: Each item of the set is a node
:rtype: tuple
:return: A tuple of x, y z coordinates in Angstroms
"""
for node in self.getBuriedNode(blob):
yield self.getNodeXYZ(node)
[docs]def find_attached_atom_index(struct, mark_index, prop=None, propval=True):
"""
Find the index of the atom attached to the atom with the given index,
optionally setting a property/value on that atom
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to search for the atom
:type mark_index: int
:param mark_index: The index of the atom that we want to find an atom
attached to.
:type prop: str
:param prop: An atom property name
:param propval: The value to set prop to on the found attached atom. The
type of this object depends on the type of property being set
:rtype: int
:return: The index of an atom attached to the mark_index atom. If more than
one atom is attached to mark_index atom, the first one is returned.
"""
mark_atom = struct.atom[mark_index]
for neighbor in mark_atom.bonded_atoms:
# Only one neighbor allowed for marker atoms
break
else:
raise ValueError('Expected marker atom %d to be bound to another atom '
'but it is not')
if prop:
neighbor.property[prop] = propval
return neighbor.index
[docs]def get_other_item(two_list, item):
"""
In a two item list, return the item that isn't the one passed in
:type two_list: list
:param two_list: A list two items long
:param item: One of the items from two_list
:return: The item in two_list that is not the one passed in
"""
return two_list[abs(two_list.index(item) - 1)]
[docs]def propagate_chain_data_from_atom(struct, chain_atom):
"""
Set the chain data for all atoms in struct to the same data as found on atom
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to set atom properties on
:type chain_atom: `schrodinger.structure._StructureAtom`
:param chain_atom: The atom object to take the chain information from.
Might not be from the same structure object as struct
"""
cid = chain_atom.property.get(CHAIN_ID_PROP)
if cid is not None:
name = chain_atom.chain
for atom in struct.atom:
atom.chain = name
atom.property[CHAIN_ID_PROP] = cid
[docs]def get_random_nonzero_to_one():
"""
Get a random float: 0 > x >= 1
:rtype: float
:return: A floating point number in the range (0, 1]
"""
ranval = random.random()
# ranval is [0, 1), but we really want (0, 1] - the line below
# accomplishes that
return 1.0 - ranval
[docs]class TorsionAngler(object):
"""
Class to handle cycling a structure though a series of torsion values
"""
[docs] def __init__(self, options, minimum_steps=5, max_stepsize=15.):
"""
Create a TorsionAngler object
:type options: `argparse.Namespace`
:param options: Object with dihedral_min and dihedral_max properties
that describe the minimum and maximum allowed dihedral angles
:type minimium_steps: int
:param minimum_steps: The minimum steps to take. The actual number of
steps may be larger if the allowed range of torsions is greather than
max_stepsize*minimum_steps.
:type max_stepsize: float
:param max_stepsize: The largest number of degrees to move the torsion
per step.
"""
self.minval = options.dihedral_min
self.maxval = options.dihedral_max
span = self.maxval - self.minval
# We use n+1 in the line below because we don't want to get back to the
# starting point by the nth step.
self.delta_angle = min(old_div((span), float(minimum_steps + 1)),
max_stepsize)
self.steps = int(round(old_div(span, self.delta_angle))) - 1
[docs] def setData(self, struct, torsion_atoms):
"""
Set the structure to operate on and the 4 atoms involved in the torsion
:type struct: `schrodinger.structure.Structure`
:param struct: The struct containing the torsion to rotate
:type torsion_atoms: list
:param torsion_atoms: A four int list, each int is the atom index of an
atom in the torsion, in the order A-B-C-D, where B-C is the bond that
rotates when the torsion changes.
"""
self.struct = struct
self.torsion_atoms = torsion_atoms
self.torsion = self.getTorsionValueInRange()
[docs] def getTorsionValueInRange(self):
"""
Get a value for the torsion that falls within the specified range. The
user may have supplied 150-210, but the measured value might be -170
instead of 190. Convert the measured value to fall within the range.
:rtype: float
:return: The torsion value converted to fall within the range specified
for the min/max of the torsion values.
"""
raw_torsion = self.struct.measure(*self.torsion_atoms)
if self.minval <= raw_torsion <= self.maxval:
return raw_torsion
sign = old_div(raw_torsion, abs(raw_torsion))
full_circle = -sign * 360.
new_torsion = full_circle + raw_torsion
if self.minval <= new_torsion <= self.maxval:
return new_torsion
# This should never happen, but punt because it isn't the worst thing in
# the world
return raw_torsion
[docs] def rotomers(self):
"""
A generator for structures that step the torsion through the desired
values.
:rtype: `schrodinger.structure.Structure`
:return: Each yield gives the structure with the specified torsion
marched through the specified values.
"""
for step in range(self.steps):
self.incrementTorsionValue()
self.struct.adjust(self.torsion, *self.torsion_atoms)
yield self.struct
[docs] def incrementTorsionValue(self):
"""
Sets the torsion property to the next value to try for a dihedral as we
rotate it through its allowed range.
"""
self.torsion = self.torsion + self.delta_angle
if self.torsion > self.maxval:
self.torsion = self.minval + (self.torsion - self.maxval)
[docs]class MoietyError(Exception):
"""
Raised if there is an error in creating a Moiety subclass object
"""
[docs]class InitiationError(Exception):
"""
Raised if there is an error in initiating the amorphous cell.
"""
[docs]class BaseMoiety(object):
"""
Base class for all polymer units - initiator, terminator, cascader, monomers
"""
# This is a class variable so that all subclass instances have access to the
# same cache
bond_length_cache = {}
RX_PROP = 'i_matsci_rx'
[docs] def __init__(self, struct, markers=None, name=None):
"""
Create a BaseMoiety object
:type struct: `schrodinger.structure.Structure`
:param struct: The structure of this Moiety
:type markers: list
:param markers: A list of the indexes of the Rx atoms. If not supplied,
information will be read from the structure properties
"""
if markers:
self.markers = markers
self.name = name
else:
self.readFromStructure(struct)
self.raw_struct = struct
if self.name:
for atom in self.raw_struct.atom:
atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = atom.index
atom.property[msprops.MONOMER_NAME_PROP] = self.name
self.head = None
# self.rings is a list of sets, each set is the atom indexes for a ring
self.rings = [set(x) for x in self.raw_struct.find_rings()]
self.has_rings = bool(self.rings)
self.is_cg = msutils.is_coarse_grain(struct)
self.backbone_path_indexes = []
[docs] def findAttachedAtomIndex(self, mark_index, prop=None, propval=True):
"""
Find the index of the atom attached to the atom with the given index,
optionally setting a property/value on that atom
:type mark_index: int
:param mark_index: The index of the atom that we want to find an atom
attached to.
:type prop: str
:param prop: An atom property name
:param propval: The value to set prop to on the found attached atom. The
type of this object depends on the type of property being set
:rtype: int
:return: The index of an atom attached to the mark_index atom. If more
than one atom is attached to mark_index atom, the first one is returned.
"""
index = find_attached_atom_index(self.raw_struct,
mark_index,
prop=prop,
propval=propval)
return index
[docs] def moveHeadToOrigin(self):
"""
Move the structure so that the head atom is at the origin
"""
translate = [-a for a in self.raw_struct.atom[self.head].xyz]
transform.translate_structure(self.raw_struct, *translate)
[docs] def alignToVector(self, struct, current_vector, vector):
"""
Rotate the structure so that the current_vector is aligned to a new
vector
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to rotate. The coordinates of this
structure will be modified by this method
:type current_vector: list
:param current_vector: The current vector that will be aligned to the
new vector
:type vector: list
:param vector: The new vector that current_vector should be aligned to
:return: There is no return value, the structure is modified directly
:rtype: `schrodinger.structure.Structure`
:raises ValueError: If either vector has zero magnitude
"""
current_array = numpy.array(current_vector)
new_array = numpy.array(vector)
try:
matrix = transform.get_alignment_matrix(current_array, new_array)
except ValueError:
raise ValueError(
'Unable to align fragments due to overlapped atoms.')
transform.transform_structure(struct, matrix)
[docs] def getStructureForCoupling(self,
orientation,
location=None,
vector=None,
remove_marker=True,
residue_number=None,
mark_bond=False):
"""
Get the appropriate structure to couple to the existing unit
:type orientation: str
:param orientation: Should be the HEADFIRST or TAILFIRST constant,
indicates which end of the moiety will bind
:type location: list
:param location: XYZ coordinates that the coupling atom (head or tail
depending on orientation) should be moved to (moving the rest of this
moiety with it)
:type vector: list
:param vector: The vector that the coupler-coupling marker vector should
be aligned to
:type remove_marker: bool
:param remove_marker: Whether the marker atom should be removed.
:type residue_number: int or None
:param residue_number: If not None, the resnum property of every atom in
the returned structure will be set to this value and the residue name
will be assigned.
:type mark_bond: bool
:param mark_bond: If True, the created bond between two monomers is marked
:rtype: (`schrodinger.structure.Structure`, `AttachementAtoms`)
:return: A copy of the structure of this moiety rotated and translated
as requested so that it can bind in the specified orientation. An
AttachedAtoms tuple that gives the coupler, grower and marker indexes
for this structure.
"""
struct = self.getNextStructure(orientation)
attachments = self.getCouplerAndGrower(orientation)
# Mark the grower and coupler atoms for this entity
struct.atom[attachments.coupler].property[COUPLER_ATOM_PROP] = True
if attachments.grower:
struct.atom[attachments.grower].property[GROWER_ATOM_PROP] = True
if mark_bond:
self.markHeadTailLabels(struct.atom[attachments.grower],
struct.atom[attachments.coupler])
if vector:
current_vector = struct.atom[attachments.coupler_marker].xyz
self.alignToVector(struct, current_vector, vector)
if location:
transform.translate_structure(struct, *location)
if remove_marker:
mapping = struct.deleteAtoms([attachments.coupler_marker],
renumber_map=True)
coupler = mapping[attachments.coupler]
grower = mapping.get(attachments.grower)
grow_marker = mapping.get(attachments.grow_marker)
attachments = AttachmentAtoms(coupler, None, grower, grow_marker)
# If requested, set the residue name and number
if residue_number is not None:
resname = self.getPDBName().ljust(4)
for atom in struct.atom:
atom.resnum = residue_number
atom.pdbres = resname
return struct, attachments
[docs] def markHeadTailLabels(self, grower, coupler):
"""
Mark the atom unique label based on element, monomer lettter, and
'Head or Tail' orientation.
:param grower: the atom connecting chain
:type grower: 'structure.Structure.Atom'
:param coupler: the atom waiting for other coming monomer
:type coupler: 'structure.Structure.Atom'
"""
letter = self.getPDBName()
if len(letter) != 1:
# def getLetter in MonomerRow (polymer_builder_gui.py) defines
# a one letter code, and def getStructureForCoupling assigns the
# letter to monomer atoms when building the polymer
# INI, TRM or UNK (three letters )are assigned to initiator, terminators
# or unknown monmers
return
monomer_index = str(ord(letter) - 64)
# Only the bonds between known monomers are marked
g_label = grower.element + monomer_index
c_label = coupler.element + monomer_index
if not self.directional or g_label != c_label:
grower.property[UNIQUE_ATOM_PROP] = g_label
coupler.property[UNIQUE_ATOM_PROP] = c_label
return
label = g_label
for atom in [grower, coupler]:
for prop, ht_char in zip(HT_ATOM_PROPS, ['H', 'T']):
if atom.property.get(prop):
ht_lb = coarsegrain.CHIRAL_SEPARATOR.join([label, ht_char])
atom.property[UNIQUE_ATOM_PROP] = ht_lb
break
[docs] def getBondLength(self, atom1, atom2, cg_bond_factor=0.8):
"""
Get the desired bond length between the grower atom of the existing
unit and the coupling atom of this moiety
:type atom1: `_StructureAtom`
:param atom1: One of the atoms to form a bond in between.
:type atom2: `_StructureAtom`
:param atom2: The other atoms to form a bond in between.
:type cg_bond_factor: float
:param cg_bond_factor: The pre-factor to calcuate the bond length based
on the particle radius.
:rtype: float
:return: The desired bond distance
"""
# Check for cached value
key = tuple(sorted([atom1.element, atom2.element]))
try:
return self.bond_length_cache[key]
except KeyError:
pass
if self.is_cg:
# This 2.35 is from the 'Use Martini defaults' in sketcherGUIElements.cpp
radius_sum = sum([atom.radius for atom in [atom1, atom2]])
self.bond_length_cache[key] = radius_sum * cg_bond_factor
return self.bond_length_cache[key]
# Create a structure with both atoms and use mmideal to deliver (what is
# very probably a very much NOT) ideal bond length (SHARED-3432)
struct = structure.create_new_structure(0)
for element in key:
# Add as 'C' to avoid atom typing warning messages for strange
# elements
atom = struct.addAtom('C', 0.0, 0.0, 0.0)
buildcomplex.transmute_atom(atom, element)
struct.addBond(1, 2, 1)
build.add_hydrogens(struct)
struct.retype()
# Note that this ideal bond order doesn't account for the hybridization
# of the atoms, but this is a secondary effect that will easily be
# cleaned up by simulation/QM calculation.
self.bond_length_cache[key] = mm.mmideal_get_stretch(struct, [1, 2])
return self.bond_length_cache[key]
[docs] def determineCouplerLocation(self, xyz, vector, bond_length):
"""
Figure out the XYZ coordinates where the coupler atom should go
:type xyz: list
:param xyz: The XYZ coordinates of the grower atom
:type vector: list
:param vector: The desired bond vector for coupling (goes from
grower->coupler)
:type bond_length: float
:param bond_length: the bond length between grower and coupler
:rtype: list
:return: The XYZ coordinates where the coupler atom should be placed
"""
normalv = transform.get_normalized_vector(numpy.array(vector))
scaledv = [a * bond_length for a in normalv]
location = [a + b for a, b in zip(xyz, scaledv)]
return location
[docs] def addToChain(self,
chain,
grower,
grow_marker,
orientation,
remove_marker=True,
set_residue_data=False,
propagate_chain_data=False,
chain_namer=None,
mark_bond=False):
r"""
Bind this moiety to an existing chain
The grower, grow_marker, coupler and coupler_marker are defined so that
the binding occurs like this:
chain-grower coupler_marker
\ \
grow_marker coupler-this_moiety
goes to:
chain-grower
\
coupler-this_moiety
:type chain: `schrodinger.structure.Structure`
:param chain: The existing chain to add this moiety to
:type grower: int
:param grower: The index of the atom in the existing chain that this
moiety should bond with
:type grow_marker: int
:param grow_marker: The index of the Rx atom that marks the attachment
point - the grower->grow_marker bond vector indicates the desired
grower->coupler bond vector.
:type orientation: str
:param orientation: Whether this moiety should add head first or tail
first. Should be a module HEADFIRST or TAILFIRST constant.
:type remove_marker: bool
:param remove_marker: Whether the chains grower_marker atom should be
removed. Set to False if adding multiple moieties and you don't want
existing atom numbers to change during the process.
:type set_residue_data: bool
:param set_residue_data: Whether to set the residue number property
for all atoms added by this method to 1 greater than the residue number
of the grow atom on the passed in chain. If False, no residue numbers
will be set. A residue name is also assigned if this value is True.
:type propagate_chain_data: bool
:param propagate_chain_data: Whether to set the chain data
for all atoms added by this method to the same chain information as
the grow atom on the passed in chain. If False or the grower atom
contains no chain information, this will not be done.
This parameter is used if the added moiety should have the same chain
information as the existing moiety. See also the chain_namer parameter.
:type chain_namer: function
:param chain_namer: A function that should be called with the structure
that will be added to chain. The purpose of this function should be to
add chain name data to the added structure. Supply this function if the
moiety to be added is a full chain. If the moiety to be added is just
another monomer, terminator, etc during the building of a single chain,
do not supply this function. This parameter is used if the added moiety
should have different chain information from the existing moiety, and it
is up to the chain_namer function to set that data.
See also the propagate_chain_data paramter.
:type mark_bond: bool
:param mark_bond: If True, the created bond between two monomers is marked
:rtype: tuple(AttachmentAtoms, dict)
:return: First item is the updated attachment atoms with coupler atom,
the new grower atom and the new grow marker. Second item is
dictionary with updated atom indices after removing markers where
keys are atom numbers before deleting, and value for each is the new
atom number, or None if that atom was deleted.
"""
num_existing_atoms = chain.atom_total
# Figure out the location to place the coupling atom for the proper bond
# angle and length to the chain
coupler_atom = self.getCouplerAtom(orientation)
grower_atom = chain.atom[grower]
grower_xyz = grower_atom.xyz
grow_marker_xyz = chain.atom[grow_marker].xyz
if set_residue_data:
residue_number = grower_atom.resnum + 1
else:
residue_number = None
# coupling_vector runs in the direction from my coupling atom to the
# chain grower atom. Grow vector runs in the other direction
coupling_vector = [a - b for a, b in zip(grower_xyz, grow_marker_xyz)]
grow_vector = [-a for a in coupling_vector]
bond_length = self.getBondLength(coupler_atom, grower_atom)
location = self.determineCouplerLocation(grower_xyz, grow_vector,
bond_length)
# Attach a copy of this monomer
struct, attachments = self.getStructureForCoupling(
orientation,
location=location,
vector=coupling_vector,
residue_number=residue_number,
mark_bond=mark_bond)
# Set any requested chain data
if chain_namer:
chain_namer(struct)
elif propagate_chain_data:
propagate_chain_data_from_atom(struct, grower_atom)
# Different residues should have different residue numbers
residue_number = len(chain.residue)
for increment, residue in enumerate(struct.residue, 1):
residue.resnum = residue_number + increment
chain.extend(struct)
# Create the bond
coupler = attachments.coupler + num_existing_atoms
chain.addBond(grower, coupler, 1)
if mark_bond:
self.markBond(chain, grower, coupler)
if attachments.grower:
new_grower = attachments.grower + num_existing_atoms
new_grow_marker = attachments.grow_marker + num_existing_atoms
else:
# This is an end group, no backbone/grower atom
new_grower = None
new_grow_marker = None
# We no longer need the chain's marker atom - delete it and get the new
# atom numbers
mapping = {}
if remove_marker:
mapping = chain.deleteAtoms([grow_marker], renumber_map=True)
coupler = mapping[coupler]
if new_grower:
new_grower = mapping[new_grower]
new_grow_marker = mapping[new_grow_marker]
new_attachment_atoms = AttachmentAtoms(coupler, None, new_grower,
new_grow_marker)
return new_attachment_atoms, mapping
[docs] def markBond(self, chain, grower, coupler):
"""
Mark the directional bond from grower to coupler.
:param chain: the structure whose bond is to be marked
:type chain: 'structure.Structure'
:param grower: the core atom connecting to the just added monomer
:type grower: int
:param coupler: the just added monomer's atom connecting to the core
:type coupler: int
"""
bond = chain.getBond(grower, coupler)
for prop, index in {
coarsegrain.CG_UNIQUE_BOND_PARTICLE_1_LABEL_KEY: grower,
coarsegrain.CG_UNIQUE_BOND_PARTICLE_2_LABEL_KEY: coupler
}.items():
label = chain.atom[index].property.pop(UNIQUE_ATOM_PROP, None)
if label:
bond.property[prop] = label
[docs] def findBackbone2BranchingPoint(self, exclude_markers):
"""
Find the branching point and the shortest path to the nearest atom
in backbone_path_indexes. The braching point may have multiple branches
connected to, but only one single pair of [backbone atom][branching atom]
= [path with ends] is recorded.
:type exclude_markers: list of int
:param exclude_markers: the atom ids of markers to be excluded
"""
branching_markers = list(set(self.markers) - set(exclude_markers))
self.backbone_to_marked_end = {}
for atom_index in self.backbone_path_indexes:
self.backbone_to_marked_end[atom_index] = {}
for marker in branching_markers[:]:
bonded_atom = next(self.raw_struct.atom[marker].bonded_atoms)
bonded_to_marker = bonded_atom.index
path_with_ends = None
if bonded_to_marker in self.backbone_side_atoms[atom_index]:
# the branching point is in the side group
path_with_ends = analyze.find_shortest_bond_path(
self.raw_struct, atom_index, bonded_to_marker)
elif bonded_to_marker == atom_index:
# branching from backbone
path_with_ends = [bonded_to_marker]
if path_with_ends:
branching_markers.remove(marker)
self.backbone_to_marked_end[atom_index][
bonded_to_marker] = path_with_ends
[docs] @classmethod
def write(cls, filename, struct, markers, name, classname=None):
"""
Write the structure out to a file, tagging it with properties in such a
way that the driver will recognize when reading the structure in
:type filename: str
:param filename: The path to the file
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to write
:type markers: list
:param markers: A list of the indexes of the Rx atoms.
:type name: str
:param name: The name to use for the title of the structure
:type classname: str
:param classname: The chemical class of this moiety. If not supplied,
the actual python class name will be used since those are typically
the same as the chemical class.
"""
if not classname:
classname = cls.__name__
struct.property[MOIETY_PROP] = classname
struct.property[MARKER_PROP] = ','.join([str(x) for x in markers])
struct.title = name
struct.append(filename)
[docs] def checkRequiredProperties(self, struct, propexps):
"""
Check to see if the structure has the desired properties set on it
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to check
:type propexps: list of tuple
:param propexps: (property string, explanation of what the property is)
for each structure property to check
"""
for prop, explanation in propexps:
if prop not in struct.property:
raise MoietyError('Structure %s is missing %s which specifies '
'%s.' % (struct.title, prop, explanation))
[docs] def readFromStructure(self, struct, propexps=None):
"""
Read the data for this moiety from the structure object and check to
make sure all the required properties are set
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to check
:type propexps: list of tuple
:param propexps: (property string, explanation of what the property is)
for each structure property to check
"""
if not propexps:
propexps = []
# Check that all required properties are available
propexps.append(
(MARKER_PROP, 'comma-separated list of Rx atom indexes'))
self.checkRequiredProperties(struct, propexps)
# Convert the structure properties to instance data
try:
markers = [int(x) for x in struct.property[MARKER_PROP].split(',')]
except ValueError:
raise MoietyError('The Rx atom indexes in %s must be a comma-'
'separated list of integers. Got "%s" instead.' %
(MARKER_PROP, struct.property[MARKER_PROP]))
self.markers = markers
for index, atom_id in enumerate(self.markers, start=1):
struct.atom[atom_id].property[self.RX_PROP] = index
self.name = struct.title
[docs] def findSideGroup(self):
"""
Find and mark the side group atoms. The side group is any heavy atom
not part of the backbone and any hydrogen attached to one of those
heavy atoms. Note that IUPAC specifically states that a "sidechain" is
an oligomer or polymer, while a "side group" is a non-oligomer/polymer.
So monomers have side groups, not sidechains.
"""
# Find the indexes of all backbone atoms, including those rings fused to
# the backbone
backbone_atoms = {
x for x in self.raw_struct.atom
if x.property.get(msprops.BACKBONE_ATOM_PROP)
}
marked_indexes = {x.index for x in backbone_atoms}
# In a four-atom dihedral angle list, bond rotation (2nd-3rd) changes
# the xyz of side atoms attached to the 3rd atom and the xyz of the
# 4th atom.
self.backbone_side_atoms = {}
num_groups = 0
adjoining_prop = msprops.BACKBONE_ADJOINING_ATOM_PROP
all_side_atom_ids = set()
for atom in backbone_atoms:
self.backbone_side_atoms[atom.index] = []
for neighbor in atom.bonded_atoms:
if not neighbor.property.get(msprops.BACKBONE_ATOM_PROP):
# This is a non-backbone atom bound to the backbone
if neighbor.index in self.markers:
# This is just a temporary marker atom, ignore it
marked_indexes.add(neighbor.index)
continue
if neighbor.element == 'H':
# Hydrogens are just adjoining atoms and not side groups
neighbor.property[adjoining_prop] = True
marked_indexes.add(neighbor.index)
self.backbone_side_atoms[atom.index] += [neighbor.index]
else:
# This may be a single atom or multi atom group
try:
group = self.raw_struct.getMovingAtoms(
atom, neighbor)
except ValueError:
# This is a sidegroup atom in the same ring as a
# backbone atom. This can happen if rings share at
# least one atom with a ring that is part of the
# backbone.
group = self.findRingSideGroup(
self.raw_struct, marked_indexes, neighbor.index)
if len(group) == 1:
if neighbor.property.get(
msprops.SIDE_GROUP_ATOM_PROP):
# This is an atom in a side ring.
if neighbor.index not in all_side_atom_ids:
# This atom has not been added to side atoms
self.backbone_side_atoms[atom.index] += [
neighbor.index
]
# Register this atom and prevent double
# counting from the other end.
all_side_atom_ids.add(neighbor.index)
continue
else:
# This is a non-backbone atom bound to the
# backbone and not yet part of any side group
neighbor.property[adjoining_prop] = True
# Mark all atoms in this side group with the group name
num_groups += 1
gst = self.raw_struct.extract(group)
formula = analyze.generate_molecular_formula(gst)
group_name = '%s_%d' % (formula, num_groups)
for gindex in group:
gatom = self.raw_struct.atom[gindex]
gatom.property[
msprops.SIDE_GROUP_ATOM_PROP] = group_name
marked_indexes.add(gindex)
if gindex not in self.markers:
self.backbone_side_atoms[atom.index] += [gindex]
all_side_atom_ids.update(
self.backbone_side_atoms[atom.index])
[docs] def finalizeSideGroup(self):
"""
Find atoms in backbone (marked as BACKBONE_ATOM_PROP) but not in the
shortest path (backbone_path_indexes) and then treat these atoms along
with their side groups as the side groups of certain atom in shortest
path (backbone_path_indexes)
"""
backbone_path_indexes = set(self.backbone_path_indexes)
atom_ids_to_move = set()
for atom_id_to_move in self.backbone_side_atoms:
if atom_id_to_move not in backbone_path_indexes:
atom_ids_to_move.add(atom_id_to_move)
if not atom_ids_to_move:
# all atoms marked as BACKBONE_ATOM_PROP are in
# backbone_path_indexes
return
for backbone_atom_id in self.backbone_path_indexes:
# search along backbone_path_indexes to find neighbor atoms marked
# as BACKBONE_ATOM_PROP but not in backbone_path_indexes
next_indexes = [backbone_atom_id]
while next_indexes:
current_index = next_indexes.pop(0)
for next_atom in self.raw_struct.atom[
current_index].bonded_atoms:
next_index = next_atom.index
if next_index in atom_ids_to_move:
# Find a neighbor atom marked as BACKBONE_ATOM_PROP,
# but not in backbone_path_indexes.
self.backbone_side_atoms[
backbone_atom_id] += self.backbone_side_atoms[
next_index]
self.backbone_side_atoms[backbone_atom_id].append(
next_index)
self.backbone_side_atoms.pop(next_index)
atom_ids_to_move.remove(next_index)
next_indexes.append(next_index)
[docs] def markBackboneAtom(self, index):
"""
Mark the atom as a backbone atom and make sure that if the atom is in a
ring that all atoms in the ring are marked as backbone
:type index: int
:param index: The index of the atom to mark
"""
self.raw_struct.atom[index].property[msprops.BACKBONE_ATOM_PROP] = True
for ring_indexes in self.rings:
if index in ring_indexes:
for rindex in ring_indexes:
ratom = self.raw_struct.atom[rindex]
ratom.property[msprops.BACKBONE_ATOM_PROP] = True
[docs] def setAsFragment(self):
"""
Set backbone path and side atoms for Initiator and Terminator.
Only the atom connected to R1 group is treated as backbone path;
all the rest real atoms are treated as side groups.
"""
# Use the atom connected to the first R1 marker as the head atom
head = next(self.raw_struct.atom[self.markers[0]].bonded_atoms)
self.backbone_path_indexes = [head.index]
self.backbone_side_atoms = {head.index: []}
excluded_indexes = set([head.index] + self.markers)
for atom in self.raw_struct.atom:
if atom.index in excluded_indexes:
continue
self.backbone_side_atoms[head.index].append(atom.index)
[docs] def findRotatableBonds(self):
"""
Find rotatable bond along the backbone path.
"""
self.rotatable_bonds = []
backbone = self.backbone_path_indexes
atom_idx0 = backbone[0]
for atom_idx1 in backbone[1:]:
if analyze.is_bond_rotatable(self.raw_struct.getBond(
atom_idx0, atom_idx1),
allow_methyl=True):
self.rotatable_bonds.append({atom_idx0, atom_idx1})
atom_idx0 = atom_idx1
[docs]class Terminator(BaseMoiety):
"""
A unit that terminates a chain
Note that the concept of an "Terminator" can be a bit fungible. It can
either be the actual terminator moiety that ends a polymer chain, or it may
be an already built chain that will be tacked on to some initiation point.
"""
[docs] def __init__(self, struct, markers=None, name=None):
"""
Create a Terminator object
Terminators drawn by the user as:
R1-stuff
And stored as:
marker-head-stuff
where marker is the R1 atom, head is the atom it is bound to, and stuff
is everything else.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure of this Moiety
:type markers: list
:param markers: A list of the indexes of the Rx atoms
:type name: str
:param name: The name of this moiety - gets added to each atom as an
atom property
"""
BaseMoiety.__init__(self, struct, markers=markers, name=name)
self.findHead()
self.moveHeadToOrigin()
[docs] def findHead(self):
"""
Find and store the index of the head atom
"""
self.head = self.findAttachedAtomIndex(self.markers[0], HEAD_ATOM_PROP)
[docs] def getNextStructure(self, *args):
"""
Get a copy of this structure to add to a chain
"""
return self.raw_struct.copy()
[docs] def getCouplerAtom(self, *args):
"""
Get the coupler (head) atom
"""
return self.raw_struct.atom[self.head]
[docs] def getCouplerAndGrower(self, *args):
"""
Get the coupler and grower atoms and markers. The latter is None because
Terminators have no grow points
:rtype: `AttachmentAtoms`
:return: The atoms indexes of the coupler, grower and marker atoms
"""
coupler = self.head
coupler_marker = self.markers[0]
return AttachmentAtoms(coupler, coupler_marker, None, None)
[docs] def getPDBName(self):
"""
Get a PDB residue-like name for this moiety
"""
# This overrides the base class return value
return TRM
[docs]class Cascader(Terminator):
"""
A group that ends a chain but starts multiple new chains
Cascaders are drawn as::
R2
|
R1-stuff-R2
and stored as::
cascader
|
marker-head-stuff-cascader
where cascader is an atom property added to the R2 marker atoms
"""
[docs] def __init__(self, *args, **kwargs):
Terminator.__init__(self, *args, **kwargs)
self.findBackbone()
[docs] def findBackbone(self):
"""
Find the set of atoms that creates the shortest distance (number of
bonds) between the head and each cascade points. These are considered
backbone atoms.
Backbone atoms are marked with a backbone atom property.
"""
all_backbone = []
for atom in self.raw_struct.atom:
if atom.property.get(CASCADER_ATOM_PROP):
path = analyze.find_shortest_bond_path(self.raw_struct,
self.head, atom.index)
all_backbone.extend(path)
if len(path) > len(self.backbone_path_indexes):
self.backbone_path_indexes = path
# Set the backbone property
for index in all_backbone:
self.markBackboneAtom(index)
[docs] def findHead(self):
"""
Find the head atom, but also mark cascader atoms that will start new
points
"""
Terminator.findHead(self)
for index in self.markers[1:]:
self.raw_struct.atom[index].property[
CASCADER_MARKER_ATOM_PROP] = True
next(self.raw_struct.atom[index].bonded_atoms
).property[CASCADER_ATOM_PROP] = True
[docs] def readFromStructure(self, struct):
"""
Overrides the parent method to add and read Cascader-specific
properties.
See the parent class method for documentation
"""
# Check all required properties are available
propexps = [(CASCADE_GEN_PROP, 'the number of cascade generations')]
BaseMoiety.readFromStructure(self, struct, propexps=propexps)
# Convert the structure properties to instance data
self.generations = struct.property[CASCADE_GEN_PROP]
[docs] @classmethod
def write(cls, generations, filename, struct, *args):
"""
Over-ride the parent class method to add generations as a property to
the structure.
:type generations: int
:param generations: The number of cascade generations allowed
See the parent class method for documentation
"""
struct.property[CASCADE_GEN_PROP] = generations
Terminator.write(filename, struct, *args, classname='Cascader')
[docs] def getPDBName(self):
"""
Get a PDB residue-like Name for this moiety
"""
return CAS
[docs] def setAsFragment(self):
"""
Prepare Cascader information for in-cell grow.
"""
self.findSideGroup()
self.finalizeSideGroup()
self.findBackbone2BranchingPoint([self.markers[0]])
self.findRotatableBonds()
[docs]class Initiator(BaseMoiety):
"""
A group that starts the whole polymer off - can have multiple grow points,
which means that multiple chains will radiate from this group (a dendrimer).
Note that the concept of an "Initiator" can be a bit fungible. It can either
be the actual intiator moiety that starts the whole polymer, or it may be a
partially built polymer that still needs additional chains added to it.
"""
[docs] def __init__(self, *args, **kwargs):
"""
Overrides the parent init method to allow the parial_polymer keyword
parameter.
:type partial_polymer: bool
:keyword partial_polymer: If True, this Initiator is actually a
partially built polymer that is going to just act like an initiator.
"""
self.partial_polymer = kwargs.pop('partial_polymer', False)
BaseMoiety.__init__(self, *args, **kwargs)
for grow_marker in self.markers:
self.findAttachedAtomIndex(grow_marker, HEAD_ATOM_PROP)
if not self.partial_polymer:
for atom in self.raw_struct.atom:
atom.resnum = 1
atom.pdbres = self.getPDBName()
[docs] def completePolymer(self, chain_terminator, chain_namer, clash_fixer):
"""
Tack on an existing chain to each grow point
:type chain_terminator: `Terminator`
:param chain_terminator: A Terminator object that stores the existing
chain to add
:type chain_namer: function
:param chain_namer: A function that should be called on each chain
that will be added to the existing polymer. The purpose of this
function should be to add chain name data to the added structure.
:type clash_fixer: function
:param clash_fixer: A function to call on the polymer when any new chain
is added in order to fix any clashes caused by the new chain
"""
polymer = self.raw_struct.copy()
if not self.partial_polymer:
# This is a true initiator so we need to define chain information
chain_namer(polymer)
propagate_chain_data = len(self.markers) == 1
if propagate_chain_data:
# This has a single initiation point so the chain attached to
# it will share the same chain ID
chain_namer = False
else:
# This is a partial polymer, so chain data is already set on it
propagate_chain_data = False
for grow_marker in self.markers:
grower = self.findAttachedAtomIndex(grow_marker, HEAD_ATOM_PROP)
polymer.atom[grower].property[GROWER_ATOM_PROP] = True
chain_terminator.addToChain(
polymer,
grower,
grow_marker,
orientation=HEADFIRST,
remove_marker=False,
propagate_chain_data=propagate_chain_data,
chain_namer=chain_namer)
if not propagate_chain_data:
# We've added a whole new chain to an existing chain, so need to
# fix clashes
polymer = clash_fixer(polymer)
# We wait until all chains are added to remove any marker atoms because
# we don't want atom indexes to change during this process
polymer.deleteAtoms(self.markers)
return polymer
[docs] def fillBranchPoints(self, chain_terminator, chain_branch_points,
chain_namer, clash_fixer):
"""
Tack on an existing chain to each of our branch points - each point has
a percent chance of getting a chain added to it as long as its
generation is less than the generation limit
:type chain_terminator: `Terminator`
:param chain_terminator: A Terminator object that stores the existing
chain to add
:type chain_branch_points: list
:param chain_branch_points: A list of atom indexes in the
chain_terminator structure that are branch points
:type chain_namer: function
:param chain_namer: A function that should be called on each chain
that will be added to the existing polymer. The purpose of this
function should be to add chain name data to the added structure.
:type clash_fixer: function
:param clash_fixer: A function to call on the polymer when any new chain
is added in order to fix any clashes caused by the new chain
:rtype: (`schrodinger.structure.Structure`, int)
:return: The new polymer structure with additional branching, plus the
number of new branches that were added
"""
polymer = self.raw_struct.copy()
markers_to_delete = []
for grow_marker in self.markers:
grower = self.findAttachedAtomIndex(grow_marker)
markatom = polymer.atom[grow_marker]
generation = markatom.property[BRANCH_GEN_ATOM_PROP]
if generation == markatom.property[BRANCH_MAXGEN_ATOM_PROP]:
# We've reached the maximum number of branch generations
markatom.property[BRANCH_PCT_ATOM_PROP] = 0
continue
chance = get_random_nonzero_to_one() * 100.
if chance > markatom.property[BRANCH_PCT_ATOM_PROP]:
# Failed the chance to branch at this atom
markatom.property[BRANCH_PCT_ATOM_PROP] = 0
continue
# All tests have passed, we'll branch at this point
polymer.atom[grower].property[BRANCH_ATOM_PROP] = True
num_existing_atoms = polymer.atom_total
chain_terminator.addToChain(polymer,
grower,
grow_marker,
orientation=HEADFIRST,
remove_marker=False,
chain_namer=chain_namer)
polymer = clash_fixer(polymer)
# Mark the correct generation on all the newly-added branching atoms
for index in chain_branch_points:
batom = polymer.atom[num_existing_atoms + index - 1]
batom.property[BRANCH_GEN_ATOM_PROP] = generation + 1
markers_to_delete.append(grow_marker)
# Wait to delete all marker atoms until the end so that atom indexes do
# not change
polymer.deleteAtoms(markers_to_delete)
return polymer, len(markers_to_delete)
[docs] def getPDBName(self):
"""
Get a PDB residue-like Name for this moiety
"""
return INI
[docs]class Monomer(BaseMoiety):
"""
The repeating unit that makes up the polymer chain
"""
[docs] def __init__(self, struct):
"""
Create a Monomer object
Monomers drawn by the user as:
R1-stuff-R2
And stored as:
marker1-head-stuff-tail-marker2
where marker1 is the R1 atom, head is the atom it is bound to, marker2
is the R2 atom and tail is the atom it is bound to, and stuff
is everything else.
"""
BaseMoiety.__init__(self, struct)
# 4 structures are stored for each instance based on orientation and
# chirality - HEADFIRST/R, HEADFIRST/S, TAILFIRST/R, TAILFIRST/S
# precalculating these saves time when building the polymer. To access
# the HEADFIRST, R structure, use
# self.structures[HEADFIRST][msprops.CHIRAL_R]
self.structures = {HEADFIRST: {}, TAILFIRST: {}}
self.tail = None
self.tactical_index = None
# backbone_path_indexes may differ from the set of atoms marked with the
# backbone atom property. The former contains only the atoms that form
# the shortest path while the latter will additionally contain any
# atoms that are in the same ring as an atom in backbone_path_indexes
self.default_chirality = None
self.previous_chirality = None
self.directional = False
self.findHeadTailBranch()
self.findBackbone()
self.findSideGroup()
if self.all_trans:
self.setAllTransBackbone()
self.alignOnXAxis()
self.setupTacticity()
self.setDirecional()
self.flipTailFirst()
self.rings = self.raw_struct.find_rings(sort=True)
self.is_monosaccharide = self.getMonosaccharideType() is not None
[docs] def setAllTransBackbone(self):
"""
Set all the backbone dihedrals to 180
"""
atomlist = [self.markers[0]]
atomlist += self.backbone_path_indexes
atomlist += [self.markers[1]]
for index in range(len(atomlist) - 3):
indexes = atomlist[index:index + 4]
try:
self.raw_struct.adjust(180., *indexes)
except structure.AtomsInRingError:
# Center two atoms are in a ring; dihedral can't be adjusted.
pass
[docs] def hasChirality(self):
"""
Check if this monomer has a chiral backbone
:rtype: bool
:return: Whether the backbone has a chiral atom
"""
return bool(self.tactical_index)
[docs] def findHeadTailBranch(self):
"""
Find and mark the head, tail and any branching atoms
"""
self.head = self.findAttachedAtomIndex(self.markers[0], HEAD_ATOM_PROP)
self.tail = self.findAttachedAtomIndex(self.markers[1], TAIL_ATOM_PROP)
for marker in self.markers[2:]:
# This marks the atom attached to marker as a branch atom
self.findAttachedAtomIndex(marker, BRANCH_ATOM_PROP,
self.branching_percent)
batom = self.raw_struct.atom[marker]
batom.property[BRANCH_PCT_ATOM_PROP] = self.branching_percent
batom.property[BRANCH_GEN_ATOM_PROP] = 1
batom.property[BRANCH_MAXGEN_ATOM_PROP] = self.branching_max_gen
[docs] def findBackbone(self):
"""
Find the set of atoms that creates the shortest distance (number of
bonds) between the head and tail atoms. This is the monomer backbone.
Backbone atoms are marked with a backbone atom property
"""
self.backbone_path_indexes = analyze.find_shortest_bond_path(
self.raw_struct, self.head, self.tail)
# Set the backbone property
for index in self.backbone_path_indexes:
self.markBackboneAtom(index)
[docs] def findRingSideGroup(self, struct, terminator_indexes, root_index):
r"""
Return a list of atoms bound to root_index (and recursively all atoms
bound to those atoms, ad infinitum). terminator_indexes is a set of
atoms that terminate the group - when a terminator atom is encountered,
the group stops growing in that direction. Unlike
Structure.getMovingAtoms or buildcomplex.find_atoms_to_remove, this
method gracefully handles cases where terminator atoms and group atoms
share the same ring. Imagine naphthalene::
3 7
/ \ / \
4 2 8
| | |
5 1 9
\ / \ /
6 10
If root_index=6 and terminator_indexes={1}, all other atoms will become
part of the same group as 6 - because there is a path all the way around
the outer ring that bypasses 1 going clockwise from 6.
If root_index=6 and terminator_indexes={1,2}, then only atoms 6, 5, 4, 3
will be members of the group, because both atoms 1 and 2 terminate the
group.
root_index is always part of the group
:type struct: schrodinger.structure.Structure
:param struct: The structure to use
:type terminator_indexes: set of int
:param terminator_indexes: The indexes of the atoms that terminate the
group.
:type root_index: int
:param root_index: The index of the first atom in the group. All
neighbors of this atom that are not terminator atoms will be added
to the list.
:rtype: list
:return: A list of all atoms recursively bound to the root atom,
including root_index itself but not including any atom in
terminator_indexes
"""
indexes_not_yet_checked = {root_index}
group_indexes = set()
while indexes_not_yet_checked:
check_index = indexes_not_yet_checked.pop()
group_indexes.add(check_index)
for atom in struct.atom[check_index].bonded_atoms:
adex = atom.index
if adex in terminator_indexes:
# Stop going in this direction if this is a terminator atom
pass
elif adex not in group_indexes and \
adex not in indexes_not_yet_checked:
# We haven't encountered this atom before
indexes_not_yet_checked.add(adex)
return list(group_indexes)
[docs] def alignOnXAxis(self):
"""
Align the monomer so the head is at the origin and the backbone is
aligned to the +X axis
"""
self.moveHeadToOrigin()
if len(self.backbone_path_indexes) < 2:
# Nothing to align - single atom backbone
return
# Now rotate the backbone vector to align with the +X axis. The
# backbone vector points from the head (origin) to the tail
backbone_vector = self.raw_struct.atom[self.tail].xyz
self.alignToVector(self.raw_struct, backbone_vector, transform.X_AXIS)
[docs] def flipTailFirst(self):
"""
Flip the structure so the tail is on the origin and the backbone is
aligned to the +X axis
"""
# matrix for rotation 180 degrees about the Y-axis
matrix = transform.get_rotation_matrix(transform.Y_AXIS, numpy.pi)
for chirality, struct in self.structures[HEADFIRST].items():
scopy = struct.copy()
# Rotate around the Y-axis
transform.transform_structure(scopy, matrix)
# Now move the tail to the origin
translate = [-coord for coord in scopy.atom[self.tail].xyz]
transform.translate_structure(scopy, *translate)
self.structures[TAILFIRST][chirality] = scopy
[docs] def setDirecional(self):
"""
Set whether the monomer has directional properties.
"""
if msutils.is_coarse_grain(self.raw_struct):
return
head_capper_elements = set([
x.element
for x in self.raw_struct.atom[self.head].bonded_atoms
if len(x.bond) == 1 and x.index != self.markers[0]
])
tail_capper_elements = set([
x.element
for x in self.raw_struct.atom[self.tail].bonded_atoms
if len(x.bond) == 1 and x.index != self.markers[1]
])
elements = set(['H', 'F', 'Cl', 'Br', 'I', 'At'])
hc_elements = elements.difference(head_capper_elements)
tc_elements = elements.difference(tail_capper_elements)
if len(hc_elements) == 1:
tc_elements = tc_elements.difference(hc_elements)
elif len(tc_elements) == 1:
hc_elements = hc_elements.difference(tc_elements)
st_orig = self.raw_struct.copy()
for ele in hc_elements:
st_orig.atom[self.markers[0]].element = ele
try:
tc_elements.remove(ele)
except KeyError:
pass
break
for ele in tc_elements:
st_orig.atom[self.markers[1]].element = ele
break
st_flipped = self.raw_struct.copy()
st_flipped.atom[self.markers[1]].element = st_orig.atom[
self.markers[0]].element
st_flipped.atom[self.markers[0]].element = st_orig.atom[
self.markers[1]].element
self.directional = not st_orig.isEquivalent(st_flipped)
[docs] def setupTacticity(self):
"""
Store both R and S structures for the monomer so we don't have to keep
inverting the chirality when needed. This takes a miniscule amount of
effort, since it is only done once per monomer type
"""
# Find a chiral atom in the backbone
chiral_indexes = analyze.get_chiral_atoms(self.raw_struct)
backset = set(self.backbone_path_indexes)
self.tactical_index = None
bb_chirality = None
for index, chirality in chiral_indexes.items():
if chirality in R_S and index in backset:
if self.tactical_index:
# We only allow tacticity for monomers with 1 chiral
# backbone atom
self.tactical_index = None
bb_chirality = None
break
self.tactical_index = index
bb_chirality = chirality
self.default_chirality = bb_chirality
# Label the atoms by bb chirality
scopy = self.raw_struct.copy()
prop = PROP_BY_CHIRALITY[bb_chirality]
for atom in self.raw_struct.atom:
atom.property[prop] = True
self.structures[HEADFIRST][bb_chirality] = self.raw_struct
if self.tactical_index:
# Store a structure of the opposite chirality
tactical_atom = self.raw_struct.atom[self.tactical_index]
tactical_atom.property[CHIRAL_BB_ATOM_PROP] = True
# Create and store a structure with the opposite chirality
fixed_atoms = []
# Find two backbone neighbors to the chiral atom
candidates = backset
candidates.update(self.markers[:2])
tactical_atom = scopy.atom[self.tactical_index]
tactical_atom.property[CHIRAL_BB_ATOM_PROP] = True
for neighbor in tactical_atom.bonded_atoms:
if neighbor.index in candidates:
fixed_atoms.append(neighbor.index)
if len(fixed_atoms) == 2:
break
# Convert to the other chirality and store the structure
mm.mmstereo_atom_invert_chirality(scopy, self.tactical_index,
*fixed_atoms)
other_chirality = get_other_item(R_S, bb_chirality)
# Label the atoms by bb chirality
other_prop = PROP_BY_CHIRALITY[other_chirality]
for atom in scopy.atom:
atom.property[other_prop] = True
self.structures[HEADFIRST][other_chirality] = scopy
[docs] def getNextStructure(self, orientation):
"""
Get the next monomer structure with the given orientation. "Next" in
this case takes into account the chirality of the previous structure -
for syntactic polymers, the monomers alternate chirality.
:type orientation: str
:param orientation: HEADFIRST or TAILFIRST constants
:rtype: `schrodinger.structure.Structure`
:return: The next structure to use for this monomer
"""
if self.default_chirality is None:
chirality = None
elif self.tacticity == TACTICITY_ISO:
chirality = self.default_chirality
elif self.tacticity == TACTICITY_ATAC:
chirality = random.choice(R_S)
else:
if not self.previous_chirality:
chirality = self.default_chirality
else:
chirality = get_other_item(R_S, self.previous_chirality)
self.previous_chirality = chirality
return self.structures[orientation][chirality].copy()
[docs] def getCouplerAndGrower(self, orientation):
"""
Get the coupler, coupler_marker, grower and grow_marker atoms for this
monomer
:type orientation: str
:param orientation: HEADFIRST or TAILFIRST constants
:rtype: `AttachmentAtoms`
:return: The atoms indexes of the coupler, grower and marker atoms.
Which one is the head and which is the tail depends on orientation.
"""
headtail = [self.head, self.tail]
if orientation == HEADFIRST:
index = 0
else:
index = 1
coupler = headtail[index]
coupler_marker = self.markers[index]
grower = get_other_item(headtail, coupler)
grow_marker = get_other_item(self.markers, coupler_marker)
return AttachmentAtoms(coupler, coupler_marker, grower, grow_marker)
[docs] def getCouplerAtom(self, orientation):
"""
Get the coupler atom.
:type orientation: str
:param orientation: HEADFIRST or TAILFIRST constants
:rtype: `_StructureAtom`
:return: The coupler atom
Whether this is the head and or the tail depends on orientation.
"""
if orientation == HEADFIRST:
return self.raw_struct.atom[self.head]
else:
return self.raw_struct.atom[self.tail]
[docs] def isBrancher(self):
"""
Is this monomer a braching monomer?
:rtype: bool
:return: Whether this monomer is a branching monomer
"""
return len(self.markers) == 3
[docs] def isLadder(self):
"""
Is this monomer a braching monomer?
:rtype: bool
:return: Whether this monomer is a branching monomer
"""
return len(self.markers) == 4
[docs] @staticmethod
def addPropsToStructure(struct, letter, markers, name, tacticity,
branching_percent, branching_max_gen, all_trans):
"""
Add properties to the structure that the driver will need as input
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to add the properties to
:type letter: str
:param letter: The one-character code for this monomer
:type markers: list of int
:param markers: Atom indexes of the marker atoms
:type name: str
:param name: The title to use
:type tacticity: str
:param tacticity: One of the TACTICITY module constants specifying the
tacticity for this monomer
:type branching_percent: float
:param branching_percent: The % chance to branch for this monomer
:type branching_max_gen: int
:param branching_max_gen: The maximum branching generations for this
monomer
:type all_trans: bool
:param all_trans: Whether the intra-monomer backbone dihedrals should be
all set to 180.
"""
struct.property[MOIETY_PROP] = 'Monomer_%s' % letter
struct.property[MARKER_PROP] = ','.join([str(x) for x in markers])
struct.title = name
struct.property[TACTICITY_PROP] = tacticity
struct.property[BRANCH_PCT_PROP] = branching_percent
struct.property[BRANCH_MAX_PROP] = branching_max_gen
struct.property[BBTRANS_PROP] = all_trans
[docs] @classmethod
def write(cls, filename, struct, *args):
"""
Write out the structure
:type filename: str
:param filename: The path to the file
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to write out
See the addPropsToStructure method for additional argument documentation
"""
cls.addPropsToStructure(struct, *args)
struct.append(filename)
[docs] def readFromStructure(self, struct):
"""
Overrides the parent method to add and read Monomer-specific
properties.
See the parent class method for documentation
"""
# Check all required properties are available
propexps = []
propexps.append(
(TACTICITY_PROP, 'tacticity, which can have values of '
'%s, %s or %s' % (TACTICITY_ISO, TACTICITY_SYN, TACTICITY_ATAC)))
propexps.append((BRANCH_PCT_PROP, 'percent branching probability'))
propexps.append((BRANCH_MAX_PROP, 'maximum branching generations'))
propexps.append((BBTRANS_PROP, 'whether the monomer backbone is all '
'trans'))
BaseMoiety.readFromStructure(self, struct, propexps=propexps)
# Convert the structure properties to instance data
msg = ('For monomers, the %s property value must be "%s", where x is a '
'single letter that is different for each monomer. %s instead '
'has the value "%s"')
letters = struct.property[MOIETY_PROP].split('_')[-1]
letter = letters[0]
letter_up = letter.upper()
if len(letter) > 1 or letter.upper() not in string.ascii_uppercase:
raise MoietyError(msg %
(MOIETY_PROP, MONOMERX, struct.title, letter))
# self.letter is letter assigned as the code of this monomer and the
# first letter in self.letter is a single letter
self.letter = letter_up + letters[1:]
self.all_trans = struct.property[BBTRANS_PROP]
self.tacticity = struct.property[TACTICITY_PROP]
self.branching_percent = struct.property[BRANCH_PCT_PROP]
self.branching_max_gen = struct.property[BRANCH_MAX_PROP]
[docs] def getPDBName(self):
"""
Get a PDB residue-like Name for this moiety
"""
# self.letter is the single letter assigned as the code of this monomer
return self.letter
[docs] def setAsFragment(self):
"""
Prepare monomer information for in-cell grow.
"""
self.finalizeSideGroup()
self.findBackbone2BranchingPoint([self.markers[0]])
self.findRotatableBonds()
[docs] def breakTemplatePolymer(self):
"""
Copy the original moieties to tmp_moieties and break the monomers
into small new monomers according to rotatable bonds in side group.
:rtype: list of {Monomer}
:return: the small monomer moieties that replace the old large one
"""
# FIXME: should modify this function to be compatible with CAS
self.tmp_moieties = [copy.deepcopy(self)]
self.tmp_moieties[-1].pdbres = self.getPDBName().ljust(4)
self.finished_moieties = []
while self.tmp_moieties:
tmp_moiety = self.tmp_moieties.pop(0)
bonds_to_break = tmp_moiety.findBondsToBreak()
struct_copy = tmp_moiety.raw_struct.copy()
struct_copy.deleteAtoms(tmp_moiety.markers)
for atom_ids in bonds_to_break:
struct_copy.deleteBond(*atom_ids)
self.stripSideStructs(struct_copy, tmp_moiety.pdbres)
return self.finished_moieties
[docs] def findBondsToBreak(self):
"""
Search the side groups of all backbone atoms and find the
'first neighbor' rotatable bonds.
:rtype: list of tuple
:return: 'first neighbor' rotable bonds in side groups for all backbone
atoms
"""
bonds_to_break = []
for backbone_atom_id, side_atom_ids in self.backbone_side_atoms.items():
bonds_to_break += nearest_rotatable_bonds(self.raw_struct,
self.rings,
backbone_atom_id,
side_atom_ids)
return bonds_to_break
[docs] def stripSideStructs(self, struct_copy, pdbres):
"""
Strip the sub structures in side groups and form new monomers from
these sub structures.
:type struct_copy: `schrodinger.structure.Structure`
:param struct_copy: the structure copy with 'first neighbor' rotatable
bonds deleted.
:type pdbres: str
:param pdbres: the name of the residue to strip side structs from
"""
for molecule in struct_copy.molecule:
struct = molecule.extractStructure(copy_props=True)
for atom in struct.atom:
if atom.property.get(HEAD_ATOM_PROP):
head_atom_id = atom.index
if atom.property.get(TAIL_ATOM_PROP):
# This is the tail atom of parent monomer
is_finished_moiety = True
break
else:
# This is a new struct broken from parent monomer, which
# does not contain the backbone of parent monomer
# Find the longest path from root to leaf as new backbone
is_finished_moiety = False
graph = analyze.create_nx_graph(struct)
shortest_path_length = networkx.shortest_path_length(
graph, source=head_atom_id)
tail_atom_id = max(shortest_path_length,
key=shortest_path_length.get)
struct.atom[tail_atom_id].property[TAIL_ATOM_PROP] = True
resname = self.getNewResidueName(is_finished_moiety)
self.updateTemplatePolymerResidue(struct, pdbres, resname)
assign_markers(struct)
new_monomer = Monomer(struct)
new_monomer.pdbres = resname
if is_finished_moiety:
self.finished_moieties.append(new_monomer)
else:
self.tmp_moieties.append(new_monomer)
[docs] def getNewResidueName(self, is_finished_moiety):
"""
Generate the residue name for new sub structure.
:type is_finished_moiety: bool
:param is_finished_moiety: whether the new sub structure contains
the backbone of the parent monomer
:rtype: str
:return: residue name for new sub structure
"""
if is_finished_moiety:
# new Monomers based on Monomer 'X ' have resnames from 'X0 '
# to 'X999'
pdb_name = self.getPDBName()
digits = ''.join(x for x in pdb_name if x.isdigit())
letters = pdb_name.replace(digits, '')
resname = letters + str(len(self.finished_moieties))
elif self.tmp_moieties:
last_resname = self.tmp_moieties[-1].pdbres.rstrip()
resname = str(int(last_resname) + 1)
else:
resname = self.getFirstTmpResname()
return resname.ljust(4)
[docs] def getFirstTmpResname(self):
"""
The resname of the first temporary moiety in self.tmp_moieties list.
:type resname: str
:param resname: residue name of a moiety
"""
tmp_resnames = []
for residue in self.template_polymer.residue:
try:
# Temporary residue pdbres is '0', '1', ...
# Finished residue pdbres is 'A0', 'A1', ...
tmp_resnames.append(int(residue.pdbres))
except ValueError:
pass
if tmp_resnames:
return str(max(tmp_resnames) + 1)
else:
return '0'
[docs] def updateTemplatePolymerResidue(self, struct, pdbres, resname):
"""
Update the residue number and name for the atoms in template
polymer for the newly added residue.
:type struct: `schrodinger.structure.Structure`
:param struct: the new sub struct to form new monomer
:type pdbres: str
:param pdbres: name of the parent residue
:type resname: str
:param resname: residue name of the new sub struct
"""
map_monomer_atom_id = {}
for atom in struct.atom:
map_monomer_atom_id[atom.property[
msprops.MONOMER_ORIG_ATOM_IDX_PROP]] = atom.index
residue_number = 0
for residue in self.template_polymer.residue:
residue_number += 1
residue.resnum = residue_number
if residue.pdbres != pdbres:
continue
residue_number += 1
for atom in residue.atom:
orig_atom_id = atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP]
if orig_atom_id in map_monomer_atom_id:
atom.pdbres = resname
atom.resnum = residue_number
atom.property[
msprops.
MONOMER_ORIG_ATOM_IDX_PROP] = map_monomer_atom_id[
orig_atom_id]
[docs] def getMonosaccharideType(self):
"""
Get the monosaccharide type for this monomer, if any
:rtype: str
:return: The monosaccharide type, or None if the monomer is not a
monosaccharide
"""
return self.raw_struct.property.get(MONOSACC_PROP)
[docs]def remove_stale_props(st):
"""
Remove stale properties from the given structure.
:type st: `schrodinger.structure.Structure`
:param st: the structure from which to remove the stale properties
"""
# these properties could be present if the structure was read
# from moiety files prepared by the polymer builder GUI
atom_props = list(PROP_BY_CHIRALITY.values()) + [CHIRAL_BB_ATOM_PROP]
for atom in st.atom:
for key in atom_props:
atom.property.pop(key, None)
[docs]class Moieties(object):
"""
A holder and manager for the various moieties that make up the polymer
"""
[docs] def __init__(self, filename, structs=None, one_letter=True, simple=False):
"""
Create a Moieties object
:type filename: str
:param filename: The path to the file that holds the moiety structures
:type structs: list
:param structs: List of structure moiety
:type one_letter: If True, the monomer letter must a one letter code
:param one_letter: bool
:param bool simple: If True, use the simplified input rules -
initiator/terminator need not be specified, structures not marked
with MOIETY_PROP are considered monomers, etc.
"""
self.initiator = None
self.terminator = None
self.cascader = None
self.monomers = {}
self.one_letter = one_letter
untyped = 'untyped'
single_msg = ('There is more than one structure in the input '
'file with %s defined. There can be only one.')
if not structs:
structs = structure.StructureReader(filename)
untyped_monomers = []
for struct in structs:
remove_stale_props(struct)
# Each structure has a property defined that tells what kind of
# moiety it is
try:
mtype = struct.property[MOIETY_PROP].lower()
except KeyError:
if simple:
mtype = untyped
else:
raise MoietyError('Each structure in the input file must '
'have the %s property defined.' %
MOIETY_PROP)
if mtype == INITIATOR:
if self.initiator:
raise MoietyError(single_msg % INITIATOR)
self.initiator = Initiator(struct)
elif mtype == TERMINATOR:
if self.terminator:
raise MoietyError(single_msg % TERMINATOR)
self.terminator = Terminator(struct)
elif mtype == CASCADER:
if self.cascader:
raise MoietyError(single_msg % CASCADER)
self.cascader = Cascader(struct)
elif mtype == untyped:
untyped_monomers.append(struct)
elif (self.one_letter and mtype[:-1] == MONOMERX[:-1]) or (
not self.one_letter and mtype.startswith(MONOMERX[:-1])):
this_monomer = Monomer(struct)
letter = this_monomer.letter
if letter in self.monomers:
raise MoietyError('There is more than one monomer defined '
'with the code "%s".' % letter)
self.monomers[letter] = this_monomer
else:
raise MoietyError('%s has an undefined value for %s' %
(struct.title, MOIETY_PROP))
if simple:
self.handleSimpleInput(untyped_monomers)
singlets = [
x for x in [self.initiator, self.terminator, self.cascader] if x
]
self.all_moieties = list(self.monomers.values()) + singlets
def _createSimpleIT(self):
""" Create simple initiator and/or terminator if required """
# Create H initiator/terminator if needed
# Note - we don't have to specify if we want an AA or CG
# end group because CG initiator/teminators are deleted after the
# polymer is built, so it does not matter if they are AA end groups.
# CG just needs "something" to put there temporarily.
if not self.initiator:
struct = get_generic_end_structure()
struct.title = 'aH'
self.initiator = Initiator(struct)
if not self.terminator:
struct = get_generic_end_structure()
struct.title = 'wH'
self.terminator = Terminator(struct)
def _assignUntypedMonomers(self, untyped_monomers):
"""
Assign unused codes to monomers that didn't have input codes assigned.
Also assign necessary structure properties
"""
available_codes = [
x for x in string.ascii_uppercase if x not in self.monomers
]
for struct in untyped_monomers:
try:
good_code = available_codes.pop(0)
except IndexError:
raise MoietyError('Too many (> 26) unique monomers requested')
marker_string = struct.property.get(MARKER_PROP, None)
if marker_string:
markers = [int(x) for x in marker_string.split(',')]
else:
markers = self.findSimpleHeadTailMarkers(struct)
name = struct.title
tacticity = struct.property.get(TACTICITY_PROP, TACTICITY_ATAC)
branch_pct = struct.property.get(BRANCH_PCT_PROP, 0.0)
branch_maxg = struct.property.get(BRANCH_MAX_PROP, 0)
all_trans = struct.property.get(BBTRANS_PROP, False)
Monomer.addPropsToStructure(struct, good_code, markers, name,
tacticity, branch_pct, branch_maxg,
all_trans)
self.monomers[good_code] = Monomer(struct)
[docs] def findSimpleHeadTailMarkers(self, struct):
"""
Find the atoms that mark the head and tail atoms for monomers that
did not have these marked on input. The "marker" atom is the one that is
bonded to the head or tail and gets deleted when joining monomers.
For now, a simple algorithm is used that finds the longest bond path
in the structure and uses that, preferrably one terminated with H
atoms in case of ties.
:param `schrodinger.structure.Structure` struct: The monomer structure
:rtype: list
:return: First item is the index of the head atom marker, the second
item is the index of the tail atom marker
"""
# Create the graph of the structure
st_graph = analyze.create_nx_graph(struct)
# We choose backbones that start and end with H preferentially
hydrogens = {x.index for x in struct.atom if x.element == 'H'}
backbone = graph.find_backbone(st_graph, prefer_indexes=hydrogens)
head_marker = backbone[0]
tail_marker = backbone[-1]
return [head_marker, tail_marker]
[docs] def hasCascader(self):
"""
Has a cascader been defined?
:rtype: bool
:return: Whether a cascader was defined
"""
return bool(self.cascader)
[docs] def hasBrancher(self):
"""
Has a brancher been defined?
:rtype: bool
:return: Whether a brancher was defined
"""
return any([x.isBrancher() for x in self.monomers.values()])
[docs] def hasLadder(self):
"""
Return True if ladder monomers are found.
:rtype: bool
:return: Ladder monomers exist
"""
return any([x.isLadder() for x in self.monomers.values()])
[docs] def singleR1Atom(self, end_moieties=None):
"""
Whether each end moiety has one single R1 atom.
:param end_moieties: the end moieties to check against.
:type end_moieties: list
:return: True if each end moiety has one single R1 atom
:rtype: bool
"""
if end_moieties is None:
end_moieties = [self.initiator, self.terminator]
return all(len(x.markers) == 1 for x in end_moieties)
[docs] def hasRings(self):
"""
Do any moieties have rings?
:rtype: bool
:return: Whether any of the moieties have rings
"""
return any(x.has_rings for x in self.all_moieties)
[docs] def getMonomer(self, letter):
"""
Get the monomer with the given one-letter code
:type letter: str
:param letter: The one-letter code for the desired monomer
:rtype: `Monomer`
:return: The monomer with the given one-letter code
"""
return self.monomers[letter]
[docs] def getMoiety(self, letter):
"""
Get the get one single moiety based on 'INI', 'TRM', 'CAS' or
monomer one-letter code.
:type letter: str
:param letter: The one-letter code for the desired monomer
:rtype: `Monomer`, `Initiator`, `Cascader`, or `Terminator`
:return: the initiator, cascader, terminat or one nonomer find by
the one letter code
"""
if letter == INI:
return self.initiator
elif letter == TRM:
return self.terminator
elif letter == CAS:
return self.cascader
else:
return self.monomers[letter]
[docs] def getOneLetterCodes(self):
"""
Get all the allowed one-letter codes
:rtype: list
:return: All the defined one-letter codes
"""
return list(self.monomers)
[docs] def checkValidOneLetterCodes(self, codes):
"""
Check that all the codes in the given string are valid
:type codes: str
:param codes: A string with a series of one-letter codes
:rtype: bool
:return: Whether all the one-letter codes are associated with a Monomer
"""
for letter in codes:
if letter == WILDCARD:
continue
try:
self.monomers[letter]
except KeyError:
return False
return True
[docs] def checkValidWeights(self, weights):
"""
Validate the given weights
:type weights: dict
:param weights: Keys are one-letter codes, values are integer weights
for that monomer in a random copolymer
:rtype: bool
:return: Whether all the one-letter codes in weights are valid
"""
for letter in self.monomers.keys():
if letter not in weights:
weights[letter] = 1
codestring = "".join(weights.keys())
return self.checkValidOneLetterCodes(codestring)
[docs] def getCascadeGenerations(self):
"""
Get the number of cascade generations
:rtype: int
:return: The number of cascade generations (0 if none)
"""
if not self.cascader:
return 0
return self.cascader.generations
[docs] def getAllMoietyNames(self):
"""
Get the names of all Moieties - initiator, terminator, monomers, etc.
:rtype: list
:return: A list of all moiety names. Monomers first, then initiator,
terminator and cascader (if defined)
"""
names = [x.name for x in self.all_moieties]
return names