"""
This script will break up a set of input molecules into fragments
based on some simple rules, similar to those described in the original
RECAP paper. Run with -h to see the basic rules.
If -murcko option is specified, then Murco Assembly scaffolds are generated
instead of fragments. This is done by removing terminal from each "branch"
until reacing a ring.
If -recap option is specified, then rules are as follows:
I. Only break the bonds matching the RECAP SMARTS patterns.
1. Amide
2. Ester
3. Amine
4. Urea
5. Ether
6. Olefin
7. Quarternary nitrogen
8. Aromatic nitrogen - aliphatic carbon
9. Lactam nitrogen - aliphatic carbon
10. Aromatic carbon - aromatic carbon
11. Sulphonamide
II. Do not break ring bonds
III. Do not break bonds that would yield one of the following fragments:
hydrogen, methane, ethane, prone, butane.
NOTE: -recap_use can be used to specify which RECAP rules to use. The value
must be a list of comma-separated numbers (no spaces).
Example usage::
from schrodinger.structutils import createfragments
frag_creator = createfragments.FragmentCreator(atoms=options.atoms,
bonds=options.bonds, smarts=options.smarts,
carbon_hetero=options.carbon_hetero,
maxatoms=options.maxatoms,
removedup=options.removedup, murcko=options.murcko,
recap=options.recap, recap_use=recap_list,
complete=options.complete,
add_h=options.add_h, verbose=True)
for struct in my_structures:
# fragments is a list of Structure objects
fragments = frag_creator.fragmentStructure(struct)
Copyright Schrodinger, LLC. All rights reserved.
"""
import sys
import warnings
import schrodinger.utils.log as log
from schrodinger.structutils import analyze
from schrodinger.structutils import build
logger = log.get_output_logger(__file__)
ATTACHPROP = 'b_fragmol_attachment'
RECAP_PROP_PREFIX = 'b_fragmol_recap_'
[docs]def are_sts_the_same(st1, st2):
"""
Checks coordinates of each atom in the specified structures and
returns true if they are the same
:type st1: `structure.Structure`
:param st1: The first structure for comparison
:type st2: `structure.Structure`
:param st2: The second structure for comparison
:rtype: bool
:return: True if the structures have the same coordinates, False if not
"""
if st1.atom_total != st2.atom_total:
return False
for i in range(1, st1.atom_total + 1):
a1 = st1.atom[i]
a2 = st2.atom[i]
if a1.xyz != a2.xyz:
return False
# All coordinates match; same fragment:
return True
# RECAP rules. Note that "!@" must be used in order for in-ring bonds to not
# get broken.
RECAP_PATTERNS = [
("[#6][C;X3](=O)!@[N;X3]", 2, 4, "Amide"),
("[!#1][O;X2]!@[C;X3](=O)[C;X4;H2,H3]", 2, 3, "Ester"),
("[#7;X3](!@[C,N&X3,#1])([C,N&X3,#1])[C,N&X3,#1]", 1, 2, "Amine"),
("[#7;X3]!@[C;X3](=O)[#7;X3]", 1, 2, "Urea"),
("[#6]!@[O;X2][#6]", 1, 2, "Ether"),
("[#6]=!@[#6]", 1, 2, "Olefin"),
("[#7;X4]!@[#6]", 1, 2, "Quarternary nitrogen"),
("[n]!@[C]", 1, 2, "Aromatic nitrogen - aliphatic carbon"),
("C!@N@C=O", 1, 2, "Lactam nitrogen - aliphatic carbon"),
("c!@c", 1, 2, "Aromatic carbon - aromatic carbon"),
("[#7][S;X4](=O)(=O)", 1, 2, "Sulphonamide"),
]
# Patters for fragments that should NOT be produced according to the
# RECAP rule. First atom is the bonding atom. NOTE: Hydrogens are excluded
# by different means in the code, so the SMARTS pattern is not included here.
TERMINAL_PATTERNS = [
"[C;X4;H3]", # Methyl
"[O][C;X4;H3]", # Methoxy
"[C;X4;H2][C;X4;H3]", # Ethyl
"[O][C;X4;H2][C;X4;H3]", # Ethoxy
"[C;X4;H2][C;X4;H2][C;X4;H3]", # Propyl
"[C;X4;H2][C;X4;H2][C;X4;H2][C;X4;H3]", # Butyl
]
[docs]def find_recap_bonds(st, options):
"""
Find all bonds that match the RECAP rules described in
J. Chem. Inf. Comput. Sci. 1998, 38, 511-522
Will return a list of [(atom1, atom2, rule_num)].
:type st: `schrodinger.structure.Structure`
:param st: The structure to search for RECAP rule bonds
:param options: The object holding the compute options
:rtype: set of tuples
:return: Each tuple is (atom1, atom2, rule_num) where atom1 and atom2 are
the atoms involved in the bond and rule_num is the recap rule that the bond
matches. atom1 and atom2 are sorted.
"""
# Ev:80526
# FIXME Add ability to use only certain RECAP rules (Ev:90142)
recap_bonds = set() # set of sets of 2 atoms each.
# sets are used in order to avoid listing the same bond more than once.
if options.recap_use:
use_rules = [int(x) for x in options.recap_use]
else:
use_rules = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# Remove bonds that would produce small, "uninteresting" fragments
# (hydrogens, methyls, ethyls, propyls, and butyls):
small_terminal_groups = analyze.evaluate_multiple_smarts(
st, TERMINAL_PATTERNS)
# this is a list of atom lists. First atom is the bonded atom
# Make a list of first (bonded) atoms:
small_terminal_bond_atoms = [atoms[0] for atoms in small_terminal_groups]
logger.debug(' SMALL TERMINAL BOND ATOMS: %s' % small_terminal_bond_atoms)
for num, (pattern, pos1, pos2, doc) in enumerate(RECAP_PATTERNS, start=1):
if num not in use_rules:
continue
logger.debug(" using pattern: %s" % pattern)
with warnings.catch_warnings():
warnings.filterwarnings("ignore",
category=DeprecationWarning,
message="analyze.evaluate_smarts")
atom_sets = analyze.evaluate_smarts(st, pattern)
for atoms in atom_sets:
atom1 = atoms[pos1 - 1]
atom2 = atoms[pos2 - 1]
if st.atom[atom1].element == 'H' or st.atom[atom2].element == 'H':
logger.debug(' will not break bond because it is to a '
'hydrogen')
continue # Do not break this bond
if atom1 in small_terminal_bond_atoms or \
atom2 in small_terminal_bond_atoms:
logger.debug(' will not break bond because it will produce '
'a small molecule')
continue
# Make a list of (atom1, atom2, rule_num):
bond = sorted([atom1, atom2]) + [num]
recap_bonds.add(tuple(bond))
logger.debug(' DEBUG recap_bonds: %s' % recap_bonds)
return recap_bonds
class _Options(object):
"""
A class to hold a list of options. Mimics the object returned by
parseargs.
"""
def __init__(self,
atoms=2,
bonds=100,
smarts=None,
carbon_hetero=False,
maxatoms=200,
removedup=False,
murcko=False,
recap=False,
recap_use=None,
complete=False,
add_h=True,
verbose=False):
"""
Create an _Options object
:type atoms: int
:param atoms: Minimum number of non-hydrogen atoms in a fragment
(default=2)
:type bonds: int
:param bonds: Maximum number of broken bonds allowed in creating a
fragment (default=100)
:type smarts: str
:param smarts: Path to a file of SMARTS patterns (one per line) to
identify and eliminate fragments containing undesired features.
:type carbon_hetero: bool
:param carbon_hetero: Allows cutting of bonds between ring-carbon and
hetero atoms (default=False)
:type maxatoms: int
:param maxatoms: Maximum number of atoms allowed in molecule to be
fragmented (default=200).
:type removedup: bool
:param removedup: Keep only one copy of identical fragment but with
different coordinates. (default=False)
:type murcko: bool
:param murcko: Generate Murco Assembly scaffold instead of regular
fragments. (default=False)
:type recap: bool
:param recap: Use RECAP rules to find bonds to break.
:type recap_use: list of int
:param recap_use: Specify which RECAP rules to use (default is to use
all). Value must be a list of integers (example: [1, 2, 10, 11]).
:type complete: bool
:param complete: Retain the complete list of fragments created,
including intermediates. Only works with RECAP rules. (Default: False)
:type add_h: bool
:param complete: Add hydrogens to complete lewis structures. I some
calses, you do not want to do this (Default: True)
:type verbose: bool
:param verbose: True if progress should be logged, False if not
"""
self.min_out_size = atoms
self.max_broken_bonds = bonds
self.smarts_file = smarts
self.cut_hetero = carbon_hetero
self.max_size = maxatoms
self.removedup = removedup
self.murcko = murcko
self.recap = recap
self.recap_use = recap_use
self.complete = complete
self.verbose = verbose
self.add_h = add_h
[docs]class FragmentCreator(object):
"""
This class will break up input structures into fragments based on some
simple rules. The basic rules are:
1. Cut one of these 2 types of bonds
I. Bonds to rings
a. Do not cut bonds to hydrogens
b. Do not cut any in-ring bonds
c. Do not cut bonds between ring-carbon and a non-carbon atoms
(allowed with -c option)
II. Carbon-carbon bonds
a. Do not cut any bonds if either atom is in any ring
2. Refuse a fragment if any part of it matches a line in the SMARTS file
specified by -smarts.
3. Refuse a fragment if the number of broken bonds exceeds -bonds.
4. Refuse a fragment if the number of atoms is less than -atoms.
5. Do not attempt to fragment molecules with more than -maxatoms atoms.
For -recap option, here are the rules that are used:
1. [#6][C;X3](=O)!@[N;X3] Amide
2. [!#1][O;X2]!@[C;X3](=O)[C;X4;H2,H3] Ester
3. [#7;X3](!@[C,N&X3,#1])([C,N&X3,#1])[C,N&X3,#1] Amine
4. [#7;X3]!@[C;X3](=O)[#7;X3] Urea
5. [#6]!@[O;X2][#6] Ether
6. [#6]=!@[#6] Olefin
7. [#7;X4]!@[#6] Quarternary nitrogen
8. [n]!@[C] Aromatic nitrogen - aliphatic carbon
9. C!@N@C=O Lactam nitrogen - aliphatic carbon
10. c!@c Aromatic carbon - aromatic carbon
11. [#7][S;X4](=O)(=O) Sulphonamide
"""
[docs] def __init__(self, **kwargs):
"""
Create a FragmentCreator object.
:type atoms: int
:param atoms: Minimum number of non-hydrogen atoms in a fragment
(default=2)
:type bonds: int
:param bonds: Maximum number of broken bonds allowed in creating a
fragment (default=100)
:type smarts: str
:param smarts: Path to a file of SMARTS patterns (one per line) to
identify and eliminate fragments containing undesired
features.
:type carbon_hetero: bool
:param carbon_hetero: Allows cutting of bonds between ring-carbon and
hetero atoms (default=False)
:type maxatoms: int
:param maxatoms: Maximum number of atoms allowed in molecule to be
fragmented (default=200).
:type removedup: bool
:param removedup: Keep only one copy of identical fragment but with
different coordinates. (default=False)
:type murcko: bool
:param murcko: Generate Murco Assembly scaffold instead of regular
fragments. (default=False)
:type recap: bool
:param recap: Use RECAP rules to find bonds to break.
:type recap_use: list of int
:param recap_use: Specify which RECAP rules to use (default is to use
all). Value must be a list of integers (example:
[1, 2, 10, 11]).
:type complete: bool
:param complete: Retain the complete list of fragments created,
including intermediates. Only works with RECAP rules.
(Default: False)
:type add_h: bool
:param complete: Add hydrogens to complete lewis structures. I some
calses, you do not want to do this (Default: True)
:type verbose: bool
:param verbose: True if progress should be logged, False if not
"""
self.options = _Options(**kwargs)
self.checkArgs()
[docs] def getPatternString(self):
"""
Get a string that describes the RECAP patterns
:rtype: str
:return: A string describing the RECAP patterns.
"""
pring = ('\nRECAP patterns (SMARTS pattern, and positions of the '
'bond atoms):\n')
for num, (pattern, pos1, pos2, doc) in enumerate(RECAP_PATTERNS,
start=1):
pring += " %2d: %s:\n" % (num, doc)
pring += " %s %i-%i\n\n" % (pattern, pos1, pos2)
return pring
[docs] def checkArgs(self):
"""
Check the input arguments for appropriate usage
"""
options = self.options
if options.min_out_size < 1:
raise RuntimeError("Number of non-hydrogen atoms in output fragment"
" cannot be less than one.")
if options.max_broken_bonds < 0:
raise RuntimeError("Maximum number of broken bonds cannot be less "
"than zero.")
if options.max_size < 10:
raise RuntimeError("Maximum number of atoms in a structure to be "
"fragmented cannot be less than 10.")
if options.murcko and options.recap:
raise RuntimeError("murcko and recap options are mutually "
"exclusive.")
if options.complete and not (options.recap or options.recap_use):
raise RuntimeError("complete can only be used with the recap or "
"recap_use options.")
if options.recap_use:
try:
use_rules = [int(x) for x in options.recap_use]
except ValueError:
raise RuntimeError("Invalid value for recap_use option - must "
"be a list of ints.")
else:
if options.verbose:
logger.info("Using RECAP rules: %s" % use_rules)
# Ev:92083 and Ev:92081
options.recap = True
elif options.recap:
if options.verbose:
logger.info("Using all RECAP rules")
if options.murcko:
if options.verbose:
logger.info("Using murcko mode")
if options.verbose:
logger.info('Requested maximum broken bonds: %i' %
options.max_broken_bonds)
# FIXME does not apply to -recap
logger.info('Requested cut hetero: %s' % options.cut_hetero)
[docs] def fragmentStructure(self, struct):
"""
Break the input structure into fragments.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure to break into fragments
:rtype: list
:return: list of `schrodinger.structure.Structure` objects for the
fragments.
"""
if len(struct.atom) > self.options.max_size:
raise RuntimeError('Skipping ligand %s because it has more than'
' %d atoms.' %
(struct.title, self.options.max_size))
fragmenter = _StructureFragmenter(struct, self.options)
return fragmenter.fragments[:]
[docs] def fragmentStructureForEA(self, struct):
"""
Break the input structure into fragments. Used for Event Analyis (EA)
:type struct: `schrodinger.structure.Structure`
:return: list of `schrodinger.structure.Structure` objects for the
fragments
"""
if len(struct.atom) > self.options.max_size:
raise RuntimeError('Skipping ligand %s because it has more than'
' %d atoms.' %
(struct.title, self.options.max_size))
fragmenter = _StructureFragmenter(struct, self.options)
return fragmenter.getStOutput()
class _StructureFragmenter(object):
"""
Class to create fragments from a given set of `_Options` and a given
`schrodinger.structure.Structure`.
"""
def __init__(self, st, options):
"""
Create a StructureFragmenter object.
:type st: `schrodinger.structure.Structure`
:param st: The structure to fragment
:type options: `_Options`
:param options: The options to use during fragmenting
"""
self.st = st
self.options = options
self.fragments = []
# Label the original atom numbers:
for atom in st.atom:
atom.label_user_text = str(int(atom))
self.poss_new_mols = []
self.breakable_bonds = [] #List of possible bonds to be broken.
if self.options.murcko:
logger.debug(" Using murcko mode")
ok = self.generateMurckoScaffold()
if ok:
self.storeFragments(self.options)
elif self.options.recap:
logger.debug(" Using RECAP rules")
# NOTE: May return same bond more than once as part of a different
# RECAP rule:
recap_bonds_set = find_recap_bonds(self.st, options)
recap_bonds_str = ", ".join(
['%i-%i' % (a1, a2) for (a1, a2, num) in recap_bonds_set])
if not recap_bonds_str:
recap_bonds_str = 'None'
if self.options.verbose:
logger.info(" Bonds matching the RECAP rules: %s" %
recap_bonds_str)
# EV:96032
if self.options.complete:
all_bonds = self.getCombinations(recap_bonds_set)
#-- Break each combination from a single copy of the molecule
for combinations in all_bonds:
for bond_set in combinations:
self._makeFragmentsUsingBondsSet(bond_set)
else:
self._makeFragmentsUsingBondsSet(recap_bonds_set)
self.generateFinalMolecules()
self.storeFragments(self.options)
else:
if self.options.verbose:
logger.info("Using standard mode")
# Get a list of C-C bonds that can be broken:
self.findBreakableBonds()
if self.options.verbose:
logger.info("Possible breaks: %i" % len(self.breakable_bonds))
if self.breakable_bonds:
self.breakBonds()
self.generateFinalMolecules()
self.storeFragments(self.options)
def findBreakableBonds(self):
"""
Gets the list of those C-C bonds that should be broken.
"""
self.breakable_bonds = []
# Any heavy atom bound via a (non-ring, single-order bond) to any other
# heavy atom within a ring:
for c1, c2 in analyze.evaluate_smarts(self.st, "[!#1]!@;-[!#1;r]"):
# NOTE it is possible that both atoms are in rings
if not self.options.cut_hetero:
# element of (possibly) non-ring atom
e1 = self.st.atom[c1].element
# element of ring atom
e2 = self.st.atom[c2].element
if e1 != 'C' and e2 == 'C':
continue
bond = self.st.getBond(c1, c2)
if bond not in self.breakable_bonds:
self.breakable_bonds.append(bond)
# Find all carbon-carbon bonds (except those in rings):
for c1, c2 in analyze.evaluate_smarts(self.st, "[C;!r][C;!r]"):
bond = self.st.getBond(c1, c2)
if bond not in self.breakable_bonds:
self.breakable_bonds.append(bond)
def breakBonds(self):
"""
Breaks the C-C bonds that should be broken and returns a list of new
molecules
"""
# Algorithm: for each bond, with it broken, alternate all other bonds;
# then with it unbroken alternate all bonds (numbered above it)
self.poss_new_mols = []
self.curr_bond_index = 0
# key: bond index in breakable_bonds; value: boolean
self.break_nobreak_dict = {}
if self.options.verbose:
logger.info(' Building fragments')
self.alternateFurthurBonds()
def generateFinalMolecules(self):
"""
Create a list of fragments that meet all the criteria
"""
self.final_molecules = []
for i, mol in enumerate(self.poss_new_mols):
# Determine if molecule matches the following criteria:
# 1. More than <min> # of heavy atoms
# 2. Doens't match user-specified SMARTS patterns
# 3. Number of broken bonds is fewer than user-specified value
if self._checkMolecule(mol):
self.final_molecules.append(mol)
def removeTerminalAtoms(self):
"""
Remove atoms that are bound to only one other atom.
Returns True if such atoms were found.
"""
terminal_atoms = []
for atom in self.st.atom:
if len(atom.bond) in [0, 1]:
terminal_atoms.append(atom.index)
if terminal_atoms:
self.st.deleteAtoms(terminal_atoms)
return True
else:
return False
def generateMurckoScaffold(self):
self.final_molecules = []
done = False
while not done:
removed = self.removeTerminalAtoms()
if not removed:
done = True
if len(self.st.atom) == 0:
# No ring atoms in this st
return False # ERROR
else:
self.final_molecules = [self.st]
return True # OK
def storeFragments(self, options):
"""
Creates the final fragment list - also adds hydrogen atoms and sets the
title
"""
for index, mol_st in enumerate(self.final_molecules):
if options.add_h:
build.add_hydrogens(mol_st)
mol_st.title = "%s (frag %i)" % (self.st.title, index + 1)
self.fragments.append(mol_st)
def getStOutput(self):
return self.poss_new_mols
def _checkMolecule(self, mol):
"""
Check to see if a fragment structure passes all the criteria
:type mol: `schrodinger.structure.Structure`
:param mol: The fragment structure to check
:rtype: bool
:return: True if structure passes the criteria, False if not
"""
if not self.checkBrokenBonds(mol):
logger.debug(' DID NOT PASS checkMolecule - too many broken bonds')
return False
if not self.hasEnoughHeavyAtoms(mol):
logger.debug(' DID NOT PASS checkMolecule - '
'not enough heavy atoms')
return False
if not self.isAllowedSmartsPattern(mol):
logger.debug(' DID NOT PASS checkMolecule - '
'not allowed SMARTS pattern')
return False
logger.debug(' PASSED checkMolecule')
return True
def hasEnoughHeavyAtoms(self, mol):
"""
Calculates how many non-hydgrogen atoms the molecule contains
and returns False if less than user-specified minimum.
:type mol: `schrodinger.structure.Structure`
:param mol: The fragment structure to check
:rtype: bool
:return: True if the fragment has enough heavy atoms, False if not
"""
num_heavy_atoms = 0
for atom in mol.atom:
if atom.element != "H":
num_heavy_atoms += 1
return (num_heavy_atoms >= self.options.min_out_size)
def isAllowedSmartsPattern(self, mol):
"""
Checks if any prohibited SMARTS pattern matches fragment
:type mol: `schrodinger.structure.Structure`
:param mol: The fragment structure to check
:rtype: bool
:return: True if the fragment has an allowed SMARTS pattern, False if
not
"""
# FIXME read this file at the start of the script.
if self.options.smarts_file is None:
return True
try:
file_handle = open(self.options.smarts_file)
except:
logger.error("SMARTS pattern file not valid. Please try again.")
sys.exit()
for s in file_handle:
smarts_pattern = s.strip()
mol_atoms = analyze.evaluate_smarts(mol, smarts_pattern)
if (len(mol_atoms) > 0):
return False
return True
def checkBrokenBonds(self, mol):
"""
Checks if molecule has more than the cutoff number of broken bonds
:type mol: `schrodinger.structure.Structure`
:param mol: The structure to check
:rtype: bool
:return: True if the molecule has fewer than the maximum number of
cutoff bonds, False if not
"""
num_broken_bonds = 0
for atom in mol.atom:
if ATTACHPROP in atom.property:
num_broken_bonds += 1
return (num_broken_bonds <= self.options.max_broken_bonds)
def alternateFurthurBonds(self):
"""
Recursive function for breaking any combination of brekable bonds.
For example, if there are 3 bonds, then the following breaks will be
produced::
1 2 3 12 23 13 123
For 4 bonds the following::
1 2 3 4 12 13 14 23 24 14 123 124 134 234 1234
etc.
"""
# Number of the bond we are about to alternate:
sys.stdout.write(".")
sys.stdout.flush()
if self.curr_bond_index > 100:
if self.options.verbose:
logger.info(' breaking')
return
# Check if we are at the last bond:
if self.curr_bond_index + 1 == len(
self.breakable_bonds): # on last bond
self.break_nobreak_dict[self.curr_bond_index] = True
self.makeFragments(
) # Use self.break_nobreak_dict to make fragments
self.break_nobreak_dict[self.curr_bond_index] = False
self.makeFragments(
) # Use self.break_nobreak_dict to make fragments
# Go back in the branch:
self.break_nobreak_dict.pop(self.curr_bond_index)
return
# Follow one branch:
self.curr_bond_index += 1
self.break_nobreak_dict[self.curr_bond_index] = True
self.alternateFurthurBonds()
# After the branch reached end, break_nobreak_dict will be reversed
# so follow the next branch:
self.break_nobreak_dict[self.curr_bond_index] = False
self.alternateFurthurBonds()
self.curr_bond_index -= 1
def makeFragments(self):
"""
Generate fragments by breaking bonds that have True set for them
in self.break_nobreak_dict.
"""
bonds_set = set()
for bond_index, do_break in self.break_nobreak_dict.items():
if do_break:
bond = self.breakable_bonds[bond_index]
a1 = bond.atom1.index
a2 = bond.atom2.index
bonds_set.add(tuple(sorted([a1, a2])))
self._makeFragmentsUsingBondsSet(bonds_set)
def _makeFragmentsUsingBondsSet(self, bonds_set):
"""
Generate fragments by breaking the bonds in the bonds_set.
Each bond is a set of 2 atoms.
"""
# Keep track of bonds that we already broke:
# Set of sorted (atom1, atom2):
broken_bonds = set()
st = self.st.copy()
num_broken = 0
for bond in bonds_set:
if len(bond) == 3:
# Using -recap option
index1, index2, recap_num = tuple(bond)
else:
# -recap option not used
index1, index2 = tuple(bond)
recap_num = None
a1 = st.atom[index1]
a2 = st.atom[index2]
# index1 is already less than index2; no need to sort
bond_index = (index1, index2)
if bond_index not in broken_bonds:
st.deleteBond(a1, a2)
logger.debug(' BROKE BOND: %i-%i' % (index1, index2))
a1.property[ATTACHPROP] = True
a2.property[ATTACHPROP] = True
num_broken += 1
broken_bonds.add(bond_index)
# Whether or not we broke this bond before, we need to mark it for
# this specific RECAP rule (Ev:80527):
if recap_num:
a1.property[RECAP_PROP_PREFIX + str(recap_num)] = True
a2.property[RECAP_PROP_PREFIX + str(recap_num)] = True
if num_broken == 0:
return
for mol in st.molecule:
molst = mol.extractStructure()
# Compare the new fragment to all existing fragments,
# and exclude it if its not unique:
unique = True
for othermolst in self.poss_new_mols:
if self.options.removedup:
if molst.isEquivalent(othermolst):
unique = False
else:
if are_sts_the_same(molst, othermolst):
unique = False
if not unique:
break
if unique:
logger.debug(' Fragment is unique')
self.poss_new_mols.append(molst)
else:
logger.debug(' Fragment is not unique (discarded)')
def getCombinations(self, bonds):
"""
Get all possible combinations of bonds to break that will return
all possible fragments, including intermediates.
:rtype: list of lists
:return: List of lists where each list contains the bonds to be
cleaved for one copy of the molecule.
"""
#-- Get the max number of breaks needed to cover all
# possible bond cleavage combinations
max_breaks = (len(bonds) // 2) + 1
#-- Change the set to a list of lists
bonds = [[b] for b in bonds]
bond_groups = [bonds]
while max_breaks > 1:
new_grp = []
#-- Copy & append to the last group attached
for grp in bond_groups[-1]:
_grp = [grp + b for b in bonds if not b[0] in grp]
new_grp.extend(_grp)
bond_groups.append(new_grp)
max_breaks -= 1
return bond_groups