"""
Higher-level wrappers to the Canvas Fingerprint generation and manipulation
classes.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Quentin McDonald
from textwrap import dedent
from schrodinger.infra import canvas
############# Canvas classes begin here ##################################
class CanvasFingerprintGenerator(object):
    """
    A class to encapsulate Canvas fingerprint generation.

    Fingerprints may be generated and returned as fingerprint objects
    (see `generate`) or may be written to a file (see `open`, `write` and
    `close`). The fingerprint type, the bit precision and the atom/bond
    typing scheme can be changed at any time; the cached fingerprinter is
    discarded whenever one of them changes so that the next request builds
    a new one with the current settings.
    """

    FINGERPRINT_TYPES = [
        'Linear', 'Radial', 'MolPrint2D', 'Atom Pairs', 'Atom Triplets',
        'Topological Torsions', 'Dendritic'
    ]

    # Short names, in the same order as FINGERPRINT_TYPES.
    SHORT_FINGERPRINT_TYPES = [
        'linear', 'radial', 'molprint2D', 'pairwise', 'triplet', 'torsion',
        'dendritic'
    ]

    # Human readable descriptions; scheme number N is stored at index N - 1.
    ATOM_TYPING_SCHEMES = [
        " 1. All atoms equivalent; all bonds equivalent.",
        " 2. Atoms distinguished by HB acceptor/donor; all bonds equivalent.",
        " 3. Atoms distinguished by hybridization state; all bonds equivalent",
        " 4. Atoms distinguished by functional type: {H}, {C}, {F,Cl}, {Br,I}, {N,O}, {S}, {other}; bonds by hybridization.",
        " 5. Mol2 atom types; all bonds equivalent.",
        " 6. Atoms distinguished by whether terminal, halogen, HB acceptor/donor bonds distinguished by bond order",
        " 7. Atomic number and bond order",
        " 8. Atoms distinguished by ring size, aromaticity, HB acceptor/donor, ionization potential, whether terminal, whether halogen; bonds distinguished by bond order",
        " 9. Carhart atom types (atom-pairs approach); all bonds equivalent.",
        "10. Daylight invariant atom types; bonds distinguished by bond order.",
        "11. Same as 7, but aromatic distinguished from non-aromatic",
        "12. Same as 10, but cyclic aliphatic distinguished from acyclic aliphatic"
    ]

    PRECISION = [32, 64]

    # Record the default atom typing scheme for each fingerprint type. Note
    # that dendritic defaults to the linear atom typing scheme.
    DEFAULT_ATOM_TYPING_SCHEMES = {
        "linear": canvas.DEFAULT_LINEAR_ATOM_TYPING_STYLE,
        "radial": canvas.DEFAULT_RADIAL_ATOM_TYPING_STYLE,
        "torsion": canvas.DEFAULT_TORSIONAL_ATOM_TYPING_STYLE,
        "pairwise": canvas.DEFAULT_PAIRWISE_ATOM_TYPING_STYLE,
        "triplet": canvas.DEFAULT_TRIPLET_ATOM_TYPING_STYLE,
        "molprint2D": canvas.DEFAULT_MOLPRINT2D_ATOM_TYPING_STYLE,
        "dendritic": canvas.DEFAULT_LINEAR_ATOM_TYPING_STYLE
    }

    def __del__(self):
        # getattr() guards against instances on which __init__ never ran
        # (and therefore never assigned _filename).
        if getattr(self, "_filename", None) is not None:
            self.close()

    def __init__(self, logger, default_type='Linear'):
        """
        :param logger: object whose ``debug`` method receives log messages
        :param default_type: initial fingerprint type; looked up in
            DEFAULT_ATOM_TYPING_SCHEMES (after long-to-short name
            translation) to pick the initial atom typing scheme
        :type default_type: str
        :raise KeyError: if default_type is not a known fingerprint type
        """
        self._fingerprinter = None
        self._filename = None
        self._current_type = default_type
        self._precision = int(self.PRECISION[0])
        self._current_file_name = None
        self._adaptor = canvas.ChmMmctAdaptor()
        self._logger = logger

        def both_names(index):
            # Long and short spelling of the fingerprint type at `index`.
            return [
                self.FINGERPRINT_TYPES[index],
                self.SHORT_FINGERPRINT_TYPES[index]
            ]

        self._linear_names = both_names(0)
        self._radial_names = both_names(1)
        self._molprint2D_names = both_names(2)
        self._atom_pair_names = both_names(3)
        self._triplet_names = both_names(4)
        self._torsion_names = both_names(5)
        self._dendritic_names = both_names(6)

        self._long_names_to_short = dict(
            zip(self.FINGERPRINT_TYPES, self.SHORT_FINGERPRINT_TYPES))

        # Assign the default atom/bond type. This needs to be done at
        # the end after initialization of long_names_to_short:
        self._current_atom_bond_type = self.getDefaultAtomTypingScheme()

    def debug(self, output):
        """
        Wrapper for debug logging, just to simplify logging
        """
        self._logger.debug(output)

    def getDefaultAtomTypingScheme(self):
        """
        Return the default atom typing scheme for the fingerprint type
        that is currently set.

        :raise KeyError: if the current type has no entry in
            DEFAULT_ATOM_TYPING_SCHEMES
        """
        # The current type may be stored under its long name; the table
        # of defaults is keyed by short name.
        short_name = self._long_names_to_short.get(self._current_type,
                                                   self._current_type)
        return self.DEFAULT_ATOM_TYPING_SCHEMES[short_name]

    def getDescription(self):
        """
        Returns a string representing a summary of the current
        fingerprint settings
        """
        scheme = self.ATOM_TYPING_SCHEMES[int(self._current_atom_bond_type) -
                                          1]
        return "%s with %s-bit precision\nAtom typing is\n '%s'" % (
            self._current_type, self._precision, scheme)

    def getCurrentType(self):
        """
        Returns the name of the fingerprint type currently set
        """
        return self._current_type

    def setType(self, fp_type):
        """
        Set the type of fingerprints to be generated by this generator.

        :param fp_type: one of the values in the class variables
            FINGERPRINT_TYPES or SHORT_FINGERPRINT_TYPES
        :raise ValueError: if fp_type is not a known fingerprint type
        """
        if (fp_type not in self.FINGERPRINT_TYPES and
                fp_type not in self.SHORT_FINGERPRINT_TYPES):
            raise ValueError("Unknown fingerprint type %s" % fp_type)
        if fp_type != self._current_type:
            self.debug("FPGen - Setting type to '%s'" % fp_type)
            self._current_type = fp_type
            # We need to regenerate the fingerprinter if the fingerprint
            # type changes:
            self._fingerprinter = None

    def setPrecision(self, precision):
        """
        Set the number of bits to be used for fingerprint generation.

        :param precision: a value whose integer form is in PRECISION
        :raise ValueError: if the bit width is not supported
        """
        # Normalize once so the value that is validated is also the value
        # that is stored, logged and later compared against 32.
        bits = int(precision)
        if bits not in self.PRECISION:
            raise ValueError("Unknown bit width %d" % bits)
        self._precision = bits
        self.debug("FPGen - Setting num bits to '%d'" % bits)
        # We need to regenerate the fingerprinter when the precision is
        # set:
        self._fingerprinter = None

    def getPrecision(self):
        """
        Returns the current number of bits used for fingerprinting
        """
        return self._precision

    def setAtomBondTyping(self, atom_bond_typing):
        """
        Set the atom typing scheme.

        The atom typing schemes are described in the class variable
        ATOM_TYPING_SCHEMES.

        :param atom_bond_typing: integer from 1 to the number of atom
            typing schemes
        :type atom_bond_typing: int
        :raise ValueError: if the value is not an integer in that range
        """
        num_schemes = len(self.ATOM_TYPING_SCHEMES)
        is_index = (isinstance(atom_bond_typing, int) and
                    not isinstance(atom_bond_typing, bool))
        if not is_index or not 1 <= atom_bond_typing <= num_schemes:
            # %s rather than %d(int(...)): the offending value may not be
            # convertible to an integer at all.
            raise ValueError(
                "Unknown atom typing index: %s, must be between 1 and %d" %
                (atom_bond_typing, num_schemes))
        atom_bond_typing = int(atom_bond_typing)
        if atom_bond_typing != self._current_atom_bond_type:
            self._current_atom_bond_type = atom_bond_typing
            # We need to regenerate the fingerprinter if the atom typing
            # scheme changes:
            self._fingerprinter = None
            self.debug("FPGen - Setting atom/bond typing to '%d'" %
                       atom_bond_typing)

    def getCurrentAtomBondTyping(self):
        """
        Returns the current atom bond typing value
        """
        return self._current_atom_bond_type

    def _getFingerprinter(self):
        """
        A private method which will return a canvas fingerprinter object
        appropriate to the current type, precision and atom typing
        settings. The object is cached until one of those settings changes.

        :raise ValueError: if the current fingerprint type is unknown
        """
        if self._fingerprinter is not None:
            return self._fingerprinter

        # Accepted names for each fingerprint type, paired with the stem
        # of the canvas class that implements it. The precision suffix
        # ("32" or "64") completes the class name.
        class_stems = (
            (self._linear_names, "ChmLinearOut"),
            (self._radial_names, "ChmRadialOut"),
            (self._molprint2D_names, "ChmMolprint2D"),
            (self._atom_pair_names, "ChmPairwiseOut"),
            (self._triplet_names, "ChmTripletOut"),
            (self._torsion_names, "ChmTopologicalTorsionOut"),
            (self._dendritic_names, "ChmDendriticOut"),
        )
        atype = self._current_atom_bond_type
        # Anything other than 32 selects the 64-bit implementation.
        suffix = "32" if self._precision == 32 else "64"
        for names, stem in class_stems:
            if self._current_type in names:
                class_name = stem + suffix
                self._fingerprinter = getattr(canvas, class_name)(atype)
                self.debug("FPGen - creating %s(%d)" % (class_name, atype))
                return self._fingerprinter
        raise ValueError("Unknown fingerprint type: %s" % self._current_type)

    def _toChmMol(self, st, chmmol, stereo):
        """
        Return `st` unchanged if `chmmol` is true, otherwise convert the
        Structure `st` through the adaptor using the given stereo type.
        """
        if chmmol:
            return st
        return self._adaptor.create(st.handle, stereo)

    def generate(self, st, chmmol=False, stereo=canvas.ChmMmctAdaptor.NoStereo):
        """
        Return a fingerprint object using the current settings for
        type, bit width and atom typing for the structure st

        :type st: schrodinger.structure.Structure or canvas.base.chmmol object
        :param st: structure to generate the fingerprint for

        :type chmmol: bool
        :param chmmol: True if the structure is a chmmol object, False if
            it is a Structure object

        :param stereo: stereo type that should be used when creating
            chmmol from a Structure object
        :type stereo: canvas.ChmMmctAdaptor.StereoType
        """
        mol = self._toChmMol(st, chmmol, stereo)
        return self._getFingerprinter().generate(mol)

    def open(self, filename):
        """
        Open a file to which fingerprints are to be written. A file that
        is already open is closed first.
        """
        if self._filename is not None:
            self.close()
        self._getFingerprinter().open(filename)
        self._filename = filename

    def write(self,
              st,
              fingerprint_id,
              chmmol=False,
              stereo=canvas.ChmMmctAdaptor.NoStereo):
        """
        Create a fingerprint from the structure 'st' and add it to the
        file with the ID 'fingerprint_id'.

        :type st: schrodinger.structure.Structure or canvas.base.chmmol object
        :param st: structure to generate the fingerprint for

        :param fingerprint_id: ID for the fingerprint; converted with str()

        :type chmmol: bool
        :param chmmol: True if the structure is a chmmol object, False if
            it is a Structure object

        :param stereo: stereo type that should be used when creating
            chmmol from a Structure object
        :type stereo: canvas.ChmMmctAdaptor.StereoType

        :raise RuntimeError: if no file has been opened
        """
        if self._filename is None:
            raise RuntimeError("Attempt to write when no file has been opened")
        mol = self._toChmMol(st, chmmol, stereo)
        self._getFingerprinter().write(mol, str(fingerprint_id))

    def close(self):
        """
        Close the file which was previously opened for fingerprint
        generation

        :raise RuntimeError: if no file has been opened
        """
        if self._filename is None:
            raise RuntimeError("Attempt to close when no file has been opened")
        self._getFingerprinter().close()
        self._fingerprinter = None
        self._filename = None
############# Command line specific classes start here:
class CanvasFingerprintGeneratorCLI(CanvasFingerprintGenerator):
    """
    A subclass of the canvas fingerprint generator which is to be
    used from a program with a command line interface. This class has
    methods for defining options in an argument parser and for applying
    those options once they've been parsed. The idea is to provide a
    standard command line interface for setting the fingerprint options
    """

    def __init__(self, logger, default_type='Linear'):
        """
        :param logger: passed through to the base class
        :param default_type: passed through to the base class
        """
        super(CanvasFingerprintGeneratorCLI,
              self).__init__(logger, default_type)

    def addOptions(self, parser):
        """
        Add options for fingerprint type, atom typing scheme and
        number of bits to use.

        :param parser: parser to add the -fp_type, -fp_ab_type and
            -fp_bits options to
        :type parser: argparse.ArgumentParser
        """
        # NOTE(review): the default here is always 'linear', independent
        # of the default_type given to the constructor - confirm that
        # this is intended.
        parser.add_argument("-fp_type",
                            action="store",
                            type=str,
                            choices=self.SHORT_FINGERPRINT_TYPES,
                            metavar="<type name>",
                            default='linear',
                            help="Type of fingerprint")
        parser.add_argument(
            "-fp_ab_type",
            action="store",
            metavar="<index>",
            # Note set default to non-existent type so we use default
            # for fingerprint type if it's not specified:
            default=-1,
            type=int,
            help="Atom bond typing scheme (1-%d, see above)" %
            len(self.ATOM_TYPING_SCHEMES))
        parser.add_argument(
            "-fp_bits",
            action="store",
            default=32,
            type=int,
            choices=self.PRECISION,
            metavar="<bits>",
            help="Bit precision to use in fingerprint (32 or 64)")

    def parseOptions(self, options):
        """
        Examine the options and set the internal state to reflect
        them.

        :param options: object with ``fp_type``, ``fp_ab_type`` and
            ``fp_bits`` attributes, as defined by `addOptions`
        """
        # The type is applied first because the default atom typing
        # scheme is derived from it.
        self.setType(options.fp_type)
        fp_ab_type = int(options.fp_ab_type)
        if fp_ab_type == -1:
            # Default - set it based on current fingerprint type
            fp_ab_type = self.getDefaultAtomTypingScheme()
        self.setAtomBondTyping(fp_ab_type)
        self.setPrecision(int(options.fp_bits))

    def getOptionDesc(self):
        """
        A method which returns a one-line summary of the options supported
        by the fingerprint generator, with the alternatives for each
        option separated by '|'
        """
        return "-fp_type <%s> -fp_ab_type <1-%d> -fp_bits <%s>" % (
            "|".join(self.SHORT_FINGERPRINT_TYPES),
            len(self.ATOM_TYPING_SCHEMES),
            "|".join(str(bits) for bits in self.PRECISION))

    def getAtomBondTypingSchemeDescription(self):
        """
        Return a string which contains a description of the atom and
        bond typing schemes available for fingerprint generation,
        followed by the default scheme for each fingerprint type
        """
        header = ("\n"
                  "Atom and bond typing schemes are described by an integer "
                  "from 1 to %d.\n"
                  "The schemes are: \n") % len(self.ATOM_TYPING_SCHEMES)
        schemes = "".join(
            " %s\n" % scheme for scheme in self.ATOM_TYPING_SCHEMES)
        defaults = ("\n"
                    "The default values depend on the fingerprint types:\n"
                    "linear=%(linear)d\n"
                    "radial=%(radial)d\n"
                    "molprint2D=%(molprint2D)d\n"
                    "pairwise=%(pairwise)d\n"
                    "triplet=%(triplet)d\n"
                    "torsion=%(torsion)d\n"
                    "dendritic=%(dendritic)d\n"
                   ) % self.DEFAULT_ATOM_TYPING_SCHEMES
        return dedent(header + schemes + defaults)

    def getFingerprintDescription(self):
        """
        Return a string which lists the available fingerprint types,
        one per line, as the long name followed by the short name (the
        value accepted by -fp_type) in parentheses
        """
        header = "\nAvailable finger print types are (-fp_type arg) \n"
        rows = "".join(" %s (%s)\n" % names
                       for names in zip(self.FINGERPRINT_TYPES,
                                        self.SHORT_FINGERPRINT_TYPES))
        return dedent(header + rows)