"""
Module which computes constitutional and topological descriptors, walk and
path counts, and connectivity indices.
Details of the descriptors implemented here are taken from:
"Handbook of Molecular Descriptors" by Mannhols, Kubinyi and Timmerman
"Molecular Descriptors for Chemoinformatics, Vol. I", by Todeschini and Consonni
Copyright Schrodinger, LLC. All rights reserved.
"""
import bisect
import math
from collections import Counter
from collections import defaultdict
from collections import namedtuple
from functools import partial
from itertools import combinations_with_replacement
from past.utils import old_div
import networkx
import numpy
from decorator import decorator
import schrodinger.application.canvas.base as canvas_base
from schrodinger.infra import canvas
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.utils import log
logger = log.get_output_logger(__file__)
# Descriptor container
Descriptor = namedtuple("Descriptor", ["func", "cast_type", "label", "key"])
TOPO_DESCRIPTORS_PRODUCT = "desc"
HEAVY_ATOM_MAX = 150
# CANVAS-27492: Round floats to avoid platform differences
FLOAT_TRUNCATION = 8
[docs]class TopoError(Exception):
"""
Error raised when unable to calculate topological descriptor
"""
[docs]@decorator
def cached_topo_descriptor(func, self, *args, **kwargs):
"""
Decorator to extract previously calculated descriptor value from
cached descriptors dictionary. If not available, the descriptor is
calculated and stored for future reference.
"""
key = tuple([func.__name__]) + args
if key in self._cached_property:
return self._cached_property[key]
value = func(self, *args, **kwargs)
self._cached_property[key] = value
return value
[docs]class TopologicalDescriptors(object):
[docs] def __init__(self, default_descriptor_value=None):
"""
Initializes valid topological descriptors, and creates an empty
cached property dictionary.
"""
self._default_value = default_descriptor_value
self._cached_property = {}
self._available_descriptors = self._getDescriptorEntries()
[docs] def allDescriptorFullLabels(self):
"""
Returns all topological descriptor labels as label-key string
:return: list of all descriptors as "Label (Key)" strings
:rtype: list of str
"""
return [
self._makeFullLabel(desc) for desc in self._available_descriptors
]
[docs] def calculateTopologicalDescriptors(self, st, descriptor_strings):
"""
Calculates a set of topological descriptors for a given structure.
:param st: structure to consider
:type st: `structure.Structure`
:param descriptor_strings: list of descriptors to calculate
:type descriptor_strings: list of str
:return: Structure copy appended with successful descriptor properties
:rtype: `structure.Structure`
"""
# Clear any previously cached properties
self._cached_property.clear()
# Create structure copy to which new properties can be appended
working_st = self._extractSingleStructure(st)
final_st = st.copy()
# Ignore H or molecules with more 150 heavy atoms
natom = self._getAtomCount(working_st)
if natom == 0 or natom > HEAVY_ATOM_MAX:
logger.warning("Molecule \"%s\" rejected:" % st.title)
logger.warning("%d heavy atoms in the given molecule" % natom)
return final_st
for desc_str in descriptor_strings:
try:
desc_entry = self._findDescriptorEntry(desc_str)
property_name = self.m2ioPropertyName(desc_entry)
value = self._evaluateDescriptor(desc_entry, working_st)
final_st.property[property_name] = value
except TopoError as e:
logger.info("-- unable to compute %s: %s" % (desc_str, e))
return final_st
[docs] def m2ioPropertyName(self, descriptor):
"""
Create descriptor m2io property name for a Descriptor.
:param descriptor: descriptor entry
:type descriptor: `Descriptor`
:return: m2io formatted descriptor property name
:rtype: str
"""
m2io_mapping = {float: "r", int: "i", bool: "b", str: "s"}
# Create property name from label without spaces
formatted_label = descriptor.label.strip().replace(" ", "_")
property_name = TOPO_DESCRIPTORS_PRODUCT + "_" + formatted_label
return m2io_mapping[descriptor.cast_type] + "_" + property_name
def _extractSingleStructure(self, st):
"""
Extracts a single structure if more than one is present.
:param st: Original structure
:type st: `structure.Structure`
:return: Structure consisting of single largest molecule
:rtype: `structure.Structure`
"""
# If there is more than one molecule in the structure, use the largest
if len(st.molecule) > 1:
logger.warning("Molecule \"%s\" modified:" % st.title)
logger.warning("More than one structure; using largest")
mol_size = [len(mol) for mol in st.molecule]
largest_mol_index = mol_size.index(max(mol_size)) + 1
return st.molecule[largest_mol_index].extractStructure()
# Otherwise the structure is fine as-is
return st
def _findDescriptorEntry(self, desc_str):
"""
Locates the corresponding Descriptor in the class tuple of Descriptors.
Looks for Descriptor based on matching the passed string to any of the
key, label, or "label (key)" strings.
:param desc_str: input descriptor string
:type desc_str: str
:return: Descriptor namedtuple
:rtype: `Descriptor`
:raise TopoError: If the descriptor entry could not be found
"""
for entry in self._available_descriptors:
if desc_str in [entry.key, entry.label, self._makeFullLabel(entry)]:
return entry
# Descriptor not found
raise TopoError("\"%s\" unrecognized" % desc_str)
def _makeFullLabel(self, descriptor):
"""
Create complete descriptor label-key string
:param descriptor: Descriptor namedtuple
:type descriptor: `Descriptor`
:return: full descriptor label
:rtype: str
"""
return "%s (%s)" % (descriptor.label, descriptor.key)
def _evaluateDescriptor(self, descriptor, st):
"""
Evalulates a descriptor for a given structure. If successful, the value
is cast to the expected type before returning.
:param descriptor: Descriptor namedtuple
:type descriptor: `Descriptor`
:return: descriptor value, cast to the appropriate return type
:rtype: one of {float, int, bool, str}
:raise TopoError: If the descriptor cast-type was unrecognized
"""
# Evaluate descriptor, supplying
try:
value = descriptor.func(st)
except TopoError as e:
if self._default_value is not None:
value = self._default_value
else:
raise TopoError(e)
# Cast function call result
try:
if descriptor.cast_type is float:
value = round(float(value), FLOAT_TRUNCATION)
elif descriptor.cast_type is int:
value = int(value)
return value
except TypeError as e:
raise TopoError("Unable to cast to %s" % descriptor.cast_type)
# Unknown cast_type
raise TopoError("Unknown cast value %s" % descriptor.cast_type)
# =========================================================================
# Descriptor methods
# =========================================================================
def _getDescriptorEntries(self):
"""
Returns set of valid descriptor tuples in the form of:
(descriptor key, descriptor label, function mapping)
:return: set of available topological descriptor mappings
:rtype: set of tuples
"""
entries = []
label = "First Zagreb"
func = self._getFirstZagrebIndex
entries.append(Descriptor(func, int, label, "ZM1"))
label = "First Zagreb index by valence vertex degrees"
func = self._getFirstZagrebValenceIndex
entries.append(Descriptor(func, float, label, "ZM1V"))
label = "Second Zagreb"
func = self._getSecondZagrebIndex
entries.append(Descriptor(func, int, label, "ZM2"))
label = "Second Zagreb index by valence vertex degrees"
func = self._getSecondZagrebValenceIndex
entries.append(Descriptor(func, float, label, "ZM2V"))
label = "Polarity"
func = self._getPolarityNumber
entries.append(Descriptor(func, int, label, "Pol"))
label = "Narumi Simple Topological"
func = self._getSimpleTopologicalIndex
entries.append(Descriptor(func, float, label, "NST"))
label = "Narumi Harmonic Topological"
func = self._getHarmonicTopologicalIndex
entries.append(Descriptor(func, float, label, "NHT"))
label = "Narumi Geometric Topological"
func = self._getGeometricTopologicalIndex
entries.append(Descriptor(func, float, label, "NGT"))
label = "Total structure connectivity"
func = self._getTotalStructureConnectivityIndex
entries.append(Descriptor(func, float, label, "TSC"))
label = "Wiener"
func = self._getWienerIndex
entries.append(Descriptor(func, int, label, "W"))
label = "Mean Wiener"
func = self._getMeanWienerIndex
entries.append(Descriptor(func, float, label, "MW"))
label = "Xu"
func = self._getXuIndex
entries.append(Descriptor(func, float, label, "Xu"))
label = "Quadratic"
func = self._getQuadraticIndex
entries.append(Descriptor(func, int, label, "QIndex"))
label = "Radial centric"
func = self._getRadialCentricInformationIndex
entries.append(Descriptor(func, float, label, "RC"))
label = "Mean Square Distance Balaban"
func = self._getMeanSquareDistanceIndex
entries.append(Descriptor(func, float, label, "MSDB"))
label = "Superpendentic"
func = self._getSuperpendenticIndex
entries.append(Descriptor(func, float, label, "SP"))
label = "Harary"
func = self._getHararyIndex
entries.append(Descriptor(func, float, label, "Har"))
label = "Log of product of row sums"
func = self._getLogPRSIndex
entries.append(Descriptor(func, float, label, "LPRS"))
label = "Pogliani"
func = self._getPoglianiIndex
entries.append(Descriptor(func, float, label, "Pog"))
label = "Schultz Molecular Topological"
func = self._getSchultzMolecularTopologicalIndex
entries.append(Descriptor(func, int, label, "SMT"))
label = "Schultz Molecular Topological by valence vertex degrees"
func = self._getSchultzMolecularTopologicalValenceIndex
entries.append(Descriptor(func, float, label, "SMTV"))
label = "Mean Distance Degree Deviation"
func = self._getMeanDistanceDegreeDeviation
entries.append(Descriptor(func, float, label, "MDDD"))
label = "Ramification"
func = self._getRamificationIndex
entries.append(Descriptor(func, int, label, "Ram"))
label = "Gutman Molecular Topological"
func = self._getGutmanMolecularTopologicalIndex
entries.append(Descriptor(func, int, label, "GMT"))
label = "Gutman MTI by valence vertex degrees"
func = self._getGutmanMolecularTopologicalValenceIndex
entries.append(Descriptor(func, float, label, "GMTV"))
label = "Average vertex distance degree"
func = self._getAverageDistanceDegree
entries.append(Descriptor(func, float, label, "AVDD"))
label = "Unipolarity"
func = self._getUnipolarity
entries.append(Descriptor(func, int, label, "UP"))
label = "Centralization"
func = self._getCentralization
entries.append(Descriptor(func, int, label, "CENT"))
label = "Variation"
func = self._getVariation
entries.append(Descriptor(func, int, label, "VAR"))
label = "Molecular electrotopological variation"
func = self._getMolecularElectrotopologicalVariation
entries.append(Descriptor(func, float, label, "MEV"))
label = "Maximal electrotopological positive variation"
func = self._getMaximalElectrotopologicalPositiveVariation
entries.append(Descriptor(func, float, label, "MEPV"))
label = "Maximal electrotopological negative variation"
func = self._getMaximalElectrotopologicalNegativeVariation
entries.append(Descriptor(func, float, label, "MENV"))
label = "Eccentric connectivity"
func = self._getEccentricConnectivityIndex
entries.append(Descriptor(func, int, label, "ECCc"))
label = "Eccentricity"
func = self._getEccentricity
entries.append(Descriptor(func, int, label, "ECC"))
label = "Average eccentricity"
func = self._getAverageEccentricity
entries.append(Descriptor(func, float, label, "AECC"))
label = "Eccentric"
func = self._getEccentric
entries.append(Descriptor(func, float, label, "DECC"))
for i in range(6):
label = "Valence connectivity index chi-%d" % i
func = partial(self._getValenceConnectivityIndex, m=i)
entries.append(Descriptor(func, float, label, "vX%d" % i))
for i in range(6):
label = "Average valence connectivity index chi-%d" % i
func = partial(self._getMeanValenceConnectivityIndex, m=i)
entries.append(Descriptor(func, float, label, "AvX%d" % i))
label = "Quasi Wiener"
func = self._getQuasiWienerIndex
entries.append(Descriptor(func, float, label, "QW"))
label = "First Mohar"
func = self._getFirstMoharIndex
entries.append(Descriptor(func, float, label, "FM"))
label = "Second Mohar"
func = self._getSecondMoharIndex
entries.append(Descriptor(func, float, label, "SM"))
label = "Spanning tree number"
func = self._getSpanningTreeNumber
entries.append(Descriptor(func, float, label, "STN"))
label = "Kier benzene-likeliness index"
func = self._getBenzeneLikelinessIndex
entries.append(Descriptor(func, float, label, "KBLI"))
for i in range(1, 11):
label = "Topological charge index of order %d" % i
func = partial(self._getTopologicalChargeIndex, k=i)
entries.append(Descriptor(func, float, label, "TCI%d" % i))
for i in range(1, 11):
label = "Mean topological charge index of order %d" % i
func = partial(self._getMeanTopologicalChargeIndex, k=i)
entries.append(Descriptor(func, float, label, "MTCI%d" % i))
label = "Global topological charge"
func = self._getGlobalTopologicalChargeIndex
entries.append(Descriptor(func, float, label, "GTC"))
label = "Hyper-distance-path index"
func = self._getHyperDistancePathIndex
entries.append(Descriptor(func, float, label, "HDPI"))
label = "Reciprocal hyper-distance-path index"
func = self._getReciprocalHyperDistancePathIndex
entries.append(Descriptor(func, float, label, "RHDPI"))
label = "Square reciprocal distance sum"
func = self._getSquareReciprocalDistanceSumIndex
entries.append(Descriptor(func, float, label, "SRDS"))
label = "Modified Randic connectivity"
func = self._getModifiedRandicIndex
entries.append(Descriptor(func, float, label, "MRC"))
label = "Balaban centric"
func = self._getBalabanCentricIndex
entries.append(Descriptor(func, float, label, "BC"))
label = "Lopping centric"
func = self._getLoppingCentricInformationIndex
entries.append(Descriptor(func, float, label, "LC"))
label = "Kier Hall electronegativity"
func = self._getKierHallElectronegativitySum
entries.append(Descriptor(func, float, label, "KHE"))
elements = ["N", "O", "S", "P", "F", "Cl", "Br", "I"]
for a, b in combinations_with_replacement(elements, 2):
label = "Sum of topological distances between %s..%s" % (a, b)
func = partial(self._getTopologicalDistanceSum, elem1=a, elem2=b)
key = "STD(%s,%s)" % (a, b)
entries.append(Descriptor(func, int, label, key))
label = "Wiener-type index from Z weighted distance matrix - Barysz matrix"
func = self._getBaryszWeinerTypeIndex
entries.append(Descriptor(func, float, label, "WhetZ"))
label = "Wiener-type index from electronegativity weighted distance matrix"
func = self._getElectronegativityWienerTypeIndex
entries.append(Descriptor(func, float, label, "Whete"))
label = "Wiener-type index from mass weighted distance matrix"
func = self._getMassWienerTypeIndex
entries.append(Descriptor(func, float, label, "Whetm"))
label = "Wiener-type index from van der waals weighted distance matrix"
func = self._getVDWVolumeWienerTypeIndex
entries.append(Descriptor(func, float, label, "Whetv"))
label = "Wiener-type index from polarizability weighted distance matrix"
func = self._getPolarizabilityWienerTypeIndex
entries.append(Descriptor(func, float, label, "Whetp"))
label = "Balaban-type index from Z weighted distance matrix - Barysz matrix"
func = self._getBaryszIndex
entries.append(Descriptor(func, float, label, "JhetZ"))
label = "Balaban-type index from electronegativity weighted distance matrix"
func = self._getElectronegativityBalabanTypeIndex
entries.append(Descriptor(func, float, label, "Jhete"))
label = "Balaban-type index from mass weighted distance matrix"
func = self._getMassBalabanTypeIndex
entries.append(Descriptor(func, float, label, "Jhetm"))
label = "Balaban-type index from van der waals weighted distance matrix"
func = self._getVDWVolumeBalabanTypeIndex
entries.append(Descriptor(func, float, label, "Jhetv"))
label = "Balaban-type index from polarizability weighted distance matrix"
func = self._getPolarizabilityBalabanTypeIndex
entries.append(Descriptor(func, float, label, "Jhetp"))
label = "Topological diameter"
func = self._getTopologicalDiameter
entries.append(Descriptor(func, int, label, "TD"))
label = "Topological radius"
func = self._getTopologicalRadius
entries.append(Descriptor(func, int, label, "TR"))
label = "Petitjean 2D shape"
func = self._getPetitjeanShapeIndex
entries.append(Descriptor(func, float, label, "PJ2DS"))
label = "Balaban distance connectivity index"
func = self._getBalabanDistanceConnectivityIndex
entries.append(Descriptor(func, float, label, "J"))
for i in range(6):
label = "Solvation connectivity index chi-%d" % i
func = partial(self._getSolvationConnectivityIndex, m=i)
entries.append(Descriptor(func, float, label, "SCIX%d" % i))
for i in range(6):
label = "Connectivity index chi-%d" % i
if i == 1:
label = "Connectivity chi-1 [Randic connectivity]"
func = partial(self._getConnectivityIndex, m=i)
entries.append(Descriptor(func, float, label, "CIX%d" % i))
for i in range(6):
label = "Average connectivity index chi-%d" % i
func = partial(self._getMeanConnectivityIndex, m=i)
entries.append(Descriptor(func, float, label, "ACIX%d" % i))
label = "reciprocal distance Randic-type index"
func = self._getRDCHIIndex
entries.append(Descriptor(func, float, label, "RDR"))
label = "reciprocal distance square Randic-type index"
func = self._getRDSQIndex
entries.append(Descriptor(func, float, label, "RDSR"))
for i in range(1, 4):
label = "%d-path Kier alpha-modified shape index" % i
func = partial(self._getKierAlphaModifiedShapeIndex, m=i)
entries.append(Descriptor(func, float, label, "KAMS%d" % i))
label = "Kier flexibility"
func = self._getKierMolecularFlexibilityIndex
entries.append(Descriptor(func, float, label, "KF"))
for i in range(2, 6):
label = "path/walk %d - Randic shape index" % i
func = partial(self._getMolecularPathWalkIndex, m=i)
entries.append(Descriptor(func, float, label, "RSIpw%d" % i))
label = "E-state topological parameter"
func = self._getEStateIndex
entries.append(Descriptor(func, float, label, "ETP"))
label = "Chirality count"
func = self._getChiralAtomCount
entries.append(Descriptor(func, int, label, "CHRCOUNT"))
for i in range(3, 21):
label = "Ring Count %d" % i
func = partial(self._getRingCount, n=i)
entries.append(Descriptor(func, int, label, "RNGCNT%d" % i))
label = "Atom Count"
func = self._getAtomCount
entries.append(Descriptor(func, int, label, "ATMCNT"))
label = "Bond Count"
func = self._getBondCount
entries.append(Descriptor(func, int, label, "BNDCNT"))
label = "Atoms in Ring System"
func = self._getRingAtomCount
entries.append(Descriptor(func, int, label, "ATMRNGCNT"))
label = "Bonds in Ring System"
func = self._getRingBondCount
entries.append(Descriptor(func, int, label, "BNDRNGCNT"))
label = "Cyclomatic number"
func = self._getCyclomaticNumber
entries.append(Descriptor(func, int, label, "CYCLONUM"))
label = "Number of ring systems"
func = self._getNumberRingSystems
entries.append(Descriptor(func, int, label, "NRS"))
label = "Normalized number of ring systems"
func = self._getNormalizedNumberRingSystems
entries.append(Descriptor(func, float, label, "NNRS"))
label = "Ring Fusion degree"
func = self._getRingFusionDegree
entries.append(Descriptor(func, float, label, "RFD"))
label = "Total ring size"
func = self._getTotalRingSize
entries.append(Descriptor(func, int, label, "TRS"))
label = "Ring perimeter"
func = self._getRingPerimeter
entries.append(Descriptor(func, int, label, "RNGPERM"))
label = "Ring bridge count"
func = self._getRingBridges
entries.append(Descriptor(func, int, label, "RNGBDGE"))
label = "Molecule cyclized degree"
func = self._getMolecularCyclizedDegree
entries.append(Descriptor(func, float, label, "MCD"))
label = "Ring Fusion density"
func = self._getRingFusionDensity
entries.append(Descriptor(func, float, label, "RFDELTA"))
label = "Ring complexity index"
func = self._getRingComplexityIndex
entries.append(Descriptor(func, float, label, "RCI"))
label = "Van der Waals surface area"
func = self._getTotalVDWSurfaceArea
entries.append(Descriptor(func, float, label, "VSA"))
for i in range(1, 9):
label = "MR%d" % i
func = partial(self._getPVSAMolarRefractivity, n=i)
entries.append(Descriptor(func, float, label, "MR%d" % i))
for i in range(1, 11):
label = "ALOGP%d" % i
func = partial(self._getPVSAlogP, n=i)
entries.append(Descriptor(func, float, label, "ALOGP%d" % i))
for i in range(1, 15):
label = "PEOE%d" % i
func = partial(self._getPVSAPartialCharges, n=i)
entries.append(Descriptor(func, float, label, "PEOE%d" % i))
return tuple(entries)
# =========================================================================
# Common descriptor asserts
# =========================================================================
def _assertMultipleHeavyAtoms(self, st):
"""
Raises a TopoError if the structure has only one heavy atom
"""
if self._getAtomCount(st) == 1:
raise TopoError("Requires more than one atom")
def _assertPathLength(self, st, m):
"""
Raises a TopoError if the structure does not have any paths of length m
"""
if m not in self._getConnectivityPaths(st):
raise TopoError("No paths of length %d present in molecule" % m)
# =========================================================================
# Descriptor methods
# =========================================================================
@cached_topo_descriptor
def _getFirstZagrebIndex(self, st):
"""
Handbook p. 509: First Zagreb index (M_1) -- topological index based on
atomic vertex degrees. The first index in strictly related to zero-order
connectivity index. Also called the Gutman index.
"""
vertex_func = self._getVertexDegree
# NOTE: vertex_func(i)^2 instead of _generalizedConnectivityIndex
return sum(
[pow(vertex_func(st, i), 2) for i in self._getAtomIndices(st)])
@cached_topo_descriptor
def _getFirstZagrebValenceIndex(self, st):
"""
Handbook p. 509: First valence Zagreb index (M^v_1) -- topological index
based on atomic valence vertex degrees. The first index in strictly
related to zero-order connectivity index.
"""
vertex_func = self._getValenceVertexDegree
# NOTE: vertex_func(i)^2 instead of _generalizedConnectivityIndex
return sum(
[pow(vertex_func(st, i), 2) for i in self._getAtomIndices(st)])
@cached_topo_descriptor
def _getSecondZagrebIndex(self, st):
"""
Handbook p. 509: Second Zagreb index (M_2) -- topological index based on
atomic vertex degrees. The second index in strictly related to
first-order connectivity index; it is part of the Schuttz molecular
topological index.
"""
vertex_func = self._getVertexDegree
return self._generalizedConnectivityIndex(st, 1, vertex_func, 1.0)
@cached_topo_descriptor
def _getSecondZagrebValenceIndex(self, st):
"""
Handbook p. 509: Second valence Zagreb index (M^v_2) -- topological
index based on atomic valence vertex degrees. The second index in
strictly related to first-order connectivity index.
"""
vertex_func = self._getValenceVertexDegree
return self._generalizedConnectivityIndex(st, 1, vertex_func, 1.0)
@cached_topo_descriptor
def _getPolarityNumber(self, st):
"""
Handbook p. 114: polarity number (p_2) -- also known as Wiener polarity
number; the number of pairs of graph vertices which are separated by
three edges. It is usually assumed that the polarity number accounts
for the flexibility of acyclic structures, p being equal to the number
of bonds around which free rotations can take place.
"""
return self._getDistanceCount(st, 3)
@cached_topo_descriptor
def _getSimpleTopologicalIndex(self, st):
"""
Handbook p. 476: simple topological index (S) -- descriptor related to
molecular branching, proposed as the product of the vertex degrees
for each atom [Narumi, 1987].
NOTE: Take the natural log to avoid overflow.
"""
self._assertMultipleHeavyAtoms(st) # prevent log(0)
return sum([
math.log(self._getVertexDegree(st, i))
for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getHarmonicTopologicalIndex(self, st):
"""
Handbook p. 476: harmonic topological index (H) -- descriptor related
to the simple topological index [Narumi, 1987].
"""
self._assertMultipleHeavyAtoms(st) # prevent divide by zero
inv_vertex = [
old_div(1.0, self._getVertexDegree(st, i))
for i in self._getAtomIndices(st)
]
return old_div(self._getAtomCount(st), sum(inv_vertex))
@cached_topo_descriptor
def _getGeometricTopologicalIndex(self, st):
"""
Handbook p. 476: geometric topological index (G) -- descriptor related
to the simple topological index [Narumi, 1987].
"""
exponent = old_div(1.0, self._getAtomCount(st))
return pow(math.exp(self._getSimpleTopologicalIndex(st)), exponent)
@cached_topo_descriptor
def _getTotalStructureConnectivityIndex(self, st):
"""
Handbook p. 86: total structure connectivity index -- connectivity index
contemporarily accounting for all the atoms in the graph. This is
the square root of the simple topological index proposed by Narumi for
measuring molecular branching. (Handbook misprint: inverse square root)
"""
# NOTE: DRAGON uses the log of the Narumi Simple Topological index
return pow(math.exp(self._getSimpleTopologicalIndex(st)), -0.5)
def _wienerOperator(self, matrix):
"""
Handbook p. 9, Wiener operator W(M) -- half the sum of the off-diagonal
entries of the matrix M; name taken from the Wiener index.
"""
return 0.5 * numpy.sum(matrix)
@cached_topo_descriptor
def _getWienerIndex(self, st):
"""
Handbook p. 497: Wiener index (W) -- the sum over all bonds of the
product of the number of vertices on each side of the bond; ie. the sum
of all topological distancesinthe H-depleted molecular graph. Also
known as the Weiner number.
"""
return self._wienerOperator(self._getDistanceMatrix(st))
@cached_topo_descriptor
def _getMeanWienerIndex(self, st):
"""
Handbook p. 497: mean Wiener index (W_bar) -- defined from the Wiener
index as 2 * W / (A * (A - 1))
"""
self._assertMultipleHeavyAtoms(st) # prevent divide by zero
n = self._getAtomCount(st)
return 2.0 * self._getWienerIndex(st) / (n * (n - 1))
@cached_topo_descriptor
def _getXuIndex(self, st):
"""
Handbook p. 507: Xu index -- descriptor accounting for molecular size
and branching. Defined as sqrt(A) * ln(L), where L represents the
valence average topological distance calculated by vertex degree and
vertex distance degree of all atoms.
NOTE: Use natural log (comparable to E-DRAGON)
"""
self._assertMultipleHeavyAtoms(st)
indices = self._getAtomIndices(st)
delta = [self._getVertexDegree(st, i) for i in indices]
sigma = [self._getVertexDistanceDegree(st, i) for i in indices]
L = numpy.dot(numpy.multiply(delta, sigma), sigma)
L /= float(numpy.dot(delta, sigma))
return math.sqrt(self._getAtomCount(st)) * math.log(L)
@cached_topo_descriptor
def _getQuadraticIndex(self, st):
"""
Handbook p. 44: quadratic index (Q) -- obtained by normalization of the
first Zagreb index. Also called normalized quadratic index.
"""
M1 = self._getFirstZagrebIndex(st)
return 3 - (2 * self._getAtomCount(st)) + (old_div(M1, 2.0))
@cached_topo_descriptor
def _getRadialCentricInformationIndex(self, st):
"""
Handbook p. 44: radial centric information index (I^V_C,R) -- defined
as the lopping centric information index, but where ng is the number of
graph vertices having the same atom eccentricity.
"""
n = float(self._getAtomCount(st))
info_func = lambda x: -(old_div(x, n)) * math.log((old_div(x, n)), 2)
return self._centricIndexSum(st, self._getAtomEccentricity, info_func)
@cached_topo_descriptor
def _getMeanSquareDistanceIndex(self, st):
"""
Handbook p. 113: mean square distance index (MSD) -- calculated from the
second order distance distribution moment [Balaban, 1983a].
NOTE: Handbook misprint -- the square root should encompass [A * (A-1)]
"""
self._assertMultipleHeavyAtoms(st) # prevent divide by zero
n = float(self._getAtomCount(st))
sq_dist_sum = numpy.sum(numpy.square(self._getDistanceMatrix(st)))
return pow(old_div(sq_dist_sum, (n * (n - 1))), 0.5)
@cached_topo_descriptor
def _getSuperpendenticIndex(self, st):
"""
Handbook p. 431: superpendentic index -- calculated from the pendent
matrix, which is a submatrix of the distance matrix which is to the
number of atoms by the number of terminal vertices. The superpendentic
index is calculated as the square root of the sum of the products of
the nonzero row elements in the pendent matrix.
NOTE: As with DRAGON, instead of returning sqrt of sum of ow products:
pow(sum(numpy.prod(pendent_matrix, axis=1)), 0.5)
we avoid overflow by returning the sqrt of sum of log(row products)
"""
terminal_vertices = [
index for index in self._getAtomIndices(st)
if self._getVertexDegree(st, index) == 1
]
if not terminal_vertices:
raise TopoError("No terminal vertices for superpendentic")
# Instead of forming a new matrix, modify the distance matrix
pendent_matrix = self._getDistanceMatrix(st).copy()
# Ignore zeros on the diagonal
numpy.fill_diagonal(pendent_matrix, 1)
# Ignore columns that are not terminal vertices
for index in self._getAtomIndices(st):
if index not in terminal_vertices:
pendent_matrix[:, index - 1].fill(1)
# Return sqrt of sum of log(row products)
return pow(numpy.sum(numpy.log(pendent_matrix)), 0.5)
@cached_topo_descriptor
def _getHararyIndex(self, st):
"""
Handbook p. 209: Harary index (H) -- topological index derived from the
reciprocal distance matrix by the Wiener operator (Harary number)
"""
return self._wienerOperator(self._getReciprocalDistanceMatrix(st))
@cached_topo_descriptor
def _getLogPRSIndex(self, st):
"""
Handbook p. 116: Log of PRS index -- log(Product of Row Sums index)
defined as the lof of the product of the vertex distance degrees.
Taking the log is preferred due to the large values that can be reached
by the PRS index. NOTE: As per Handbook, log is base 10
"""
self._assertMultipleHeavyAtoms(st) # prevent log(0)
# Distribute log to avoid overflow
return sum([
math.log10(self._getVertexDistanceDegree(st, i))
for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getPoglianiIndex(self, st):
"""
Handbook p. 116: Pogliani index (D^nu) -- sum of the ratio of the number
of valence electrons to the principal quantum number for each atom.
"""
pogliani = 0.0
for atom in self._getHeavyStructure(st).atom:
Zv_i = mm.mmelement_get_valence_count(atom.atomic_number)
L_i = mm.mmelement_get_period(atom.atomic_number)
pogliani += old_div(Zv_i, float(L_i))
return pogliani
def _baseSchultzIndex(self, st, vertex_func):
"""
Handbook p. 381: Schultz molecular topological index (MTI) -- index
derived from the adjacency and distance matrices, defined as the sum
over (A+D).v intricacy numbers.
NOTE: Handbook decomposes the index into two parts, a sum of square
vertex degrees and vertex degree-vertex distance degree products, and
erroneously labels the former as "the second Zagreb index". The sum for
M2 is over only bonded pair i-j, whereas here is over ALL i-j pairs.
:param vertex_func: vertex degree function
:type vertex_func: f(st, index)
"""
MTI = 0.0
for i in self._getAtomIndices(st):
vertex_degree = vertex_func(st, i)
MTI += pow(vertex_degree, 2)
MTI += vertex_degree * self._getVertexDistanceDegree(st, i)
return MTI
@cached_topo_descriptor
def _getSchultzMolecularTopologicalIndex(self, st):
"""
Handbook p. 381: Schultz molecular topological index (MTI) --
molecular topological index calculated using the vertex degree.
"""
return self._baseSchultzIndex(st, self._getVertexDegree)
@cached_topo_descriptor
def _getSchultzMolecularTopologicalValenceIndex(self, st):
"""
Handbook p. 382: Schultz molecular topological valence index (MTI^v) --
a vertex-valency-weighted analogue to the Schultz molecular topological
index calculated using the valence vertex degree.
"""
return self._baseSchultzIndex(st, self._getValenceVertexDegree)
@cached_topo_descriptor
def _getMeanDistanceDegreeDeviation(self, st):
"""
Handbook p. 114: mean distance degree deviation (delta sigma) -- the
mean devaiationo f the row sum of the distance matrix from its mean.
"""
average = self._getAverageDistanceDegree(st)
return numpy.mean([
abs(self._getVertexDistanceDegree(st, i) - average)
for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getRamificationIndex(self, st):
"""
Handbook p. 475: ramification index (r) -- a simple ramification index
proposed for acyclic graphs, where the sum runs over all the vertex
degrees greater than two.
"""
delta = [self._getVertexDegree(st, i) for i in self._getAtomIndices(st)]
return sum(di - 2 for di in delta if di > 2)
def _baseGutmanIndex(self, st, vertex_func):
"""
Handbook p. 382: Gutman molecular topological index (S_G) -- sum of the
topological distance between the vertices vi and vj weighted by the
product of the endpoint vertex degrees.
:param vertex_func: vertex degree function
:type vertex_func: f(st, index)
"""
dist_matrix = self._getDistanceMatrix(st)
SG = 0
for i, j in self._getAtomPairs(st):
vi, vj = vertex_func(st, i), vertex_func(st, j)
SG += dist_matrix[i - 1][j - 1] * vi * vj
return SG
@cached_topo_descriptor
def _getGutmanMolecularTopologicalIndex(self, st):
"""
Handbook p. 382: Gutman molecular topological index (S_G) -- the
vertex degree weighted form of S_G.
"""
return self._baseGutmanIndex(st, self._getVertexDegree)
@cached_topo_descriptor
def _getGutmanMolecularTopologicalValenceIndex(self, st):
"""
Handbook p. 382: Gutman molecular topological valence index (S_G) --
a vertex-valency-weighted analogue of S_G, where the weighting factor
is multiplicative.
"""
return self._baseGutmanIndex(st, self._getValenceVertexDegree)
@cached_topo_descriptor
def _getAverageDistanceDegree(self, st):
"""
Handbook p. 114: average distance degree (sigma^bar) -- average row sum
of the distance matrix. Note this is also 2 * (Wiener index) / A.
"""
return numpy.mean([
self._getVertexDistanceDegree(st, i)
for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getUnipolarity(self, st):
"""
Handbook p. 114: unipolarity (simga*) -- minimum value of the vertex
distance degrees.
"""
return min([
self._getVertexDistanceDegree(st, i)
for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getCentralization(self, st):
"""
Handbook p. 114: centralization (delta sigma^*) -- molecular invariant
immediately derived from the distance matrix.
"""
n = self._getAtomCount(st)
return 2 * self._getWienerIndex(st) - n * self._getUnipolarity(st)
@cached_topo_descriptor
def _getVariation(self, st):
"""
Handbook p. 114: variation (delta sigma^+) -- molecular invariant
immediately derived from the distance matrix.
"""
unipolarity = self._getUnipolarity(st)
return max([
self._getVertexDistanceDegree(st, i) - unipolarity
for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getMolecularElectrotopologicalVariation(self, st):
"""
Adopted from DRAGON: molecular electrotopological variation
"""
return sum(numpy.fabs(self._getFieldEffects(st)))
@cached_topo_descriptor
def _getMaximalElectrotopologicalPositiveVariation(self, st):
"""
Adopted from DRAGON: maximal electrotopological positive variation
"""
positive_fe = [fe for fe in self._getFieldEffects(st) if fe > 0.0]
if not positive_fe:
raise TopoError("No positive field effect values")
return max(positive_fe)
@cached_topo_descriptor
def _getMaximalElectrotopologicalNegativeVariation(self, st):
"""
Adopted from DRAGON: maximal electrotopological negative variation
"""
positive_fe = [abs(fe) for fe in self._getFieldEffects(st) if fe < 0.0]
if not positive_fe:
raise TopoError("No negative field effect values")
return max(positive_fe)
@cached_topo_descriptor
def _getEccentricConnectivityIndex(self, st):
"""
Handbook p. 124: eccentric connectivity index (xi^C) -- the sum of the
products between eccentricity and vertex degree over all atoms.
"""
xi_sum = 0.0
for i in self._getAtomIndices(st):
nu_i = self._getAtomEccentricity(st, i)
delta_i = self._getVertexDegree(st, i)
xi_sum += nu_i * delta_i
return xi_sum
@cached_topo_descriptor
def _getEccentricity(self, st):
"""
Handbook p. 112: eccentricity (nu) -- sum of atom eccentricities.
"""
return sum([
self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getAverageEccentricity(self, st):
"""
Handbook p. 112: average atom eccentricity (nu^bar) -- average of atom
eccentricities.
"""
return old_div(float(self._getEccentricity(st)), self._getAtomCount(st))
@cached_topo_descriptor
def _getEccentric(self, st):
"""
Handbook p. 112: eccentricity (Delta nu) -- mean deviation from average
of atom eccentricities.
"""
average = self._getAverageEccentricity(st)
dev_sum = sum([
abs(self._getAtomEccentricity(st, i) - average)
for i in self._getAtomIndices(st)
])
return old_div(float(dev_sum), self._getAtomCount(st))
@cached_topo_descriptor
def _getValenceConnectivityIndex(self, st, m):
"""
Handbook p. 85: connectivity indices of mth order -- usually known as
Kier-Hall connectivity indices, defined a general scheme based on the
Randic index to also calculate zero-order and higher-order descriptors.
:param m: index order of the connectivity index
:type m: int
"""
vertex_func = self._getValenceVertexDegree
return self._generalizedConnectivityIndex(st, m, vertex_func, -0.5)
@cached_topo_descriptor
def _getMeanValenceConnectivityIndex(self, st, m):
"""
Handbook p. 86: mean valence connectivity indices of mth order -- again,
replacing the vertex degree by the valence vertex degree in the similar
mean connectivity indices.
:param m: index order of the connectivity index
:type m: int
"""
connectivity_index = float(self._getValenceConnectivityIndex(st, m))
return old_div(connectivity_index,
len(self._getConnectivityPaths(st)[m]))
@cached_topo_descriptor
def _getQuasiWienerIndex(self, st):
"""
Handbook p. 253: quasi-Wiener index (W^*) -- sum of the reciprocal A - 1
positive eigenvalues was proposed as a molecular descriptor. For acyclic
graphs, the quasi-Wiener index coincides with the Wiener index, while
for cycle-containing graphs the two descriptors differ. Moreover, it has
been demonstrated that the quasi-Wiener index coincides with the
Kirchhoff number for any graph.
"""
eigenvalues = self._getLaplacianEigenvalues(st)[:-1] # A-1 values
quasi_sum = sum(
old_div(1.0, eigenvalues[i]) for i in range(len(eigenvalues)))
return self._getAtomCount(st) * quasi_sum
@cached_topo_descriptor
def _getFirstMoharIndex(self, st):
"""
Handbook p. 254: first Mohar indices (TI)_1 -- index derived from the
Laplacian matrix.
"""
self._assertMultipleHeavyAtoms(st) # prevent log(0)
bond_atom_ratio = old_div(float(self._getBondCount(st)),
self._getAtomCount(st))
return 2 * math.log10(bond_atom_ratio) * self._getQuasiWienerIndex(st)
@cached_topo_descriptor
def _getSecondMoharIndex(self, st):
"""
Handbook p. 254: second Mohar indices (TI)_2 -- index derived from the
Laplacian matrix.
"""
first_non_zero_eigen = self._getLaplacianEigenvalues(st)[-2]
return old_div(4.0, (self._getAtomCount(st) * first_non_zero_eigen))
@cached_topo_descriptor
def _getSpanningTreeNumber(self, st):
"""
Handbook p. 253: log of spanning tree number (T^*) -- log of the product
of the positive A-1 eigenvalues of the Laplacian matrix.
NOTE: Take the natural log to avoid overflow.
"""
eigenvalues = self._getLaplacianEigenvalues(st)[:-1] # A-1 values
eigenvalue_logsum = sum(math.log(lamdda_i) for lamdda_i in eigenvalues)
return eigenvalue_logsum - math.log(self._getAtomCount(st))
@cached_topo_descriptor
def _getBenzeneLikelinessIndex(self, st):
"""
Handbook p. 379: benzene-likeliness index -- an aromaticity index based
on the first-order valence connectivity index divided by the number of
bonds and normalized on the benzene molecule.
"""
vX1 = self._getValenceConnectivityIndex(st, 1)
return 3 * vX1 / self._getBondCount(st)
@cached_topo_descriptor
def _getTopologicalChargeIndex(self, st, k):
"""
Handbook p. 445: topological charge index (G_k) -- the half-sum of all
charge terms corresponding to pair of vertices with topological
distance = k and would represent the total charge transfer between
atoms placed at topological distance k.
:param k: path length
:type k: int
"""
charge_term_matrix = self._getChargeTermMatrix(st)
distance_matrix = self._getDistanceMatrix(st)
charge_sum_k = 0.0
for i in range(distance_matrix.shape[0]):
for j in range(i + 1, distance_matrix.shape[0]):
# Distance matrix is symmetric, d_ij == d_ji
if distance_matrix[i][j] == k:
# Charge term matrix is asymmetric, CT_ij != CT_ji
charge_sum_k += abs(charge_term_matrix[i][j])
charge_sum_k += abs(charge_term_matrix[j][i])
return 0.5 * charge_sum_k
@cached_topo_descriptor
def _getMeanTopologicalChargeIndex(self, st, k):
"""
Handbook p. 445: mean topological charge index (J_k) -- topological
charge index divided by the number of edges in an acyclic molecule.
Values are set to zero for k greater than the molecular diameter.
:param k: path length
:type k: int
"""
if k > self._getTopologicalDiameter(st):
return 0.0
norm = self._getDistanceCount(st, k)
return old_div(self._getTopologicalChargeIndex(st, k), float(norm))
@cached_topo_descriptor
def _getGlobalTopologicalChargeIndex(self, st):
"""
Handbook p. 445: global topological charge index (J) -- sum over mean
topological charge indinces, with the superior limit equal to 5. The
value was proposed by the authors to obtain uniform length descriptors.
NOTE: Past implementation, as well as DRAGON, sum up to k=10
"""
return sum(
self._getMeanTopologicalChargeIndex(st, i) for i in range(1, 11))
@cached_topo_descriptor
def _getHyperDistancePathIndex(self, st):
"""
Handbook p. 118: hyper-distance-path index (D_p) -- defined as applying
the Wiener operator to the distance-path matrix, where entry i-j of the
matrix is calculated from the distance matrix D as all the possible
combinations of two elements taken from d_ij + 1 elements (binomial
coefficient).
"""
dist_matrix = self._getDistanceMatrix(st)
dist_path_matrix = old_div((numpy.power(dist_matrix, 2) + dist_matrix),
2.0)
return self._wienerOperator(dist_path_matrix)
@cached_topo_descriptor
def _getReciprocalHyperDistancePathIndex(self, st):
"""
Handbook p. 118: reciprocal hyper-distance-pathindex (D_p^-1) -- defined
in the same was as the hyper-distance-path index, but where elements of
the distance-path matrix are reciprocal.
"""
dist_matrix = self._getDistanceMatrix(st)
dist_path_matrix = old_div((numpy.power(dist_matrix, 2) + dist_matrix),
2.0)
return self._wienerOperator(matrix_reciprocal(dist_path_matrix))
@cached_topo_descriptor
def _getSquareReciprocalDistanceSumIndex(self, st):
"""
Handbook p. 210: square reciprocal distance sum index (Har2) -- the
Harary index calculated with the reciprocal squared distance matrix.
"""
return self._wienerOperator(self._getReciprocalSquareDistanceMatrix(st))
@cached_topo_descriptor
def _getModifiedRandicIndex(self, st):
"""
Handbook p. 88: modified Randic index (chi^1_mod) -- sum of atomic
properties, accounting for valence electrons and extended connectivities
using a Randic connectivity index-type formula.
0.5 * sum_atoms[ sum_atomi_bonds[ Z_i * (delta_i * delta_j) ^ -0.5 ]]
"""
randic_sum = 0.0
for atom in self._getHeavyStructure(st).atom:
for bond in atom.bond:
di = self._getVertexDegree(st, bond.atom1.index)
dj = self._getVertexDegree(st, bond.atom2.index)
randic_sum += old_div(float(atom.atomic_number),
pow(di * dj, 0.5))
return 0.5 * randic_sum
@cached_topo_descriptor
def _getBalabanCentricIndex(self, st):
"""
Handbook p. 42: Balaban centric index (B) -- topological index defined
for acyclic graphs based on the pruning of the graph, a stepwise
procedure for removing all the terminal vertices.
"""
if self._getCyclomaticNumber(st):
raise TopoError("Cycles present, only defined for acyclic graphs")
square_func = lambda x: x * x
return self._centricIndexSum(st, self._getVertexDegree, square_func)
@cached_topo_descriptor
def _getLoppingCentricInformationIndex(self, st):
"""
Handbook p. 42: lopping centric information index (I_B) -- index
defined as the mean information content derived from the pruning of
acyclic graphs based on the pruning of the graph, a stepwise
procedure for removing all the terminal vertices.
"""
if self._getCyclomaticNumber(st):
raise TopoError("Cycles present, only defined for acyclic graphs")
n = float(self._getAtomCount(st))
info_func = lambda x: -(old_div(x, n)) * math.log((old_div(x, n)), 2)
return self._centricIndexSum(st, self._getVertexDegree, info_func)
@cached_topo_descriptor
def _getKierHallElectronegativitySum(self, st):
"""
Handbook p. 475: Kier-Hall electronegativity index (KHE) -- sum of
Kier-Hall electrotopological states.
"""
KHE_sum = 0.0
for i, atom in enumerate(self._getHeavyStructure(st).atom, start=1):
delta_vi = self._getValenceVertexDegree(st, i)
delta_i = self._getVertexDegree(st, i)
L_i = mm.mmelement_get_period(atom.atomic_number)
KHE_sum += old_div(float(delta_vi - delta_i), pow(L_i, 2))
return KHE_sum
@cached_topo_descriptor
def _getTopologicalDistanceSum(self, st, elem1, elem2):
"""
Adopted from DRAGON: sum of topological distances between all pairs
of given atom types.
"""
distance_matrix = self._getDistanceMatrix(st)
elements = {elem1, elem2}
top_dist = 0
heavy_st = self._getHeavyStructure(st)
ele_map = {int(atom): atom.element for atom in heavy_st.atom}
for i, j in self._getAtomPairs(st):
if {ele_map[i], ele_map[j]} == elements:
top_dist += distance_matrix[i - 1, j - 1]
return top_dist
@cached_topo_descriptor
def _getBaryszWeinerTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Wiener operator to
the Barysz distance matrix.
"""
return self._wienerOperator(self._getBaryszDistanceMatrix(st))
@cached_topo_descriptor
def _getElectronegativityWienerTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Wiener operator to
the electronegativities weighted distance matrix.
"""
distance_matrix = self._getElectronegativityDistanceMatrix(st)
return self._wienerOperator(distance_matrix)
@cached_topo_descriptor
def _getMassWienerTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Wiener operator to
the atomic mass weighted distance matrix.
"""
return self._wienerOperator(self._getMassDistanceMatrix(st))
@cached_topo_descriptor
def _getVDWVolumeWienerTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Wiener operator to
the van der Waals volume weighted distance matrix.
"""
return self._wienerOperator(self._getVDWVolumeDistanceMatrix(st))
@cached_topo_descriptor
def _getPolarizabilityWienerTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Wiener operator to
the polarizability weighted distance matrix.
"""
distance_matrix = self._getPolarizabilityDistanceMatrix(st)
return self._wienerOperator(distance_matrix)
@cached_topo_descriptor
def _getBaryszIndex(self, st):
"""
Handbook p. 489: Barysz index (J_het) -- a generalization of the
Balaban distance connectivity index calculated by applying the
Ivanciuc-Balaban operator to the Barysz distance matrix.
"""
return self._balbanOperator(st, self._getBaryszDistanceMatrix(st))
@cached_topo_descriptor
def _getElectronegativityBalabanTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
operator to the electronegativities weighted distance matrix.
"""
distance_matrix = self._getElectronegativityDistanceMatrix(st)
return self._balbanOperator(st, distance_matrix)
@cached_topo_descriptor
def _getMassBalabanTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
operator to the atomic mass weighted distance matrix.
"""
return self._balbanOperator(st, self._getMassDistanceMatrix(st))
@cached_topo_descriptor
def _getVDWVolumeBalabanTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
operator to the van der Waals volume weighted distance matrix.
"""
return self._balbanOperator(st, self._getVDWVolumeDistanceMatrix(st))
@cached_topo_descriptor
def _getPolarizabilityBalabanTypeIndex(self, st):
"""
Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
operator to the polarizability weighted distance matrix.
"""
distance_matrix = self._getPolarizabilityDistanceMatrix(st)
return self._balbanOperator(st, distance_matrix)
@cached_topo_descriptor
def _getTopologicalDiameter(self, st):
"""
Handbook p. 112: topological diameter (D) -- defined as the maximum
atom eccentricity
"""
return max([
self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getTopologicalRadius(self, st):
"""
Handbook p. 112: topological radius (R) -- defined as the minimum atom
eccentricity
"""
return min([
self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st)
])
@cached_topo_descriptor
def _getPetitjeanShapeIndex(self, st):
"""
Handbook p. 390: Petitjean shape index (I_2) -- a topological anisometry
descriptor, also called a graph-theoretical shape coefficient.
"""
self._assertMultipleHeavyAtoms(st) # prevent divide by zero
diameter = self._getTopologicalDiameter(st)
radius = self._getTopologicalRadius(st)
return old_div((diameter - radius), float(radius))
@cached_topo_descriptor
def _getBalabanDistanceConnectivityIndex(self, st):
"""
Handbook p. 21: Balaban distance connectivity index (J) -- defined in
terms of sums over each i th row of the distance matrix as::
B/(C+1) * sum_bonds[ (vertex dist_i * vertex dist_j) ^ -0.5 ]
It is also called distance connectivity index or average distance sum
connectivity.
"""
return self._balbanOperator(st, self._getDistanceMatrix(st))
@cached_topo_descriptor
def _getSolvationConnectivityIndex(self, st, m):
"""
Handbook p. 88: solvation connectivity indices (m chi^s_q) -- descriptor
defined in order to model solvation entropy and describe dispersion
interactions in solution.
"""
self._assertMultipleHeavyAtoms(st) # prevent pow(0, exponent)
self._assertPathLength(st, m)
connectivity_index = 0.0
atoms = self._getHeavyStructure(st).atom
for path in self._getConnectivityPaths(st)[m]:
L = [mm.mmelement_get_period(atoms[i].atomic_number) for i in path]
delta = [self._getVertexDegree(st, i) for i in path]
connectivity_index += old_div(numpy.prod(L),
pow(numpy.prod(delta), 0.5))
return (old_div(1.0, pow(2, m + 1))) * connectivity_index
@cached_topo_descriptor
def _getConnectivityIndex(self, st, m):
"""
Handbook p. 85: connectivity indices of mth order -- usually known as
Kier-Hall connectivity indices, defined a general scheme based on the
Randic index to also calculate zero-order and higher-order descriptors.
:param m: index order of the connectivity index
:type m: int
"""
vertex_func = self._getVertexDegree
return self._generalizedConnectivityIndex(st, m, vertex_func, -0.5)
@cached_topo_descriptor
def _getMeanConnectivityIndex(self, st, m):
"""
Handbook p. 85: mean connectivity indices of mth order -- the Handbook
only describes the mean Randic connectivity index (m=1), defined as the
Randic connectivity index divided by the number of bonds. Here, we
extrapolate to zero and higher orders
:param m: index order of the connectivity index
:type m: int
"""
connectivity_index = float(self._getConnectivityIndex(st, m))
return old_div(connectivity_index,
len(self._getConnectivityPaths(st)[m]))
@cached_topo_descriptor
def _getRDCHIIndex(self, st):
"""
Handbook p. 116: RDCHI index -- topological index based on a
Randic-type formula, which increases with molecular size but decreases
with molecular branching.
"""
RDCHI = 0.0
for i, j in self._getBondIndices(st):
RDS_i = self._getReciprocalDistanceSum(st, i)
RDS_j = self._getReciprocalDistanceSum(st, j)
RDCHI += pow(RDS_i * RDS_j, -0.5)
return RDCHI
@cached_topo_descriptor
def _getRDSQIndex(self, st):
"""
Handbook p. 116: RDSQ index -- topological index based on a
Randic-type formula, which increases with both molecular size and
molecular branching.
"""
RDSQ = 0.0
for i, j in self._getBondIndices(st):
RDS_i = self._getReciprocalDistanceSum(st, i)
RDS_j = self._getReciprocalDistanceSum(st, j)
RDSQ += pow(RDS_i * RDS_j, 0.5)
return RDSQ
@cached_topo_descriptor
def _getKierAlphaModifiedShapeIndex(self, st, m):
"""
Handbook p. 249: Kier alpha-modified shape descriptor (m kappa_alpha) --
descriptor defined in terms of the number of graph vertices A and the
number of paths with length (m = 1,2,3). The alpha-modified version
takes into account the different shape contribution of heteroatoms and
hybridization states.
"""
self._assertPathLength(st, m)
# alpha values indexed by atomic number and hybridization state
# Handbook p. 250, Table K-1
R_Csp3 = 0.77
alpha_dict = {
(6, canvas.Sp3): (old_div(R_Csp3, R_Csp3)) - 1.0,
(6, canvas.Sp2): (old_div(0.67, R_Csp3)) - 1.0,
(6, canvas.Sp): (old_div(0.60, R_Csp3)) - 1.0,
(7, canvas.Sp3): (old_div(0.74, R_Csp3)) - 1.0,
(7, canvas.Sp2): (old_div(0.62, R_Csp3)) - 1.0,
(7, canvas.Sp): (old_div(0.55, R_Csp3)) - 1.0,
(8, canvas.Sp3): (old_div(0.74, R_Csp3)) - 1.0,
(8, canvas.Sp2): (old_div(0.62, R_Csp3)) - 1.0,
(9, canvas.Sp3): (old_div(0.72, R_Csp3)) - 1.0,
(15, canvas.Sp3): (old_div(1.10, R_Csp3)) - 1.0,
(15, canvas.Sp2): (old_div(1.00, R_Csp3)) - 1.0,
(16, canvas.Sp3): (old_div(1.04, R_Csp3)) - 1.0,
(16, canvas.Sp2): (old_div(0.94, R_Csp3)) - 1.0,
(17, canvas.Sp3): (old_div(0.99, R_Csp3)) - 1.0,
(35, canvas.Sp3): (old_div(1.14, R_Csp3)) - 1.0,
(53, canvas.Sp3): (old_div(1.33, R_Csp3)) - 1.0,
}
# parameter derived from the ration of the covalent raidus relative
# to the sp3 carbon atom
alpha = 0.0
for a in self._getChmMol(st).getHeavyAtoms():
atom_key = (a.getAtomicNumber(), a.getHybridization())
try:
alpha += alpha_dict[atom_key]
except KeyError:
raise TopoError("No alpha defined for atom %d %d" % atom_key)
A = self._getAtomCount(st)
A_alpha = A + alpha
if m == 1:
minmax_path_count = A_alpha * pow(A_alpha - 1, 2)
elif m == 2:
minmax_path_count = (A_alpha - 1) * pow(A_alpha - 2, 2)
elif m == 3 and A % 2 == 0: # even number of atoms
minmax_path_count = (A_alpha - 3) * pow(A_alpha - 2, 2)
elif m == 3 and A % 2 != 0: # odd number of atoms
minmax_path_count = (A_alpha - 1) * pow(A_alpha - 3, 2)
else:
raise TopoError("Unknown m value %d" % m)
num_paths = len(self._getConnectivityPaths(st)[m]) + alpha
return old_div(float(minmax_path_count), pow(num_paths, 2))
@cached_topo_descriptor
def _getKierMolecularFlexibilityIndex(self, st):
"""
Handbook p. 178: Kier molecular flexibility index (fi) -- a measure of
molecular flexibility derived from the Kier alpha-modified shape
descriptors, kappa1 encodes information about the count of atoms and
relative cyclicity of molecules, while kappa2 encodes information about
branching or relative spatial density of molecules.
"""
kappa1 = self._getKierAlphaModifiedShapeIndex(st, 1)
kappa2 = self._getKierAlphaModifiedShapeIndex(st, 2)
return kappa1 * kappa2 / self._getAtomCount(st)
@cached_topo_descriptor
def _getMolecularPathWalkIndex(self, st, m):
"""
Handbook p. 393: Molecular path/walk indices -- the average sum of
atomic path/walk indices of equal length. Obtained by separately
summing all the paths and walks of the same length, and then calculating
the ratio between their counts.
"""
self._assertPathLength(st, m)
paths = self._getConnectivityPaths(st)[m]
awc = self._getAtomicWalkCounts(st, m)
path_walk_sum = 0.0
for i in self._getAtomIndices(st):
# Number of paths of length m from atom i
atom_path_count = len([p for p in paths if i in (p[0], p[m])])
path_walk_sum += old_div(atom_path_count, float(awc[i - 1]))
return old_div(path_walk_sum, self._getAtomCount(st))
@cached_topo_descriptor
def _getEStateIndex(self, st):
"""
Molecular Descriptors p. 42: E-state topological parameter (TI^E) --
derived by applying the Ivanciuc-Balaban operator to the E-state index
values. It has to be pointed out that the proposed formula for the
E-state topological parameter cannot be used for every molecule because
it presents two drawbacks: (1) it cannot be calculated when there exists
one atom in the molecule with negative E-state value; (2) it assumes
very large values even when one S value tends to zero. To overcome
these drawbacks of the original formula, an alternative formula
(adopted in the DRAGON descriptors) is used here.
"""
B = self._getBondCount(st)
C = self._getCyclomaticNumber(st)
# exp(E-state index) = exp(I_i + Delta I_i)
exp_S = [
math.exp(sum(x)) for x in zip(self._getFieldEffects(st),
self._getIntrinsicStates(st))
]
dist_sum = 0.0
for i, j in self._getBondIndices(st):
S_i, S_j = exp_S[i - 1], exp_S[j - 1]
dist_sum += pow(1.0 + (S_i * S_j), -0.5)
return (old_div(float(B), (C + 1))) * dist_sum
@cached_topo_descriptor
def _getChiralAtomCount(self, st):
"""
Unknown source: number of chiral atoms -- count chiral atoms labeled
with either R or S (ignoring ANR and ANS).
_getFullStructure needs to be avoided here, since H atom indexes would
be altered, and Chirality labels will be invalidaded for 1D structures.
"""
st_copy = st.copy()
build.add_hydrogens(st_copy)
chiral_atoms = analyze.get_chiral_atoms(st_copy)
return len([c for c in chiral_atoms.values() if c in ("R", "S")])
@cached_topo_descriptor
def _getRingCount(self, st, n):
"""
Adopted from DRAGON: number of rings of heavy atom count n
"""
ring_lengths = [len(ring) for ring in self._getHeavyStructure(st).ring]
return ring_lengths.count(n)
@cached_topo_descriptor
def _getAtomCount(self, st):
"""
Handbook p. 16: atom number (A) -- defined as the total number of atoms
in a molecule, which refers only to non-hydrogen atoms (atom count).
"""
return self._getHeavyStructure(st).atom_total
@cached_topo_descriptor
def _getBondCount(self, st):
"""
Handbook p. 28: bond number (B) -- defined as the number of bonds in
the molecule (edge counting; bond count).
"""
return len(self._getHeavyStructure(st).bond)
@cached_topo_descriptor
def _getRingAtomCount(self, st):
"""
Molecular Descriptors p. 655: (A_R) -- the total number of atoms
belonging to rings
"""
ring_atoms = [
index for ring in self._getHeavyStructure(st).ring
for index in ring.getAtomIndices()
]
return len(set(ring_atoms))
@cached_topo_descriptor
def _getRingBondCount(self, st):
"""
Molecular Descriptors p. 655: (B_R) -- the total number of bonds
belonging to rings
"""
ring_bonds = [
sorted([bond_edge.atom1.index, bond_edge.atom2.index])
for ring in self._getHeavyStructure(st).ring
for bond_edge in ring.edge
]
return len(set([tuple(bond) for bond in ring_bonds]))
@cached_topo_descriptor
def _getNumberRingSystems(self, st):
"""
Molecular Descriptors p. 655: number of ring systems (NRS)
"""
return (self._getBondCount(st) - self._getRingBondCount(st)) - \
(self._getAtomCount(st) - self._getRingAtomCount(st)) + 1
@cached_topo_descriptor
def _getNormalizedNumberRingSystems(self, st):
"""
Molecular Descriptors p. 655: normalized number of ring systems (NRS*)
by the cyclomatic number.
"""
C = self._getCyclomaticNumber(st)
if not C:
raise TopoError("No ring systems present")
return old_div(self._getNumberRingSystems(st), float(C))
@cached_topo_descriptor
def _getRingFusionDegree(self, st):
"""
Molecular Descriptors p. 655: ring fusion degree (RFD) -- the
reciprocal of the normalized number of ring systems.
"""
return old_div(1.0, self._getNormalizedNumberRingSystems(st))
@cached_topo_descriptor
def _getTotalRingSize(self, st):
"""
Molecular Descriptors p. 656: total ring size (R) -- the sum of the
ring size of all the single cycles of all the ring systems; in this
case, the atoms belonging to fused connections are counted twice.
"""
ring_atoms = [
index for ring in self._getHeavyStructure(st).ring
for index in ring.getAtomIndices()
]
return len(ring_atoms)
@cached_topo_descriptor
def _getRingPerimeter(self, st):
"""
Molecular Descriptors p. 656: ring perimeter (R_P) -- represents the
perimeter of all the rings present in the molecule.
"""
return 2 * self._getRingBondCount(st) - self._getTotalRingSize(st)
@cached_topo_descriptor
def _getRingBridges(self, st):
"""
Molecular Descriptors p. 656: ring bridges (R_B) -- represents the
number of bridge edges.
"""
return self._getTotalRingSize(st) - self._getRingBondCount(st)
@cached_topo_descriptor
def _getMolecularCyclizedDegree(self, st):
"""
Molecular Descriptors p. 656: molecular cyclized degree (MCD) --
ratio of ring atoms to total atoms.
"""
return old_div(self._getRingAtomCount(st),
float(self._getAtomCount(st)))
@cached_topo_descriptor
def _getRingFusionDensity(self, st):
"""
Molecular Descriptors p. 656: ring fusion density (RF Delta)
"""
ring_atom_count = self._getRingAtomCount(st)
if not ring_atom_count:
raise TopoError("No ring systems present")
return 2.0 * self._getRingBridges(st) / ring_atom_count
@cached_topo_descriptor
def _getRingComplexityIndex(self, st):
"""
Molecular Descriptors p. 656: ring complexity index (C_R)
"""
ring_atom_count = self._getRingAtomCount(st)
if not ring_atom_count:
raise TopoError("No rings present")
return old_div(self._getTotalRingSize(st),
float(self._getRingAtomCount(st)))
@cached_topo_descriptor
def _getTotalVDWSurfaceArea(self, st):
"""
Molecular Descriptors p. 609: Total VSA for all atoms
"""
return sum(self._getVDWSurfaceArea(st))
@cached_topo_descriptor
def _getPVSAMolarRefractivity(self, st, n):
"""
Molecular Descriptors p. 611: P_VSA based on molar refractivity.
Property interval boundaries extracted from Table P16, p. 611
"""
check_canvas_license()
mr_typer = canvas_base.ChmAtomTyperMR()
value_iterator = mr_typer.valueType(self._getChmMol(st), False)
mr_values = [value.getValue() for value in value_iterator]
intervals = [0.11, 0.26, 0.35, 0.39, 0.44, 0.485, 0.56]
return self._propertyRangeVDWSurfaceArea(st, mr_values, intervals, n)
@cached_topo_descriptor
def _getPVSAlogP(self, st, n):
"""
Molecular Descriptors p. 611: P_VSA based on log P values, to capture
hydrophobic and hydrophillic effect. Property interval boundaries
extracted from Table P16, p. 611
"""
check_canvas_license()
alogp_typer = canvas_base.ChmAtomTyperAlogP()
value_iterator = alogp_typer.valueType(self._getChmMol(st), False)
alogp_values = [value.getValue() for value in value_iterator]
intervals = [-0.4, -0.2, 0.0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4]
return self._propertyRangeVDWSurfaceArea(st, alogp_values, intervals, n)
@cached_topo_descriptor
def _getPVSAPartialCharges(self, st, n):
"""
Molecular Descriptors p. 611: P_VSA based on Gasteiger charges, ie.
partial equalization of orbital electronegativity (PEOE). Property
interval boundaries extracted from Table P16, p. 611
"""
check_canvas_license()
gasteiger = canvas_base.ChmGasteigerCharge()
try:
gasteiger.calcGasteigerCharges(self._getChmMol(st))
# TODO: Currently throws RuntimeError, need to wrap ChmException
except RuntimeError as e:
raise TopoError(str(e).strip())
peoe_values = gasteiger.getCharges()
intervals = [
-0.3, -0.25, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2,
0.25, 0.3
]
return self._propertyRangeVDWSurfaceArea(st, peoe_values, intervals, n)
# =========================================================================
# Intermediate descriptor methods
# =========================================================================
@cached_topo_descriptor
def _getHeavyStructure(self, st):
"""
Handbook p. 313: Hydrogen-depleted molecule.
"""
st_copy = st.copy()
build.delete_hydrogens(st_copy)
return st_copy
@cached_topo_descriptor
def _getFullStructure(self, st):
"""
Fully hydrogenated molecule.
"""
st_copy = self._getHeavyStructure(st).copy()
build.add_hydrogens(st_copy)
return st_copy
@cached_topo_descriptor
def _getChmMol(self, st):
check_canvas_license()
adaptor = canvas_base.ChmMmctAdaptor()
return adaptor.create(int(self._getFullStructure(st)))
@cached_topo_descriptor
def _getHydrogenCount(self, st, heavy_atom_index):
"""
Handbook p. 474: (h_i) -- the number of hydrogen atoms bonded to a
given heavy atom.
:param heavy_atom_index: 1-based index of the heavy atom structure
:type heavy_atom_index: int
"""
# This works because hydrogens are added to the HeavyStructure, which
# appends them and does not change the heavy atom indices
full_st_atom = self._getFullStructure(st).atom[heavy_atom_index]
return sum(bond.atom2.atomic_number == 1 for bond in full_st_atom.bond)
@cached_topo_descriptor
def _getAtomIndices(self, st):
"""
Returns list of atom indices, which are 1-based.
"""
return list(range(1, self._getHeavyStructure(st).atom_total + 1))
@cached_topo_descriptor
def _getAtomPairs(self, st):
"""
Returns list of all pairs of atom indices, which are 1-based.
"""
n = self._getAtomCount(st)
return [(i, j) for i in range(1, n + 1) for j in range(i + 1, n + 1)]
@cached_topo_descriptor
def _getBondIndices(self, st):
"""
Returns list of bond index tuples, which are 1-based.
Sorts indices (for comparision purposes)
"""
return [
tuple(sorted([bond.atom1.index, bond.atom2.index]))
for bond in self._getHeavyStructure(st).bond
]
@cached_topo_descriptor
def _getAdjacencyMatrix(self, st):
"""
Handbook p. 2: adjacency matrix (A) -- represents the whole set of
connections between adjacent pairs of atoms. The entries a_ij of the
matrix equal one if vertices vi and vj are bonded, and zero otherwise.
The adjacency matrix is symmetric with dimension A x A , where A is the
number of atoms and it is derived from an H-depleted molecular graph.
"""
n = self._getAtomCount(st)
adjmat = numpy.zeros((n, n), dtype=int)
for i, j in self._getBondIndices(st):
adjmat[i - 1][j - 1] = adjmat[j - 1][i - 1] = 1
return adjmat
@cached_topo_descriptor
def _getVertexDegree(self, st, atom_index):
"""
Handbook p. 474, 2: vertex degree (delta_i) -- the ith row sum of the
adjacency matrix.
:param atom_index: 1-based atomic index
:type atom_index: int
"""
return sum(self._getAdjacencyMatrix(st)[atom_index - 1])
@cached_topo_descriptor
def _getSimpleValenceVertexDegree(self, st, atom_index):
"""
Handbook p. 474: valence vertex degree (delta^v_i) -- vertex degree
taking into account all valence electrons of the ith atom. A simplified
version of the equation below.
:param atom_index: 1-based atomic index
:type atom_index: int
"""
atom = self._getHeavyStructure(st).atom[atom_index]
Zv_i = mm.mmelement_get_valence_count(atom.atomic_number)
h_i = self._getHydrogenCount(st, atom_index)
return float(Zv_i - h_i)
@cached_topo_descriptor
def _getValenceVertexDegree(self, st, atom_index):
"""
Handbook p. 474: valence vertex degree (delta^v_i) -- vertex degree
taking into account all valence electrons of the ith atom. It encodes
the electronic identity of the atom in terms of both valence electron
and core electron counts. Given as: (Z^v_i - h_i) / (Z_i - Z^v_i - 1)
For atoms of higher principal quantum levels (P, S, Cl, Br, I), Kier
and Hall proposed to account for both valence and nonvalence electrons
Z^v_i = number of valence electrons of ith atom
h_i = number of hydrogens bonded to atom
Z_i = total number of electrons of ith atom (atomic number)
:param atom_index: 1-based atomic index
:type atom_index: int
"""
atom = self._getHeavyStructure(st).atom[atom_index]
Zv_i = mm.mmelement_get_valence_count(atom.atomic_number)
h_i = self._getHydrogenCount(st, atom_index)
return old_div(float(Zv_i - h_i), (atom.atomic_number - Zv_i - 1))
@cached_topo_descriptor
def _getConnectivityGraph(self, st):
"""
Build undirected graph corresponding to molecular connectivity.
"""
graph = networkx.Graph()
graph.add_edges_from(self._getBondIndices(st))
return graph
@cached_topo_descriptor
def _getConnectivityGraphWithWeights(self, st, carbon_weight, atom_weights):
"""
Build a graph representation of a ligand structure, weighted according
to the rules specified for building the multigraph distance matrix
(inverse bond order).
:param st: a ligand structure
:type st: structure.Structure
:param carbon_weight: weight assigned to carbon atoms
:type carbon_weight: float
:param atom_weights: weight assigned to each heavy atom in the ligand;
should have length equal to the number of heavy atoms in the
ligand
:type atom_weights: tuple[float]
:return: a weighted graph representation of the ligand `st`
:rtype: networkx.Graph
"""
graph = networkx.Graph()
heavy_st = self._getHeavyStructure(st)
weight_c2 = carbon_weight * carbon_weight
for idx1, idx2 in self._getBondIndices(st):
weight1, weight2 = atom_weights[idx1 - 1], atom_weights[idx2 - 1]
bond_order = self._getConventionalBondOrder(st, idx1, idx2)
weight = weight_c2 / (bond_order * weight1 * weight2)
graph.add_edge(idx1, idx2, weight=weight)
return graph
@cached_topo_descriptor
def _getDistanceMatrix(self, st):
"""
Handbook p. 112: distance matrix (D) -- matrix of all topological
distances between all the atom pairs. Also known as the vertex distance
matrix. The topological distance d_ij the number of edges in the
shortest path between the vertices vi and vi; the off-diagonal entries
of the distance matrix equal one if vertices vi and v, are adjacent
(i.e. the atoms i and j are bonded) and are greater than one otherwise.
The diagonal elements are of course equal to zero. The distance matrix
is symmetric with dimension A x A, where A is the number of atoms, and
is derived from the H-depleted molecular graph.
"""
n = self._getAtomCount(st)
dist_mat = numpy.zeros((n, n), dtype=int)
graph = self._getConnectivityGraph(st)
for i, j in self._getAtomPairs(st):
path_length = networkx.shortest_path_length(graph, i, j)
dist_mat[i - 1][j - 1] = dist_mat[j - 1][i - 1] = path_length
return dist_mat
@cached_topo_descriptor
def _getReciprocalDistanceMatrix(self, st):
"""
Handbook p. 112: reciprocal distance matrix (D^-1) -- a square
symmetric A x A matrix derived from the distance matrix where each
off-diagonal element is the reciprocal of the topological distance d
between the considered vertices (and 0 for d_ii).
"""
return matrix_reciprocal(self._getDistanceMatrix(st))
@cached_topo_descriptor
def _getReciprocalSquareDistanceMatrix(self, st):
"""
Handbook p. 112: reciprocal square distance matrix (D^-2) -- a square
symmetric A x A matrix derived from the distance matrix where each
off-diagonal element is the reciprocal of the topological distance d
between the considered vertices (and 0 for d_ii).
"""
return numpy.power(self._getReciprocalDistanceMatrix(st), 2)
@cached_topo_descriptor
def _getVertexDistanceDegree(self, st, atom_index):
"""
Handbook p. 113, vertex distance degree (sigma_i) -- the ith row sum of
the distance matrix. Also known as the distance number, distance index,
distance rank, vertex distance sum, or distance of a vertex.
:param atom_index: 1-based atomic index
:type atom_index: int
"""
return sum(self._getDistanceMatrix(st)[atom_index - 1])
@cached_topo_descriptor
def _getAtomEccentricity(self, st, atom_index):
"""
Handbook p. 112, atom eccentricity (nu_i) -- maximum value entry in the
ith row of the distance matrix, i.e. the maximum distance from theith
vertex to any other vertices (vertex eccentricity).
:param atom_index: 1-based atomic index
:type atom_index: int
"""
return max(self._getDistanceMatrix(st)[atom_index - 1])
@cached_topo_descriptor
def _getDistanceCount(self, st, distance):
"""
Handbook p. 113, graph distance count (f^k) -- total number of
distances in the graph equal to k
:param distance: bonded distance (kth-order)
:type distance: int
"""
distance_count = Counter(self._getDistanceMatrix(st).flatten())
return 0.5 * distance_count[distance]
@cached_topo_descriptor
def _getReciprocalDistanceSum(self, st, atom_index):
"""
Handbook p. 116: reciprocal distance sum (RDS_i) -- sum of the
reciprocal distance matrix elements in the ith row.
:param atom_index: 1-based atomic index
:type atom_index: int
"""
return sum(self._getReciprocalDistanceMatrix(st)[atom_index - 1])
@cached_topo_descriptor
def _getConnectivityPaths(self, st):
"""
Handbook p. 85, all shortest paths keyed by path lengths. The paths
are equivalent to mth order subgraphs in the molecular structure.
"""
connectivity_paths = defaultdict(list)
# Zero-th order paths are the atoms to themselves: [index, index]
connectivity_paths[0] = [[i] for i in self._getAtomIndices(st)]
# Higher order paths are taken from shortest paths
graph = self._getConnectivityGraph(st)
for i, j in self._getAtomPairs(st):
for path_ij in networkx.all_simple_paths(graph, i, j, cutoff=5):
connectivity_paths[len(path_ij) - 1].append(path_ij)
return connectivity_paths
@cached_topo_descriptor
def _getCyclomaticNumber(self, st):
"""
Handbook p. 94, cyclomatic number -- the number of independent cycles
(or rings), and, more exactly, the number of non-overlapping cycles.
Should be equivalent to len(st.ring)
"""
return self._getBondCount(st) - self._getAtomCount(st) + 1
@cached_topo_descriptor
def _getIntrinsicStates(self, st):
"""
Handbook p. 159: intrinsic state (I_i) -- calculated from the principal
quantum number, the number of valence electrons, and the number of
sigma electrons of the ith atom in the H-depleted molecular graph.
NOTE: Molecular Descriptors p. 284, Table E11 and DRAGON use the simple valence
vertex degree when calculating the instrisic state.
"""
self._assertMultipleHeavyAtoms(st) # prevent divide by zero
intrinsic_states = []
heavy_st = self._getHeavyStructure(st)
for i in self._getAtomIndices(st):
atom = heavy_st.atom[i]
L_i = mm.mmelement_get_period(atom.atomic_number)
v_i = self._getSimpleValenceVertexDegree(st, i)
d_i = self._getVertexDegree(st, i)
intrinsic_states.append(
old_div((pow(old_div(2.0, L_i), 2) * v_i + 1), d_i))
return intrinsic_states
@cached_topo_descriptor
def _getFieldEffects(self, st, k=2):
"""
Handbook p. 159: field effect (Delta I_i) -- calculated as perturbation
of the intrinsic state of ith atom by all other atoms in the molecule.
The exponent k is a parameter to modify the influence of distant or
nearby atoms for particular studies. Usually it is taken as k = 2.
"""
intrinsic_states = self._getIntrinsicStates(st)
field_effect = []
atom_count = self._getAtomCount(st)
dist_matrix = self._getDistanceMatrix(st)
for i in range(atom_count):
field_effect_i = 0.0
for j in range(atom_count):
I_diff = intrinsic_states[i] - intrinsic_states[j]
d_ij = dist_matrix[i][j]
field_effect_i += old_div(float(I_diff), pow(d_ij + 1.0, k))
field_effect.append(field_effect_i)
return field_effect
@cached_topo_descriptor
def _getLaplacianEigenvalues(self, st):
"""
Handbook p. 253: laplacian eigenvalues (lambda_i) -- from the Laplacian
matrix, defined as the difference between the vertex degree matrix and
the adjacency matrix. The diagonalization of the Laplacian matrix gives
A real eigenvalues hi which constitute the Laplacian spectrum. All
eignevalues are (a) non-negative numbers, (b) the last value is always
zero, and (c) the second to last is great than zero if the graph is
connected.
"""
# Construct Laplacian matrix; L = V - A
lap_matrix = -1 * self._getAdjacencyMatrix(st)
for i in self._getAtomIndices(st):
lap_matrix[i - 1, i - 1] = self._getVertexDegree(st, i)
# Diagonalize the matrix and return sorted eigenvalues
eigenvalues = list(numpy.linalg.eigh(lap_matrix)[0])
# Check that A-1 eignevalues exist
if not eigenvalues[:-1]:
raise TopoError("No Laplacian eigenvalues available")
return sorted(eigenvalues, reverse=True)
@cached_topo_descriptor
def _getChargeTermMatrix(self, st):
"""
Handbook p. 445: charge term matrix (CT) -- an unsymmetric matrix of
charge terms that are graph invariants related to the charge transfer
between the pair of considered vertices. The diagonal entries of the CT
matrix represent the topological valence of the atoms; the off-diagonal
entries CT_ij represent a measure of the net charge transferred from the
atom j to the atom i.
"""
# Galvez matrix; M = A * D^-2
galvex_matrix = numpy.dot(self._getAdjacencyMatrix(st),
self._getReciprocalSquareDistanceMatrix(st))
# CT = diag(M) + [M - M^T]
return numpy.diag(numpy.diag(galvex_matrix)) + (
galvex_matrix - numpy.transpose(galvex_matrix))
@cached_topo_descriptor
def _getAromaticBonds(self, st):
"""
Collects all bonds in the heavy atom structure that are aromatic.
:return: tuple of aromatic bonds
:rtype: tuple of `structure.StructureBond`
"""
adaptor = canvas.ChmMmctAdaptor()
heavy_atom_st = self._getHeavyStructure(st)
aromatic_info = adaptor.getAromaticityInfo(heavy_atom_st.handle)
aromatic_bonds = []
for i, j in self._getBondIndices(st):
if aromatic_info[i - 1] and aromatic_info[j - 1]:
aromatic_bonds.append(tuple(sorted([i, j])))
return tuple(aromatic_bonds)
@cached_topo_descriptor
def _getConventionalBondOrder(self, st, i, j):
"""
Handbook p. 28: conventional bond order (sigma^*) -- within the
framework of the graph theory, specifically the multigraph, this is
defined as being equal to 1, 2, 3, and 1.5 for single, double, triple
and aromatic bonds, respectively. Define zero-order bonds as 1, as to be
valid with multigraph distance weight and vdW surface area correction.
"""
search_indices = tuple(sorted((i, j)))
for bond in self._getHeavyStructure(st).bond:
indices = tuple(sorted([bond.atom1.index, bond.atom2.index]))
if indices == search_indices:
# Assume aromatic bond are 1.5, as per handbook
if indices in self._getAromaticBonds(st):
return 1.5
# Return 1 for zero-order bonds
if not bond.order:
return 1
# Otherwise use the intenger bond order
return bond.order
raise TopoError("No bond exists between atoms %d and %d" % (i, j))
@cached_topo_descriptor
def _getMultigraphDistanceMatrix(self, st, carbon_weight, atom_weights):
"""
Handbook p. 488: multigraph distance matrix (``*D``) -- a weighted
distance matrix where the distance from vertex vi to vertex vj is
obtained by counting the edges in the shortest path between them, where
each edge counts as the inverse of the conventional bond order, i.e.
the relative topological distance, and therefore contributes 1 / bond
order to the overall path length.
:param carbon_weight: weight for the carbon atom
:type carbon_weight: float
:param atom_weights: tuple of weights for each atom
:type atom_weights: tuple of float
"""
atom_count = self._getAtomCount(st)
dist_mat = numpy.zeros((atom_count, atom_count), dtype=float)
# Diagonal elements are defined as 1 - (X_carbon / X_i)
for i, weight_i in enumerate(atom_weights):
dist_mat[i][i] = 1.0 - (carbon_weight / float(weight_i))
# Off-diagonal elements are the shortest weighted path length between
# atoms
graph = self._getConnectivityGraphWithWeights(st, carbon_weight,
atom_weights)
dist_map = dict(networkx.shortest_path_length(G=graph, weight='weight'))
for idx1, idx2 in self._getAtomPairs(st):
dist_mat[idx1 - 1][idx2 - 1] = dist_map[idx1][idx2]
dist_mat[idx2 - 1][idx1 - 1] = dist_map[idx2][idx1]
return dist_mat
@cached_topo_descriptor
def _getBaryszDistanceMatrix(self, st):
"""
Handbook p. 488: Barysz distance matrix (D^Z) -- a weighted distance
matrix accounting simultaneously for the presence of heteroatoms and
multiple bonds in the molecule.
"""
Z_carbon = 6
Z_weights = [a.atomic_number for a in self._getHeavyStructure(st).atom]
return self._getMultigraphDistanceMatrix(st, Z_carbon, tuple(Z_weights))
@cached_topo_descriptor
def _getElectronegativityDistanceMatrix(self, st):
"""
Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
matrix with Sanderson electronegativities
See: Handbook p. 488, Barysz distance matrix
"""
# Sanderson electronegativity (chi^SA) indexed by atomic number
# MMolecular Descriptors p. 22, Table A3
electronegativity = {
5: 2.275,
6: 2.746,
7: 3.194,
8: 3.654,
9: 4.000,
13: 1.714,
14: 2.138,
15: 2.515,
16: 2.957,
17: 3.475,
26: 2.000,
27: 2.000,
28: 2.000,
29: 2.033,
30: 2.223,
35: 3.219,
50: 2.298,
53: 2.778,
}
w_carbon = electronegativity[6]
try:
w_weights = [
electronegativity[atom.atomic_number]
for atom in self._getHeavyStructure(st).atom
]
except KeyError as e:
raise TopoError("Electronegativity unavailable: %s" % e)
return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))
@cached_topo_descriptor
def _getMassDistanceMatrix(self, st):
"""
Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
matrix with atomic masses
See: Handbook p. 488, Barysz distance matrix
"""
w_carbon = 12.01115
w_weights = (a.atomic_weight for a in self._getFullStructure(st).atom \
if a.atomic_number != 1)
return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))
@cached_topo_descriptor
def _getVDWVolumeDistanceMatrix(self, st):
"""
Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
matrix with van der Waals volumes
See: Handbook p. 488, Barysz distance matrix
"""
# van der Waals volume (V^vdw) indexed by atomic number
# Molecular Descriptors p. 22, Table A3
vdw_volume = {
5: 17.875,
6: 22.449,
7: 15.599,
8: 11.494,
9: 9.203,
13: 36.511,
14: 31.976,
15: 26.522,
16: 24.429,
17: 23.228,
26: 41.052,
27: 35.041,
28: 17.157,
29: 11.494,
30: 38.351,
35: 31.059,
50: 45.830,
53: 38.792,
}
w_carbon = vdw_volume[6]
try:
w_weights = [
vdw_volume[atom.atomic_number]
for atom in self._getHeavyStructure(st).atom
]
except KeyError as e:
raise TopoError("van der Waals volume unavailable: %s" % e)
return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))
@cached_topo_descriptor
def _getPolarizabilityDistanceMatrix(self, st):
"""
Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
matrix with polarizabilities
See: Handbook p. 488, Barysz distance matrix
"""
# polarizability (alpha) indexed by atomic number
# Molecular Descriptors p. 22, Table A3
polarizability = {
5: 3.030,
6: 1.760,
7: 1.100,
8: 0.802,
9: 0.557,
13: 6.800,
14: 5.380,
15: 3.630,
16: 2.900,
17: 2.180,
26: 8.400,
27: 7.500,
28: 6.800,
29: 6.100,
30: 7.100,
35: 3.050,
50: 7.700,
53: 5.350,
}
w_carbon = polarizability[6]
try:
w_weights = [
polarizability[atom.atomic_number]
for atom in self._getHeavyStructure(st).atom
]
except KeyError as e:
raise TopoError("Polarizability unavailable: %s" % e)
return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))
@cached_topo_descriptor
def _getAtomicWalkCounts(self, st, k):
"""
Handbook p. 480: atomic walk count (awc) -- the total number of
equipoise walks of length k starting from vertex vi.
"""
# Get A^k
adjacency_k = self._getAdjacencyMatrix(st)
for i in range(k - 1):
adjacency_k = numpy.dot(adjacency_k, self._getAdjacencyMatrix(st))
return numpy.sum(adjacency_k, axis=0)
@cached_topo_descriptor
def _getVDWSurfaceArea(self, st):
"""
Molecular Descriptors p. 609: van der Waals surface area (VSA) --
calculated from the atomic van der Waals radius, summing over
contributions from atoms in the adjacency matrix.
"""
# Correction term depending on the bond order:
# 0 for single, 0.1 for aromatic, 0.2 for double, 0.3 for triple
correction = {1: 0, 1.5: 0.1, 2: 0.20, 3: 0.30}
vdw_surface_areas = []
full_st = self._getFullStructure(st)
for i, atom_i in enumerate(full_st.atom, start=1):
R_i = atom_i.radius
contribution_sum = 0.0
for bond in atom_i.bond:
j = bond.atom2.index
R_j = bond.atom2.radius
# Hydrogens are single bonded, otherwise get bond order
if 1 in (atom_i.atomic_number, bond.atom2.atomic_number):
c_ij = correction[1]
else:
c_ij = correction[self._getConventionalBondOrder(st, i, j)]
b_ij = mm.mmideal_get_stretch(full_st, (i, j)) - c_ij
g_ij = min(max(abs(R_i - R_j), b_ij), R_i + R_j)
contribution_sum += old_div((pow(R_j, 2) - pow(R_i - g_ij, 2)),
g_ij)
# VSA_i = (4 * PI * R_i^2) - (PI * P_I * ij_contributions)
vsa_i = math.pi * R_i * (4 * R_i - contribution_sum)
vdw_surface_areas.append(vsa_i)
return vdw_surface_areas
# =============================================================================
# Non-cached helper functions
# =============================================================================
def _balbanOperator(self, st, matrix):
"""
Handbook p. 7, Ivanciuc-Balaban operator IB(M) -- a modified Randic
operator summing over all bonded pairs for row sums of each
participant::
B/(C+1) * sum_bonds[ (R(i) * R(j)) ^ -0.5 ]
C is the cyclomatic number, B is the number of bonds, and R is the row
sum operator.
"""
B = self._getBondCount(st)
C = self._getCyclomaticNumber(st)
row_sum = [sum(row) for row in matrix]
dist_sum = 0.0
for i, j in self._getBondIndices(st):
dist_sum += pow(row_sum[i - 1] * row_sum[j - 1], -0.5)
return (old_div(float(B), (C + 1))) * dist_sum
def _centricIndexSum(self, st, value_function, centric_function):
"""
Calculates atom centric index based on an atomic property function and
a base function of the centric index sum.
:param value_function: function to extract atomic property
:type value_function: f(st, atom_index)
:param centric_function: base function for component value of count
:type centric_function: f(count)
"""
values = [value_function(st, i) for i in self._getAtomIndices(st)]
value_count_tuples = Counter(values).most_common()
return sum(centric_function(count) for _, count in value_count_tuples)
def _generalizedConnectivityIndex(self, st, m, vertex_func, exponent):
"""
Handbook p. 86: formula for generalized connectivity indices, used for
several descriptors.
:param m: index order of the connectivity index
:type m: int
:param vertex_func: vertex degree function
:type vertex_func: f(st, index)
:param exponent: power to raise vertex degree product
:type exponent: float
"""
self._assertMultipleHeavyAtoms(st) # prevent pow(0, exponent)
self._assertPathLength(st, m)
connectivity_index = 0.0
for path in self._getConnectivityPaths(st)[m]:
vertex_degrees = [vertex_func(st, i) for i in path]
connectivity_index += pow(numpy.prod(vertex_degrees), exponent)
return connectivity_index
def _propertyRangeVDWSurfaceArea(self, st, properties, intervals, n):
"""
Molecular Descriptors p. 609: P_VSA -- the amount of van der Waals
surface area (VSA) having a property value in a certain range. These
descriptors correspond to a partition of the molecular surface area
conditioned by the atomic values of the property P.
:param properties: 1-based atomic index
:type properties: list
:param intervals: 1-based atomic index
:type intervals: list
:param n: 1-based interval
:type n: int
"""
surface_areas = self._getVDWSurfaceArea(st)
if len(properties) > len(surface_areas):
raise TopoError("Mismatch between atom values and number of atoms")
surface_area_sum = 0.0
for value, vsai in zip(properties, surface_areas):
# Find bin corresponding to a_i <= value < a_i+1
if bisect.bisect_right(intervals, value) + 1 == n:
surface_area_sum += vsai
return surface_area_sum
# =============================================================================
# External helper functions
# =============================================================================
[docs]def check_canvas_license():
"""
Check that a CANVAS_FULL license is available to use
"""
CANVAS_LICENSE = canvas_base.ChmLicenseShared()
if not CANVAS_LICENSE.isValid():
raise TopoError("VSA descriptors require Canvas license")
[docs]def matrix_reciprocal(matrix):
"""
Returns a matrix whose elements are the reciprocal of each of the
elements of the original matrix. The diagonal of the matrix must be
zeros, and the routine assumes that no other zeros exist in the matrix.
"""
if numpy.sum(numpy.diag(matrix)) != 0.0:
raise TopoError("Diagonal of matrix must be zeros.")
inv_matrix = matrix.copy()
# Non-zero number for inversion
numpy.fill_diagonal(inv_matrix, -1)
inv_matrix = old_div(1.0, inv_matrix)
# Assign zeros back to diagonal
numpy.fill_diagonal(inv_matrix, 0.0)
return inv_matrix