Source code for schrodinger.application.canvas.topo_descriptors

"""
Module which computes constitutional and topological descriptors, walk and
path counts, and connectivity indices.

Details of the descriptors implemented here are taken from:
"Handbook of Molecular Descriptors" by Mannhols, Kubinyi and Timmerman
"Molecular Descriptors for Chemoinformatics, Vol. I", by Todeschini and Consonni

Copyright Schrodinger, LLC. All rights reserved.
"""

import bisect
import math
from collections import Counter
from collections import defaultdict
from collections import namedtuple
from functools import partial
from itertools import combinations_with_replacement
from past.utils import old_div

import networkx
import numpy
from decorator import decorator

import schrodinger.application.canvas.base as canvas_base
from schrodinger.infra import canvas
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.utils import log

logger = log.get_output_logger(__file__)

# Descriptor container
Descriptor = namedtuple("Descriptor", ["func", "cast_type", "label", "key"])
TOPO_DESCRIPTORS_PRODUCT = "desc"
HEAVY_ATOM_MAX = 150
# CANVAS-27492: Round floats to avoid platform differences
FLOAT_TRUNCATION = 8


[docs]class TopoError(Exception):
    """
    Error raised when unable to calculate topological descriptor
    """


[docs]@decorator
def cached_topo_descriptor(func, self, *args, **kwargs):
    """
    Decorator to extract previously calculated descriptor value from
    cached descriptors dictionary. If not available, the descriptor is
    calculated and stored for future reference.
    """

    key = tuple([func.__name__]) + args
    if key in self._cached_property:
        return self._cached_property[key]

    value = func(self, *args, **kwargs)
    self._cached_property[key] = value
    return value


[docs]class TopologicalDescriptors(object):

[docs]    def __init__(self, default_descriptor_value=None):
        """
        Initializes valid topological descriptors, and creates an empty
        cached property dictionary.
        """
        self._default_value = default_descriptor_value
        self._cached_property = {}
        self._available_descriptors = self._getDescriptorEntries()

[docs]    def allDescriptorFullLabels(self):
        """
        Returns all topological descriptor labels as label-key string

        :return: list of all descriptors as "Label (Key)" strings
        :rtype: list of str
        """
        return [
            self._makeFullLabel(desc) for desc in self._available_descriptors
        ]

[docs]    def calculateTopologicalDescriptors(self, st, descriptor_strings):
        """
        Calculates a set of topological descriptors for a given structure.

        :param st: structure to consider
        :type st: `structure.Structure`

        :param descriptor_strings: list of descriptors to calculate
        :type descriptor_strings: list of str

        :return: Structure copy appended with successful descriptor properties
        :rtype: `structure.Structure`
        """

        # Clear any previously cached properties
        self._cached_property.clear()

        # Create structure copy to which new properties can be appended
        working_st = self._extractSingleStructure(st)
        final_st = st.copy()

        # Ignore H or molecules with more 150 heavy atoms
        natom = self._getAtomCount(working_st)
        if natom == 0 or natom > HEAVY_ATOM_MAX:
            logger.warning("Molecule \"%s\" rejected:" % st.title)
            logger.warning("%d heavy atoms in the given molecule" % natom)
            return final_st

        for desc_str in descriptor_strings:
            try:
                desc_entry = self._findDescriptorEntry(desc_str)
                property_name = self.m2ioPropertyName(desc_entry)
                value = self._evaluateDescriptor(desc_entry, working_st)
                final_st.property[property_name] = value
            except TopoError as e:
                logger.info("-- unable to compute %s: %s" % (desc_str, e))
        return final_st

[docs]    def m2ioPropertyName(self, descriptor):
        """
        Create descriptor m2io property name for a Descriptor.

        :param descriptor: descriptor entry
        :type descriptor: `Descriptor`

        :return: m2io formatted descriptor property name
        :rtype: str
        """

        m2io_mapping = {float: "r", int: "i", bool: "b", str: "s"}

        # Create property name from label without spaces
        formatted_label = descriptor.label.strip().replace(" ", "_")
        property_name = TOPO_DESCRIPTORS_PRODUCT + "_" + formatted_label
        return m2io_mapping[descriptor.cast_type] + "_" + property_name

    def _extractSingleStructure(self, st):
        """
        Extracts a single structure if more than one is present.

        :param st: Original structure
        :type st: `structure.Structure`

        :return: Structure consisting of single largest molecule
        :rtype: `structure.Structure`
        """

        # If there is more than one molecule in the structure, use the largest
        if len(st.molecule) > 1:
            logger.warning("Molecule \"%s\" modified:" % st.title)
            logger.warning("More than one structure; using largest")
            mol_size = [len(mol) for mol in st.molecule]
            largest_mol_index = mol_size.index(max(mol_size)) + 1
            return st.molecule[largest_mol_index].extractStructure()
        # Otherwise the structure is fine as-is
        return st

    def _findDescriptorEntry(self, desc_str):
        """
        Locates the corresponding Descriptor in the class tuple of Descriptors.
        Looks for Descriptor based on matching the passed string to any of the
        key, label, or "label (key)" strings.

        :param desc_str: input descriptor string
        :type desc_str: str

        :return: Descriptor namedtuple
        :rtype: `Descriptor`

        :raise TopoError: If the descriptor entry could not be found
        """

        for entry in self._available_descriptors:
            if desc_str in [entry.key, entry.label, self._makeFullLabel(entry)]:
                return entry

        # Descriptor not found
        raise TopoError("\"%s\" unrecognized" % desc_str)

    def _makeFullLabel(self, descriptor):
        """
        Create complete descriptor label-key string

        :param descriptor: Descriptor namedtuple
        :type descriptor: `Descriptor`

        :return: full descriptor label
        :rtype: str
        """
        return "%s (%s)" % (descriptor.label, descriptor.key)

    def _evaluateDescriptor(self, descriptor, st):
        """
        Evalulates a descriptor for a given structure. If successful, the value
        is cast to the expected type before returning.

        :param descriptor: Descriptor namedtuple
        :type descriptor: `Descriptor`

        :return: descriptor value, cast to the appropriate return type
        :rtype: one of {float, int, bool, str}

        :raise TopoError: If the descriptor cast-type was unrecognized
        """

        # Evaluate descriptor, supplying
        try:
            value = descriptor.func(st)
        except TopoError as e:
            if self._default_value is not None:
                value = self._default_value
            else:
                raise TopoError(e)

        # Cast function call result
        try:
            if descriptor.cast_type is float:
                value = round(float(value), FLOAT_TRUNCATION)
            elif descriptor.cast_type is int:
                value = int(value)
            return value
        except TypeError as e:
            raise TopoError("Unable to cast to %s" % descriptor.cast_type)

        # Unknown cast_type
        raise TopoError("Unknown cast value %s" % descriptor.cast_type)

    # =========================================================================
    # Descriptor methods
    # =========================================================================

    def _getDescriptorEntries(self):
        """
        Returns set of valid descriptor tuples in the form of:
          (descriptor key, descriptor label, function mapping)

        :return: set of available topological descriptor mappings
        :rtype: set of tuples
        """

        entries = []

        label = "First Zagreb"
        func = self._getFirstZagrebIndex
        entries.append(Descriptor(func, int, label, "ZM1"))

        label = "First Zagreb index by valence vertex degrees"
        func = self._getFirstZagrebValenceIndex
        entries.append(Descriptor(func, float, label, "ZM1V"))

        label = "Second Zagreb"
        func = self._getSecondZagrebIndex
        entries.append(Descriptor(func, int, label, "ZM2"))

        label = "Second Zagreb index by valence vertex degrees"
        func = self._getSecondZagrebValenceIndex
        entries.append(Descriptor(func, float, label, "ZM2V"))

        label = "Polarity"
        func = self._getPolarityNumber
        entries.append(Descriptor(func, int, label, "Pol"))

        label = "Narumi Simple Topological"
        func = self._getSimpleTopologicalIndex
        entries.append(Descriptor(func, float, label, "NST"))

        label = "Narumi Harmonic Topological"
        func = self._getHarmonicTopologicalIndex
        entries.append(Descriptor(func, float, label, "NHT"))

        label = "Narumi Geometric Topological"
        func = self._getGeometricTopologicalIndex
        entries.append(Descriptor(func, float, label, "NGT"))

        label = "Total structure connectivity"
        func = self._getTotalStructureConnectivityIndex
        entries.append(Descriptor(func, float, label, "TSC"))

        label = "Wiener"
        func = self._getWienerIndex
        entries.append(Descriptor(func, int, label, "W"))

        label = "Mean Wiener"
        func = self._getMeanWienerIndex
        entries.append(Descriptor(func, float, label, "MW"))

        label = "Xu"
        func = self._getXuIndex
        entries.append(Descriptor(func, float, label, "Xu"))

        label = "Quadratic"
        func = self._getQuadraticIndex
        entries.append(Descriptor(func, int, label, "QIndex"))

        label = "Radial centric"
        func = self._getRadialCentricInformationIndex
        entries.append(Descriptor(func, float, label, "RC"))

        label = "Mean Square Distance Balaban"
        func = self._getMeanSquareDistanceIndex
        entries.append(Descriptor(func, float, label, "MSDB"))

        label = "Superpendentic"
        func = self._getSuperpendenticIndex
        entries.append(Descriptor(func, float, label, "SP"))

        label = "Harary"
        func = self._getHararyIndex
        entries.append(Descriptor(func, float, label, "Har"))

        label = "Log of product of row sums"
        func = self._getLogPRSIndex
        entries.append(Descriptor(func, float, label, "LPRS"))

        label = "Pogliani"
        func = self._getPoglianiIndex
        entries.append(Descriptor(func, float, label, "Pog"))

        label = "Schultz Molecular Topological"
        func = self._getSchultzMolecularTopologicalIndex
        entries.append(Descriptor(func, int, label, "SMT"))

        label = "Schultz Molecular Topological by valence vertex degrees"
        func = self._getSchultzMolecularTopologicalValenceIndex
        entries.append(Descriptor(func, float, label, "SMTV"))

        label = "Mean Distance Degree Deviation"
        func = self._getMeanDistanceDegreeDeviation
        entries.append(Descriptor(func, float, label, "MDDD"))

        label = "Ramification"
        func = self._getRamificationIndex
        entries.append(Descriptor(func, int, label, "Ram"))

        label = "Gutman Molecular Topological"
        func = self._getGutmanMolecularTopologicalIndex
        entries.append(Descriptor(func, int, label, "GMT"))

        label = "Gutman MTI by valence vertex degrees"
        func = self._getGutmanMolecularTopologicalValenceIndex
        entries.append(Descriptor(func, float, label, "GMTV"))

        label = "Average vertex distance degree"
        func = self._getAverageDistanceDegree
        entries.append(Descriptor(func, float, label, "AVDD"))

        label = "Unipolarity"
        func = self._getUnipolarity
        entries.append(Descriptor(func, int, label, "UP"))

        label = "Centralization"
        func = self._getCentralization
        entries.append(Descriptor(func, int, label, "CENT"))

        label = "Variation"
        func = self._getVariation
        entries.append(Descriptor(func, int, label, "VAR"))

        label = "Molecular electrotopological variation"
        func = self._getMolecularElectrotopologicalVariation
        entries.append(Descriptor(func, float, label, "MEV"))

        label = "Maximal electrotopological positive variation"
        func = self._getMaximalElectrotopologicalPositiveVariation
        entries.append(Descriptor(func, float, label, "MEPV"))

        label = "Maximal electrotopological negative variation"
        func = self._getMaximalElectrotopologicalNegativeVariation
        entries.append(Descriptor(func, float, label, "MENV"))

        label = "Eccentric connectivity"
        func = self._getEccentricConnectivityIndex
        entries.append(Descriptor(func, int, label, "ECCc"))

        label = "Eccentricity"
        func = self._getEccentricity
        entries.append(Descriptor(func, int, label, "ECC"))

        label = "Average eccentricity"
        func = self._getAverageEccentricity
        entries.append(Descriptor(func, float, label, "AECC"))

        label = "Eccentric"
        func = self._getEccentric
        entries.append(Descriptor(func, float, label, "DECC"))

        for i in range(6):
            label = "Valence connectivity index chi-%d" % i
            func = partial(self._getValenceConnectivityIndex, m=i)
            entries.append(Descriptor(func, float, label, "vX%d" % i))

        for i in range(6):
            label = "Average valence connectivity index chi-%d" % i
            func = partial(self._getMeanValenceConnectivityIndex, m=i)
            entries.append(Descriptor(func, float, label, "AvX%d" % i))

        label = "Quasi Wiener"
        func = self._getQuasiWienerIndex
        entries.append(Descriptor(func, float, label, "QW"))

        label = "First Mohar"
        func = self._getFirstMoharIndex
        entries.append(Descriptor(func, float, label, "FM"))

        label = "Second Mohar"
        func = self._getSecondMoharIndex
        entries.append(Descriptor(func, float, label, "SM"))

        label = "Spanning tree number"
        func = self._getSpanningTreeNumber
        entries.append(Descriptor(func, float, label, "STN"))

        label = "Kier benzene-likeliness index"
        func = self._getBenzeneLikelinessIndex
        entries.append(Descriptor(func, float, label, "KBLI"))

        for i in range(1, 11):
            label = "Topological charge index of order %d" % i
            func = partial(self._getTopologicalChargeIndex, k=i)
            entries.append(Descriptor(func, float, label, "TCI%d" % i))

        for i in range(1, 11):
            label = "Mean topological charge index of order %d" % i
            func = partial(self._getMeanTopologicalChargeIndex, k=i)
            entries.append(Descriptor(func, float, label, "MTCI%d" % i))

        label = "Global topological charge"
        func = self._getGlobalTopologicalChargeIndex
        entries.append(Descriptor(func, float, label, "GTC"))

        label = "Hyper-distance-path index"
        func = self._getHyperDistancePathIndex
        entries.append(Descriptor(func, float, label, "HDPI"))

        label = "Reciprocal hyper-distance-path index"
        func = self._getReciprocalHyperDistancePathIndex
        entries.append(Descriptor(func, float, label, "RHDPI"))

        label = "Square reciprocal distance sum"
        func = self._getSquareReciprocalDistanceSumIndex
        entries.append(Descriptor(func, float, label, "SRDS"))

        label = "Modified Randic connectivity"
        func = self._getModifiedRandicIndex
        entries.append(Descriptor(func, float, label, "MRC"))

        label = "Balaban centric"
        func = self._getBalabanCentricIndex
        entries.append(Descriptor(func, float, label, "BC"))

        label = "Lopping centric"
        func = self._getLoppingCentricInformationIndex
        entries.append(Descriptor(func, float, label, "LC"))

        label = "Kier Hall electronegativity"
        func = self._getKierHallElectronegativitySum
        entries.append(Descriptor(func, float, label, "KHE"))

        elements = ["N", "O", "S", "P", "F", "Cl", "Br", "I"]
        for a, b in combinations_with_replacement(elements, 2):
            label = "Sum of topological distances between %s..%s" % (a, b)
            func = partial(self._getTopologicalDistanceSum, elem1=a, elem2=b)
            key = "STD(%s,%s)" % (a, b)
            entries.append(Descriptor(func, int, label, key))

        label = "Wiener-type index from Z weighted distance matrix - Barysz matrix"
        func = self._getBaryszWeinerTypeIndex
        entries.append(Descriptor(func, float, label, "WhetZ"))

        label = "Wiener-type index from electronegativity weighted distance matrix"
        func = self._getElectronegativityWienerTypeIndex
        entries.append(Descriptor(func, float, label, "Whete"))

        label = "Wiener-type index from mass weighted distance matrix"
        func = self._getMassWienerTypeIndex
        entries.append(Descriptor(func, float, label, "Whetm"))

        label = "Wiener-type index from van der waals weighted distance matrix"
        func = self._getVDWVolumeWienerTypeIndex
        entries.append(Descriptor(func, float, label, "Whetv"))

        label = "Wiener-type index from polarizability weighted distance matrix"
        func = self._getPolarizabilityWienerTypeIndex
        entries.append(Descriptor(func, float, label, "Whetp"))

        label = "Balaban-type index from Z weighted distance matrix - Barysz matrix"
        func = self._getBaryszIndex
        entries.append(Descriptor(func, float, label, "JhetZ"))

        label = "Balaban-type index from electronegativity weighted distance matrix"
        func = self._getElectronegativityBalabanTypeIndex
        entries.append(Descriptor(func, float, label, "Jhete"))

        label = "Balaban-type index from mass weighted distance matrix"
        func = self._getMassBalabanTypeIndex
        entries.append(Descriptor(func, float, label, "Jhetm"))

        label = "Balaban-type index from van der waals weighted distance matrix"
        func = self._getVDWVolumeBalabanTypeIndex
        entries.append(Descriptor(func, float, label, "Jhetv"))

        label = "Balaban-type index from polarizability weighted distance matrix"
        func = self._getPolarizabilityBalabanTypeIndex
        entries.append(Descriptor(func, float, label, "Jhetp"))

        label = "Topological diameter"
        func = self._getTopologicalDiameter
        entries.append(Descriptor(func, int, label, "TD"))

        label = "Topological radius"
        func = self._getTopologicalRadius
        entries.append(Descriptor(func, int, label, "TR"))

        label = "Petitjean 2D shape"
        func = self._getPetitjeanShapeIndex
        entries.append(Descriptor(func, float, label, "PJ2DS"))

        label = "Balaban distance connectivity index"
        func = self._getBalabanDistanceConnectivityIndex
        entries.append(Descriptor(func, float, label, "J"))

        for i in range(6):
            label = "Solvation connectivity index chi-%d" % i
            func = partial(self._getSolvationConnectivityIndex, m=i)
            entries.append(Descriptor(func, float, label, "SCIX%d" % i))

        for i in range(6):
            label = "Connectivity index chi-%d" % i
            if i == 1:
                label = "Connectivity chi-1 [Randic connectivity]"
            func = partial(self._getConnectivityIndex, m=i)
            entries.append(Descriptor(func, float, label, "CIX%d" % i))

        for i in range(6):
            label = "Average connectivity index chi-%d" % i
            func = partial(self._getMeanConnectivityIndex, m=i)
            entries.append(Descriptor(func, float, label, "ACIX%d" % i))

        label = "reciprocal distance Randic-type index"
        func = self._getRDCHIIndex
        entries.append(Descriptor(func, float, label, "RDR"))

        label = "reciprocal distance square Randic-type index"
        func = self._getRDSQIndex
        entries.append(Descriptor(func, float, label, "RDSR"))

        for i in range(1, 4):
            label = "%d-path Kier alpha-modified shape index" % i
            func = partial(self._getKierAlphaModifiedShapeIndex, m=i)
            entries.append(Descriptor(func, float, label, "KAMS%d" % i))

        label = "Kier flexibility"
        func = self._getKierMolecularFlexibilityIndex
        entries.append(Descriptor(func, float, label, "KF"))

        for i in range(2, 6):
            label = "path/walk %d - Randic shape index" % i
            func = partial(self._getMolecularPathWalkIndex, m=i)
            entries.append(Descriptor(func, float, label, "RSIpw%d" % i))

        label = "E-state topological parameter"
        func = self._getEStateIndex
        entries.append(Descriptor(func, float, label, "ETP"))

        label = "Chirality count"
        func = self._getChiralAtomCount
        entries.append(Descriptor(func, int, label, "CHRCOUNT"))

        for i in range(3, 21):
            label = "Ring Count %d" % i
            func = partial(self._getRingCount, n=i)
            entries.append(Descriptor(func, int, label, "RNGCNT%d" % i))

        label = "Atom Count"
        func = self._getAtomCount
        entries.append(Descriptor(func, int, label, "ATMCNT"))

        label = "Bond Count"
        func = self._getBondCount
        entries.append(Descriptor(func, int, label, "BNDCNT"))

        label = "Atoms in Ring System"
        func = self._getRingAtomCount
        entries.append(Descriptor(func, int, label, "ATMRNGCNT"))

        label = "Bonds in Ring System"
        func = self._getRingBondCount
        entries.append(Descriptor(func, int, label, "BNDRNGCNT"))

        label = "Cyclomatic number"
        func = self._getCyclomaticNumber
        entries.append(Descriptor(func, int, label, "CYCLONUM"))

        label = "Number of ring systems"
        func = self._getNumberRingSystems
        entries.append(Descriptor(func, int, label, "NRS"))

        label = "Normalized number of ring systems"
        func = self._getNormalizedNumberRingSystems
        entries.append(Descriptor(func, float, label, "NNRS"))

        label = "Ring Fusion degree"
        func = self._getRingFusionDegree
        entries.append(Descriptor(func, float, label, "RFD"))

        label = "Total ring size"
        func = self._getTotalRingSize
        entries.append(Descriptor(func, int, label, "TRS"))

        label = "Ring perimeter"
        func = self._getRingPerimeter
        entries.append(Descriptor(func, int, label, "RNGPERM"))

        label = "Ring bridge count"
        func = self._getRingBridges
        entries.append(Descriptor(func, int, label, "RNGBDGE"))

        label = "Molecule cyclized degree"
        func = self._getMolecularCyclizedDegree
        entries.append(Descriptor(func, float, label, "MCD"))

        label = "Ring Fusion density"
        func = self._getRingFusionDensity
        entries.append(Descriptor(func, float, label, "RFDELTA"))

        label = "Ring complexity index"
        func = self._getRingComplexityIndex
        entries.append(Descriptor(func, float, label, "RCI"))

        label = "Van der Waals surface area"
        func = self._getTotalVDWSurfaceArea
        entries.append(Descriptor(func, float, label, "VSA"))

        for i in range(1, 9):
            label = "MR%d" % i
            func = partial(self._getPVSAMolarRefractivity, n=i)
            entries.append(Descriptor(func, float, label, "MR%d" % i))

        for i in range(1, 11):
            label = "ALOGP%d" % i
            func = partial(self._getPVSAlogP, n=i)
            entries.append(Descriptor(func, float, label, "ALOGP%d" % i))

        for i in range(1, 15):
            label = "PEOE%d" % i
            func = partial(self._getPVSAPartialCharges, n=i)
            entries.append(Descriptor(func, float, label, "PEOE%d" % i))

        return tuple(entries)

    # =========================================================================
    # Common descriptor asserts
    # =========================================================================

    def _assertMultipleHeavyAtoms(self, st):
        """
        Raises a TopoError if the structure has only one heavy atom
        """
        if self._getAtomCount(st) == 1:
            raise TopoError("Requires more than one atom")

    def _assertPathLength(self, st, m):
        """
        Raises a TopoError if the structure does not have any paths of length m
        """
        if m not in self._getConnectivityPaths(st):
            raise TopoError("No paths of length %d present in molecule" % m)

    # =========================================================================
    # Descriptor methods
    # =========================================================================

    @cached_topo_descriptor
    def _getFirstZagrebIndex(self, st):
        """
        Handbook p. 509: First Zagreb index (M_1) -- topological index based on
        atomic vertex degrees. The first index in strictly related to zero-order
        connectivity index. Also called the Gutman index.
        """
        vertex_func = self._getVertexDegree
        # NOTE: vertex_func(i)^2 instead of _generalizedConnectivityIndex
        return sum(
            [pow(vertex_func(st, i), 2) for i in self._getAtomIndices(st)])

    @cached_topo_descriptor
    def _getFirstZagrebValenceIndex(self, st):
        """
        Handbook p. 509: First valence Zagreb index (M^v_1) -- topological index
        based on atomic valence vertex degrees. The first index in strictly
        related to zero-order connectivity index.
        """
        vertex_func = self._getValenceVertexDegree
        # NOTE: vertex_func(i)^2 instead of _generalizedConnectivityIndex
        return sum(
            [pow(vertex_func(st, i), 2) for i in self._getAtomIndices(st)])

    @cached_topo_descriptor
    def _getSecondZagrebIndex(self, st):
        """
        Handbook p. 509: Second Zagreb index (M_2) -- topological index based on
        atomic vertex degrees. The second index in strictly related to
        first-order connectivity index; it is part of the Schuttz molecular
        topological index.
        """
        vertex_func = self._getVertexDegree
        return self._generalizedConnectivityIndex(st, 1, vertex_func, 1.0)

    @cached_topo_descriptor
    def _getSecondZagrebValenceIndex(self, st):
        """
        Handbook p. 509: Second valence Zagreb index (M^v_2) -- topological
        index based on atomic valence vertex degrees. The second index in
        strictly related to first-order connectivity index.
        """
        vertex_func = self._getValenceVertexDegree
        return self._generalizedConnectivityIndex(st, 1, vertex_func, 1.0)

    @cached_topo_descriptor
    def _getPolarityNumber(self, st):
        """
        Handbook p. 114: polarity number (p_2) -- also known as Wiener polarity
        number; the number of pairs of graph vertices which are separated by
        three edges. It is usually assumed that the polarity number accounts
        for the flexibility of acyclic structures, p being equal to the number
        of bonds around which free rotations can take place.
        """
        return self._getDistanceCount(st, 3)

    @cached_topo_descriptor
    def _getSimpleTopologicalIndex(self, st):
        """
        Handbook p. 476: simple topological index (S) -- descriptor related to
        molecular branching, proposed as the product of the vertex degrees
        for each atom [Narumi, 1987].
        NOTE: Take the natural log to avoid overflow.
        """
        self._assertMultipleHeavyAtoms(st)  # prevent log(0)
        return sum([
            math.log(self._getVertexDegree(st, i))
            for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getHarmonicTopologicalIndex(self, st):
        """
        Handbook p. 476: harmonic topological index (H) -- descriptor related
        to the simple topological index [Narumi, 1987].
        """
        self._assertMultipleHeavyAtoms(st)  # prevent divide by zero
        inv_vertex = [
            old_div(1.0, self._getVertexDegree(st, i))
            for i in self._getAtomIndices(st)
        ]
        return old_div(self._getAtomCount(st), sum(inv_vertex))

    @cached_topo_descriptor
    def _getGeometricTopologicalIndex(self, st):
        """
        Handbook p. 476: geometric topological index (G) -- descriptor related
        to the simple topological index [Narumi, 1987].
        """
        exponent = old_div(1.0, self._getAtomCount(st))
        return pow(math.exp(self._getSimpleTopologicalIndex(st)), exponent)

    @cached_topo_descriptor
    def _getTotalStructureConnectivityIndex(self, st):
        """
        Handbook p. 86: total structure connectivity index -- connectivity index
        contemporarily accounting for all the atoms in the graph. This is
        the square root of the simple topological index proposed by Narumi for
        measuring molecular branching. (Handbook misprint: inverse square root)
        """
        # NOTE: DRAGON uses the log of the Narumi Simple Topological index
        return pow(math.exp(self._getSimpleTopologicalIndex(st)), -0.5)

    def _wienerOperator(self, matrix):
        """
        Handbook p. 9, Wiener operator W(M) -- half the sum of the off-diagonal
        entries of the matrix M; name taken from the Wiener index.
        """
        return 0.5 * numpy.sum(matrix)

    @cached_topo_descriptor
    def _getWienerIndex(self, st):
        """
        Handbook p. 497: Wiener index (W) -- the sum over all bonds of the
        product of the number of vertices on each side of the bond; ie. the sum
        of all topological distancesinthe H-depleted molecular graph. Also
        known as the Weiner number.
        """
        return self._wienerOperator(self._getDistanceMatrix(st))

    @cached_topo_descriptor
    def _getMeanWienerIndex(self, st):
        """
        Handbook p. 497: mean Wiener index (W_bar) -- defined from the Wiener
        index as 2 * W / (A * (A - 1))
        """
        self._assertMultipleHeavyAtoms(st)  # prevent divide by zero
        n = self._getAtomCount(st)
        return 2.0 * self._getWienerIndex(st) / (n * (n - 1))

    @cached_topo_descriptor
    def _getXuIndex(self, st):
        """
        Handbook p. 507: Xu index -- descriptor accounting for molecular size
        and branching. Defined as sqrt(A) * ln(L), where L represents the
        valence average topological distance calculated by vertex degree and
        vertex distance degree of all atoms.
        NOTE: Use natural log (comparable to E-DRAGON)
        """
        self._assertMultipleHeavyAtoms(st)
        indices = self._getAtomIndices(st)
        delta = [self._getVertexDegree(st, i) for i in indices]
        sigma = [self._getVertexDistanceDegree(st, i) for i in indices]
        L = numpy.dot(numpy.multiply(delta, sigma), sigma)
        L /= float(numpy.dot(delta, sigma))
        return math.sqrt(self._getAtomCount(st)) * math.log(L)

    @cached_topo_descriptor
    def _getQuadraticIndex(self, st):
        """
        Handbook p. 44: quadratic index (Q) -- obtained by normalization of the
        first Zagreb index. Also called normalized quadratic index.
        """
        M1 = self._getFirstZagrebIndex(st)
        return 3 - (2 * self._getAtomCount(st)) + (old_div(M1, 2.0))

    @cached_topo_descriptor
    def _getRadialCentricInformationIndex(self, st):
        """
        Handbook p. 44: radial centric information index (I^V_C,R) -- defined
        as the lopping centric information index, but where ng is the number of
        graph vertices having the same atom eccentricity.
        """
        n = float(self._getAtomCount(st))
        info_func = lambda x: -(old_div(x, n)) * math.log((old_div(x, n)), 2)
        return self._centricIndexSum(st, self._getAtomEccentricity, info_func)

    @cached_topo_descriptor
    def _getMeanSquareDistanceIndex(self, st):
        """
        Handbook p. 113: mean square distance index (MSD) -- calculated from the
        second order distance distribution moment [Balaban, 1983a].
        NOTE: Handbook misprint -- the square root should encompass [A * (A-1)]
        """
        self._assertMultipleHeavyAtoms(st)  # prevent divide by zero
        n = float(self._getAtomCount(st))
        sq_dist_sum = numpy.sum(numpy.square(self._getDistanceMatrix(st)))
        return pow(old_div(sq_dist_sum, (n * (n - 1))), 0.5)

    @cached_topo_descriptor
    def _getSuperpendenticIndex(self, st):
        """
        Handbook p. 431: superpendentic index -- calculated from the pendent
        matrix, which is a submatrix of the distance matrix which is to the
        number of atoms by the number of terminal vertices. The superpendentic
        index is calculated as the square root of the sum of the products of
        the nonzero row elements in the pendent matrix.

        NOTE: As with DRAGON, instead of returning sqrt of sum of ow products:
            pow(sum(numpy.prod(pendent_matrix, axis=1)), 0.5)

        we avoid overflow by returning the sqrt of sum of log(row products)
        """
        terminal_vertices = [
            index for index in self._getAtomIndices(st)
            if self._getVertexDegree(st, index) == 1
        ]
        if not terminal_vertices:
            raise TopoError("No terminal vertices for superpendentic")
        # Instead of forming a new matrix, modify the distance matrix
        pendent_matrix = self._getDistanceMatrix(st).copy()
        # Ignore zeros on the diagonal
        numpy.fill_diagonal(pendent_matrix, 1)
        # Ignore columns that are not terminal vertices
        for index in self._getAtomIndices(st):
            if index not in terminal_vertices:
                pendent_matrix[:, index - 1].fill(1)
        # Return sqrt of sum of log(row products)
        return pow(numpy.sum(numpy.log(pendent_matrix)), 0.5)

    @cached_topo_descriptor
    def _getHararyIndex(self, st):
        """
        Handbook p. 209: Harary index (H) -- topological index derived from the
        reciprocal distance matrix by the Wiener operator (Harary number)
        """
        return self._wienerOperator(self._getReciprocalDistanceMatrix(st))

    @cached_topo_descriptor
    def _getLogPRSIndex(self, st):
        """
        Handbook p. 116: Log of PRS index -- log(Product of Row Sums index)
        defined as the lof of the product of the vertex distance degrees.
        Taking the log is preferred due to the large values that can be reached
        by the PRS index. NOTE: As per Handbook, log is base 10
        """
        self._assertMultipleHeavyAtoms(st)  # prevent log(0)
        # Distribute log to avoid overflow
        return sum([
            math.log10(self._getVertexDistanceDegree(st, i))
            for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getPoglianiIndex(self, st):
        """
        Handbook p. 116: Pogliani index (D^nu) -- sum of the ratio of the number
        of valence electrons to the principal quantum number for each atom.
        """
        pogliani = 0.0
        for atom in self._getHeavyStructure(st).atom:
            Zv_i = mm.mmelement_get_valence_count(atom.atomic_number)
            L_i = mm.mmelement_get_period(atom.atomic_number)
            pogliani += old_div(Zv_i, float(L_i))
        return pogliani

    def _baseSchultzIndex(self, st, vertex_func):
        """
        Handbook p. 381: Schultz molecular topological index (MTI) -- index
        derived from the adjacency and distance matrices, defined as the sum
        over (A+D).v intricacy numbers.
        NOTE: Handbook decomposes the index into two parts, a sum of square
        vertex degrees and vertex degree-vertex distance degree products, and
        erroneously labels the former as "the second Zagreb index". The sum for
        M2 is over only bonded pair i-j, whereas here is over ALL i-j pairs.

        :param vertex_func: vertex degree function
        :type vertex_func: f(st, index)
        """
        MTI = 0.0
        for i in self._getAtomIndices(st):
            vertex_degree = vertex_func(st, i)
            MTI += pow(vertex_degree, 2)
            MTI += vertex_degree * self._getVertexDistanceDegree(st, i)
        return MTI

    @cached_topo_descriptor
    def _getSchultzMolecularTopologicalIndex(self, st):
        """
        Handbook p. 381: Schultz molecular topological index (MTI) --
        molecular topological index calculated using the vertex degree.
        """
        return self._baseSchultzIndex(st, self._getVertexDegree)

    @cached_topo_descriptor
    def _getSchultzMolecularTopologicalValenceIndex(self, st):
        """
        Handbook p. 382: Schultz molecular topological valence index (MTI^v) --
        a vertex-valency-weighted analogue to the Schultz  molecular topological
        index calculated using the valence vertex degree.
        """
        return self._baseSchultzIndex(st, self._getValenceVertexDegree)

    @cached_topo_descriptor
    def _getMeanDistanceDegreeDeviation(self, st):
        """
        Handbook p. 114: mean distance degree deviation (delta sigma) -- the
        mean devaiationo f the row sum of the distance matrix from its mean.
        """
        average = self._getAverageDistanceDegree(st)
        return numpy.mean([
            abs(self._getVertexDistanceDegree(st, i) - average)
            for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getRamificationIndex(self, st):
        """
        Handbook p. 475: ramification index (r) -- a simple ramification index
        proposed for acyclic graphs, where the sum runs over all the vertex
        degrees greater than two.
        """
        delta = [self._getVertexDegree(st, i) for i in self._getAtomIndices(st)]
        return sum(di - 2 for di in delta if di > 2)

    def _baseGutmanIndex(self, st, vertex_func):
        """
        Handbook p. 382: Gutman molecular topological index (S_G) -- sum of the
        topological distance between the vertices vi and vj weighted by the
        product of the endpoint vertex degrees.

        :param vertex_func: vertex degree function
        :type vertex_func: f(st, index)
        """
        dist_matrix = self._getDistanceMatrix(st)
        SG = 0
        for i, j in self._getAtomPairs(st):
            vi, vj = vertex_func(st, i), vertex_func(st, j)
            SG += dist_matrix[i - 1][j - 1] * vi * vj
        return SG

    @cached_topo_descriptor
    def _getGutmanMolecularTopologicalIndex(self, st):
        """
        Handbook p. 382: Gutman molecular topological index (S_G) -- the
        vertex degree weighted form of S_G.
        """
        return self._baseGutmanIndex(st, self._getVertexDegree)

    @cached_topo_descriptor
    def _getGutmanMolecularTopologicalValenceIndex(self, st):
        """
        Handbook p. 382: Gutman molecular topological valence index (S_G) --
        a vertex-valency-weighted analogue of S_G, where the weighting factor
        is multiplicative.
        """
        return self._baseGutmanIndex(st, self._getValenceVertexDegree)

    @cached_topo_descriptor
    def _getAverageDistanceDegree(self, st):
        """
        Handbook p. 114: average distance degree (sigma^bar) -- average row sum
        of the distance matrix. Note this is also 2 * (Wiener index) / A.
        """
        return numpy.mean([
            self._getVertexDistanceDegree(st, i)
            for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getUnipolarity(self, st):
        """
        Handbook p. 114: unipolarity (simga*) -- minimum value of the vertex
        distance degrees.
        """
        return min([
            self._getVertexDistanceDegree(st, i)
            for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getCentralization(self, st):
        """
        Handbook p. 114: centralization (delta sigma^*) -- molecular invariant
        immediately derived from the distance matrix.
        """
        n = self._getAtomCount(st)
        return 2 * self._getWienerIndex(st) - n * self._getUnipolarity(st)

    @cached_topo_descriptor
    def _getVariation(self, st):
        """
        Handbook p. 114: variation (delta sigma^+) -- molecular invariant
        immediately derived from the distance matrix.
        """
        unipolarity = self._getUnipolarity(st)
        return max([
            self._getVertexDistanceDegree(st, i) - unipolarity
            for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getMolecularElectrotopologicalVariation(self, st):
        """
        Adopted from DRAGON: molecular electrotopological variation
        """
        return sum(numpy.fabs(self._getFieldEffects(st)))

    @cached_topo_descriptor
    def _getMaximalElectrotopologicalPositiveVariation(self, st):
        """
        Adopted from DRAGON: maximal electrotopological positive variation
        """
        positive_fe = [fe for fe in self._getFieldEffects(st) if fe > 0.0]
        if not positive_fe:
            raise TopoError("No positive field effect values")
        return max(positive_fe)

    @cached_topo_descriptor
    def _getMaximalElectrotopologicalNegativeVariation(self, st):
        """
        Adopted from DRAGON: maximal electrotopological negative variation
        """
        positive_fe = [abs(fe) for fe in self._getFieldEffects(st) if fe < 0.0]
        if not positive_fe:
            raise TopoError("No negative field effect values")
        return max(positive_fe)

    @cached_topo_descriptor
    def _getEccentricConnectivityIndex(self, st):
        """
        Handbook p. 124: eccentric connectivity index (xi^C) -- the sum of the
        products between eccentricity and vertex degree over all atoms.
        """
        xi_sum = 0.0
        for i in self._getAtomIndices(st):
            nu_i = self._getAtomEccentricity(st, i)
            delta_i = self._getVertexDegree(st, i)
            xi_sum += nu_i * delta_i
        return xi_sum

    @cached_topo_descriptor
    def _getEccentricity(self, st):
        """
        Handbook p. 112: eccentricity (nu) -- sum of atom eccentricities.
        """
        return sum([
            self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getAverageEccentricity(self, st):
        """
        Handbook p. 112: average atom eccentricity (nu^bar) -- average of atom
        eccentricities.
        """
        return old_div(float(self._getEccentricity(st)), self._getAtomCount(st))

    @cached_topo_descriptor
    def _getEccentric(self, st):
        """
        Handbook p. 112: eccentricity (Delta nu) -- mean deviation from average
        of atom eccentricities.
        """
        average = self._getAverageEccentricity(st)
        dev_sum = sum([
            abs(self._getAtomEccentricity(st, i) - average)
            for i in self._getAtomIndices(st)
        ])
        return old_div(float(dev_sum), self._getAtomCount(st))

    @cached_topo_descriptor
    def _getValenceConnectivityIndex(self, st, m):
        """
        Handbook p. 85: connectivity indices of mth order -- usually known as
        Kier-Hall connectivity indices, defined a general scheme based on the
        Randic index to also calculate zero-order and higher-order descriptors.

        :param m: index order of the connectivity index
        :type m: int
        """
        vertex_func = self._getValenceVertexDegree
        return self._generalizedConnectivityIndex(st, m, vertex_func, -0.5)

    @cached_topo_descriptor
    def _getMeanValenceConnectivityIndex(self, st, m):
        """
        Handbook p. 86: mean valence connectivity indices of mth order -- again,
        replacing the vertex degree by the valence vertex degree in the similar
        mean connectivity indices.

        :param m: index order of the connectivity index
        :type m: int
        """
        connectivity_index = float(self._getValenceConnectivityIndex(st, m))
        return old_div(connectivity_index,
                       len(self._getConnectivityPaths(st)[m]))

    @cached_topo_descriptor
    def _getQuasiWienerIndex(self, st):
        """
        Handbook p. 253: quasi-Wiener index (W^*) -- sum of the reciprocal A - 1
        positive eigenvalues was proposed as a molecular descriptor. For acyclic
        graphs, the quasi-Wiener index coincides with the Wiener index, while
        for cycle-containing graphs the two descriptors differ. Moreover, it has
        been demonstrated that the quasi-Wiener index coincides with the
        Kirchhoff number for any graph.
        """
        eigenvalues = self._getLaplacianEigenvalues(st)[:-1]  # A-1 values
        quasi_sum = sum(
            old_div(1.0, eigenvalues[i]) for i in range(len(eigenvalues)))
        return self._getAtomCount(st) * quasi_sum

    @cached_topo_descriptor
    def _getFirstMoharIndex(self, st):
        """
        Handbook p. 254: first Mohar indices (TI)_1 -- index derived from the
        Laplacian matrix.
        """
        self._assertMultipleHeavyAtoms(st)  # prevent log(0)
        bond_atom_ratio = old_div(float(self._getBondCount(st)),
                                  self._getAtomCount(st))
        return 2 * math.log10(bond_atom_ratio) * self._getQuasiWienerIndex(st)

    @cached_topo_descriptor
    def _getSecondMoharIndex(self, st):
        """
        Handbook p. 254: second Mohar indices (TI)_2 -- index derived from the
        Laplacian matrix.
        """
        first_non_zero_eigen = self._getLaplacianEigenvalues(st)[-2]
        return old_div(4.0, (self._getAtomCount(st) * first_non_zero_eigen))

    @cached_topo_descriptor
    def _getSpanningTreeNumber(self, st):
        """
        Handbook p. 253: log of spanning tree number (T^*) -- log of the product
        of the positive A-1 eigenvalues of the Laplacian matrix.
        NOTE: Take the natural log to avoid overflow.
        """
        eigenvalues = self._getLaplacianEigenvalues(st)[:-1]  # A-1 values
        eigenvalue_logsum = sum(math.log(lamdda_i) for lamdda_i in eigenvalues)
        return eigenvalue_logsum - math.log(self._getAtomCount(st))

    @cached_topo_descriptor
    def _getBenzeneLikelinessIndex(self, st):
        """
        Handbook p. 379: benzene-likeliness index -- an aromaticity index based
        on the first-order valence connectivity index divided by the number of
        bonds and normalized on the benzene molecule.
        """
        vX1 = self._getValenceConnectivityIndex(st, 1)
        return 3 * vX1 / self._getBondCount(st)

    @cached_topo_descriptor
    def _getTopologicalChargeIndex(self, st, k):
        """
        Handbook p. 445: topological charge index (G_k) -- the half-sum of all
        charge terms corresponding to pair of vertices with topological
        distance = k and would represent the total charge transfer between
        atoms placed at topological distance k.

        :param k: path length
        :type k: int
        """
        charge_term_matrix = self._getChargeTermMatrix(st)
        distance_matrix = self._getDistanceMatrix(st)
        charge_sum_k = 0.0
        for i in range(distance_matrix.shape[0]):
            for j in range(i + 1, distance_matrix.shape[0]):
                # Distance matrix is symmetric, d_ij == d_ji
                if distance_matrix[i][j] == k:
                    # Charge term matrix is asymmetric, CT_ij != CT_ji
                    charge_sum_k += abs(charge_term_matrix[i][j])
                    charge_sum_k += abs(charge_term_matrix[j][i])
        return 0.5 * charge_sum_k

    @cached_topo_descriptor
    def _getMeanTopologicalChargeIndex(self, st, k):
        """
        Handbook p. 445: mean topological charge index (J_k) -- topological
        charge index divided by the number of edges in an acyclic molecule.
        Values are set to zero for k greater than the molecular diameter.

        :param k: path length
        :type k: int
        """
        if k > self._getTopologicalDiameter(st):
            return 0.0
        norm = self._getDistanceCount(st, k)
        return old_div(self._getTopologicalChargeIndex(st, k), float(norm))

    @cached_topo_descriptor
    def _getGlobalTopologicalChargeIndex(self, st):
        """
        Handbook p. 445: global topological charge index (J) -- sum over mean
        topological charge indinces, with the superior limit equal to 5. The
        value was proposed by the authors to obtain uniform length descriptors.
        NOTE: Past implementation, as well as DRAGON, sum up to k=10
        """
        return sum(
            self._getMeanTopologicalChargeIndex(st, i) for i in range(1, 11))

    @cached_topo_descriptor
    def _getHyperDistancePathIndex(self, st):
        """
        Handbook p. 118: hyper-distance-path index (D_p) -- defined as applying
        the Wiener operator to the distance-path matrix, where entry i-j of the
        matrix is calculated from the distance matrix D as all the possible
        combinations of two elements taken from d_ij + 1 elements (binomial
        coefficient).
        """
        dist_matrix = self._getDistanceMatrix(st)
        dist_path_matrix = old_div((numpy.power(dist_matrix, 2) + dist_matrix),
                                   2.0)
        return self._wienerOperator(dist_path_matrix)

    @cached_topo_descriptor
    def _getReciprocalHyperDistancePathIndex(self, st):
        """
        Handbook p. 118: reciprocal hyper-distance-pathindex (D_p^-1) -- defined
        in the same was as the hyper-distance-path index, but where elements of
        the distance-path matrix are reciprocal.
        """
        dist_matrix = self._getDistanceMatrix(st)
        dist_path_matrix = old_div((numpy.power(dist_matrix, 2) + dist_matrix),
                                   2.0)
        return self._wienerOperator(matrix_reciprocal(dist_path_matrix))

    @cached_topo_descriptor
    def _getSquareReciprocalDistanceSumIndex(self, st):
        """
        Handbook p. 210: square reciprocal distance sum index (Har2) -- the
        Harary index calculated with the reciprocal squared distance matrix.
        """
        return self._wienerOperator(self._getReciprocalSquareDistanceMatrix(st))

    @cached_topo_descriptor
    def _getModifiedRandicIndex(self, st):
        """
        Handbook p. 88: modified Randic index (chi^1_mod) -- sum of atomic
        properties, accounting for valence electrons and extended connectivities
        using a Randic connectivity index-type formula.

        0.5 * sum_atoms[ sum_atomi_bonds[ Z_i * (delta_i * delta_j) ^ -0.5 ]]
        """
        randic_sum = 0.0
        for atom in self._getHeavyStructure(st).atom:
            for bond in atom.bond:
                di = self._getVertexDegree(st, bond.atom1.index)
                dj = self._getVertexDegree(st, bond.atom2.index)
                randic_sum += old_div(float(atom.atomic_number),
                                      pow(di * dj, 0.5))
        return 0.5 * randic_sum

    @cached_topo_descriptor
    def _getBalabanCentricIndex(self, st):
        """
        Handbook p. 42: Balaban centric index (B) -- topological index defined
        for acyclic graphs based on the pruning of the graph, a stepwise
        procedure for removing all the terminal vertices.
        """
        if self._getCyclomaticNumber(st):
            raise TopoError("Cycles present, only defined for acyclic graphs")

        square_func = lambda x: x * x
        return self._centricIndexSum(st, self._getVertexDegree, square_func)

    @cached_topo_descriptor
    def _getLoppingCentricInformationIndex(self, st):
        """
        Handbook p. 42: lopping centric information index (I_B) -- index
        defined as the mean information content derived from the pruning of
        acyclic graphs based on the pruning of the graph, a stepwise
        procedure for removing all the terminal vertices.
        """
        if self._getCyclomaticNumber(st):
            raise TopoError("Cycles present, only defined for acyclic graphs")

        n = float(self._getAtomCount(st))
        info_func = lambda x: -(old_div(x, n)) * math.log((old_div(x, n)), 2)
        return self._centricIndexSum(st, self._getVertexDegree, info_func)

    @cached_topo_descriptor
    def _getKierHallElectronegativitySum(self, st):
        """
        Handbook p. 475: Kier-Hall electronegativity index (KHE) -- sum of
        Kier-Hall electrotopological states.
        """
        KHE_sum = 0.0
        for i, atom in enumerate(self._getHeavyStructure(st).atom, start=1):
            delta_vi = self._getValenceVertexDegree(st, i)
            delta_i = self._getVertexDegree(st, i)
            L_i = mm.mmelement_get_period(atom.atomic_number)
            KHE_sum += old_div(float(delta_vi - delta_i), pow(L_i, 2))
        return KHE_sum

    @cached_topo_descriptor
    def _getTopologicalDistanceSum(self, st, elem1, elem2):
        """
        Adopted from DRAGON: sum of topological distances between all pairs
        of given atom types.
        """
        distance_matrix = self._getDistanceMatrix(st)
        elements = {elem1, elem2}
        top_dist = 0
        heavy_st = self._getHeavyStructure(st)
        ele_map = {int(atom): atom.element for atom in heavy_st.atom}
        for i, j in self._getAtomPairs(st):
            if {ele_map[i], ele_map[j]} == elements:
                top_dist += distance_matrix[i - 1, j - 1]
        return top_dist

    @cached_topo_descriptor
    def _getBaryszWeinerTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Wiener operator to
        the Barysz distance matrix.
        """
        return self._wienerOperator(self._getBaryszDistanceMatrix(st))

    @cached_topo_descriptor
    def _getElectronegativityWienerTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Wiener operator to
        the electronegativities weighted distance matrix.
        """
        distance_matrix = self._getElectronegativityDistanceMatrix(st)
        return self._wienerOperator(distance_matrix)

    @cached_topo_descriptor
    def _getMassWienerTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Wiener operator to
        the atomic mass weighted distance matrix.
        """
        return self._wienerOperator(self._getMassDistanceMatrix(st))

    @cached_topo_descriptor
    def _getVDWVolumeWienerTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Wiener operator to
        the van der Waals volume weighted distance matrix.
        """
        return self._wienerOperator(self._getVDWVolumeDistanceMatrix(st))

    @cached_topo_descriptor
    def _getPolarizabilityWienerTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Wiener operator to
        the polarizability weighted distance matrix.
        """
        distance_matrix = self._getPolarizabilityDistanceMatrix(st)
        return self._wienerOperator(distance_matrix)

    @cached_topo_descriptor
    def _getBaryszIndex(self, st):
        """
        Handbook p. 489: Barysz index (J_het) -- a generalization of the
        Balaban distance connectivity index calculated by applying the
        Ivanciuc-Balaban operator to the Barysz distance matrix.
        """
        return self._balbanOperator(st, self._getBaryszDistanceMatrix(st))

    @cached_topo_descriptor
    def _getElectronegativityBalabanTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
        operator to the electronegativities weighted distance matrix.
        """
        distance_matrix = self._getElectronegativityDistanceMatrix(st)
        return self._balbanOperator(st, distance_matrix)

    @cached_topo_descriptor
    def _getMassBalabanTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
        operator to the atomic mass  weighted distance matrix.
        """
        return self._balbanOperator(st, self._getMassDistanceMatrix(st))

    @cached_topo_descriptor
    def _getVDWVolumeBalabanTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
        operator to the van der Waals volume weighted distance matrix.
        """
        return self._balbanOperator(st, self._getVDWVolumeDistanceMatrix(st))

    @cached_topo_descriptor
    def _getPolarizabilityBalabanTypeIndex(self, st):
        """
        Handbook p. 489: index calculated by applying the Ivanciuc-Balaban
        operator to the polarizability weighted distance matrix.
        """
        distance_matrix = self._getPolarizabilityDistanceMatrix(st)
        return self._balbanOperator(st, distance_matrix)

    @cached_topo_descriptor
    def _getTopologicalDiameter(self, st):
        """
        Handbook p. 112: topological diameter (D) -- defined as the maximum
        atom eccentricity
        """
        return max([
            self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getTopologicalRadius(self, st):
        """
        Handbook p. 112: topological radius (R) -- defined as the minimum atom
        eccentricity
        """
        return min([
            self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st)
        ])

    @cached_topo_descriptor
    def _getPetitjeanShapeIndex(self, st):
        """
        Handbook p. 390: Petitjean shape index (I_2) -- a topological anisometry
        descriptor, also called a graph-theoretical shape coefficient.
        """
        self._assertMultipleHeavyAtoms(st)  # prevent divide by zero
        diameter = self._getTopologicalDiameter(st)
        radius = self._getTopologicalRadius(st)
        return old_div((diameter - radius), float(radius))

    @cached_topo_descriptor
    def _getBalabanDistanceConnectivityIndex(self, st):
        """
        Handbook p. 21: Balaban distance connectivity index (J) -- defined in
        terms of sums over each i th row of the distance matrix as::

            B/(C+1) * sum_bonds[ (vertex dist_i * vertex dist_j) ^ -0.5 ]

        It is also called distance connectivity index or average distance sum
        connectivity.
        """
        return self._balbanOperator(st, self._getDistanceMatrix(st))

    @cached_topo_descriptor
    def _getSolvationConnectivityIndex(self, st, m):
        """
        Handbook p. 88: solvation connectivity indices (m chi^s_q) -- descriptor
        defined in order to model solvation entropy and describe dispersion
        interactions in solution.
        """
        self._assertMultipleHeavyAtoms(st)  # prevent pow(0, exponent)
        self._assertPathLength(st, m)
        connectivity_index = 0.0
        atoms = self._getHeavyStructure(st).atom
        for path in self._getConnectivityPaths(st)[m]:
            L = [mm.mmelement_get_period(atoms[i].atomic_number) for i in path]
            delta = [self._getVertexDegree(st, i) for i in path]
            connectivity_index += old_div(numpy.prod(L),
                                          pow(numpy.prod(delta), 0.5))
        return (old_div(1.0, pow(2, m + 1))) * connectivity_index

    @cached_topo_descriptor
    def _getConnectivityIndex(self, st, m):
        """
        Handbook p. 85: connectivity indices of mth order -- usually known as
        Kier-Hall connectivity indices, defined a general scheme based on the
        Randic index to also calculate zero-order and higher-order descriptors.

        :param m: index order of the connectivity index
        :type m: int
        """
        vertex_func = self._getVertexDegree
        return self._generalizedConnectivityIndex(st, m, vertex_func, -0.5)

    @cached_topo_descriptor
    def _getMeanConnectivityIndex(self, st, m):
        """
        Handbook p. 85: mean connectivity indices of mth order -- the Handbook
        only describes the mean Randic connectivity index (m=1), defined as the
        Randic connectivity index divided by the number of bonds. Here, we
        extrapolate to zero and higher orders

        :param m: index order of the connectivity index
        :type m: int
        """
        connectivity_index = float(self._getConnectivityIndex(st, m))
        return old_div(connectivity_index,
                       len(self._getConnectivityPaths(st)[m]))

    @cached_topo_descriptor
    def _getRDCHIIndex(self, st):
        """
        Handbook p. 116: RDCHI index -- topological index based on a
        Randic-type formula, which increases with molecular size but decreases
        with molecular branching.
        """
        RDCHI = 0.0
        for i, j in self._getBondIndices(st):
            RDS_i = self._getReciprocalDistanceSum(st, i)
            RDS_j = self._getReciprocalDistanceSum(st, j)
            RDCHI += pow(RDS_i * RDS_j, -0.5)
        return RDCHI

    @cached_topo_descriptor
    def _getRDSQIndex(self, st):
        """
        Handbook p. 116: RDSQ index -- topological index based on a
        Randic-type formula, which increases with both molecular size and
        molecular branching.
        """
        RDSQ = 0.0
        for i, j in self._getBondIndices(st):
            RDS_i = self._getReciprocalDistanceSum(st, i)
            RDS_j = self._getReciprocalDistanceSum(st, j)
            RDSQ += pow(RDS_i * RDS_j, 0.5)
        return RDSQ

    @cached_topo_descriptor
    def _getKierAlphaModifiedShapeIndex(self, st, m):
        """
        Handbook p. 249: Kier alpha-modified shape descriptor (m kappa_alpha) --
        descriptor defined in terms of the number of graph vertices A and the
        number of paths with length (m = 1,2,3). The alpha-modified version
        takes into account the different shape contribution of heteroatoms and
        hybridization states.
        """
        self._assertPathLength(st, m)

        # alpha values indexed by atomic number and hybridization state
        # Handbook p. 250, Table K-1
        R_Csp3 = 0.77
        alpha_dict = {
            (6, canvas.Sp3): (old_div(R_Csp3, R_Csp3)) - 1.0,
            (6, canvas.Sp2): (old_div(0.67, R_Csp3)) - 1.0,
            (6, canvas.Sp): (old_div(0.60, R_Csp3)) - 1.0,
            (7, canvas.Sp3): (old_div(0.74, R_Csp3)) - 1.0,
            (7, canvas.Sp2): (old_div(0.62, R_Csp3)) - 1.0,
            (7, canvas.Sp): (old_div(0.55, R_Csp3)) - 1.0,
            (8, canvas.Sp3): (old_div(0.74, R_Csp3)) - 1.0,
            (8, canvas.Sp2): (old_div(0.62, R_Csp3)) - 1.0,
            (9, canvas.Sp3): (old_div(0.72, R_Csp3)) - 1.0,
            (15, canvas.Sp3): (old_div(1.10, R_Csp3)) - 1.0,
            (15, canvas.Sp2): (old_div(1.00, R_Csp3)) - 1.0,
            (16, canvas.Sp3): (old_div(1.04, R_Csp3)) - 1.0,
            (16, canvas.Sp2): (old_div(0.94, R_Csp3)) - 1.0,
            (17, canvas.Sp3): (old_div(0.99, R_Csp3)) - 1.0,
            (35, canvas.Sp3): (old_div(1.14, R_Csp3)) - 1.0,
            (53, canvas.Sp3): (old_div(1.33, R_Csp3)) - 1.0,
        }

        # parameter derived from the ration of the covalent raidus relative
        # to the sp3 carbon atom
        alpha = 0.0
        for a in self._getChmMol(st).getHeavyAtoms():
            atom_key = (a.getAtomicNumber(), a.getHybridization())
            try:
                alpha += alpha_dict[atom_key]
            except KeyError:
                raise TopoError("No alpha defined for atom %d %d" % atom_key)

        A = self._getAtomCount(st)
        A_alpha = A + alpha
        if m == 1:
            minmax_path_count = A_alpha * pow(A_alpha - 1, 2)
        elif m == 2:
            minmax_path_count = (A_alpha - 1) * pow(A_alpha - 2, 2)
        elif m == 3 and A % 2 == 0:  # even number of atoms
            minmax_path_count = (A_alpha - 3) * pow(A_alpha - 2, 2)
        elif m == 3 and A % 2 != 0:  # odd number of atoms
            minmax_path_count = (A_alpha - 1) * pow(A_alpha - 3, 2)
        else:
            raise TopoError("Unknown m value %d" % m)

        num_paths = len(self._getConnectivityPaths(st)[m]) + alpha
        return old_div(float(minmax_path_count), pow(num_paths, 2))

    @cached_topo_descriptor
    def _getKierMolecularFlexibilityIndex(self, st):
        """
        Handbook p. 178: Kier molecular flexibility index (fi) -- a measure of
        molecular flexibility derived from the Kier alpha-modified shape
        descriptors, kappa1 encodes information about the count of atoms and
        relative cyclicity of molecules, while kappa2 encodes information about
        branching or relative spatial density of molecules.
        """
        kappa1 = self._getKierAlphaModifiedShapeIndex(st, 1)
        kappa2 = self._getKierAlphaModifiedShapeIndex(st, 2)
        return kappa1 * kappa2 / self._getAtomCount(st)

    @cached_topo_descriptor
    def _getMolecularPathWalkIndex(self, st, m):
        """
        Handbook p. 393: Molecular path/walk indices -- the average sum of
        atomic path/walk indices of equal length. Obtained by separately
        summing all the paths and walks of the same length, and then calculating
        the ratio between their counts.
        """
        self._assertPathLength(st, m)
        paths = self._getConnectivityPaths(st)[m]
        awc = self._getAtomicWalkCounts(st, m)
        path_walk_sum = 0.0
        for i in self._getAtomIndices(st):
            # Number of paths of length m from atom i
            atom_path_count = len([p for p in paths if i in (p[0], p[m])])
            path_walk_sum += old_div(atom_path_count, float(awc[i - 1]))
        return old_div(path_walk_sum, self._getAtomCount(st))

    @cached_topo_descriptor
    def _getEStateIndex(self, st):
        """
        Molecular Descriptors p. 42: E-state topological parameter (TI^E) --
        derived by applying the Ivanciuc-Balaban operator to the E-state index
        values. It has to be pointed out that the proposed formula for the
        E-state topological parameter cannot be used for every molecule because
        it presents two drawbacks: (1) it cannot be calculated when there exists
        one atom in the molecule with negative E-state value; (2) it assumes
        very large values even when one S value tends to zero. To overcome
        these drawbacks of the original formula, an alternative formula
        (adopted in the DRAGON descriptors) is used here.
        """
        B = self._getBondCount(st)
        C = self._getCyclomaticNumber(st)
        # exp(E-state index) = exp(I_i + Delta I_i)
        exp_S = [
            math.exp(sum(x)) for x in zip(self._getFieldEffects(st),
                                          self._getIntrinsicStates(st))
        ]
        dist_sum = 0.0
        for i, j in self._getBondIndices(st):
            S_i, S_j = exp_S[i - 1], exp_S[j - 1]
            dist_sum += pow(1.0 + (S_i * S_j), -0.5)
        return (old_div(float(B), (C + 1))) * dist_sum

    @cached_topo_descriptor
    def _getChiralAtomCount(self, st):
        """
        Unknown source: number of chiral atoms -- count chiral atoms labeled
        with either R or S (ignoring ANR and ANS).

        _getFullStructure needs to be avoided here, since H atom indexes would
        be altered, and Chirality labels will be invalidaded for 1D structures.
        """
        st_copy = st.copy()
        build.add_hydrogens(st_copy)
        chiral_atoms = analyze.get_chiral_atoms(st_copy)
        return len([c for c in chiral_atoms.values() if c in ("R", "S")])

    @cached_topo_descriptor
    def _getRingCount(self, st, n):
        """
        Adopted from DRAGON: number of rings of heavy atom count n
        """
        ring_lengths = [len(ring) for ring in self._getHeavyStructure(st).ring]
        return ring_lengths.count(n)

    @cached_topo_descriptor
    def _getAtomCount(self, st):
        """
        Handbook p. 16: atom number (A) -- defined as the total number of atoms
        in a molecule, which refers only to non-hydrogen atoms (atom count).
        """
        return self._getHeavyStructure(st).atom_total

    @cached_topo_descriptor
    def _getBondCount(self, st):
        """
        Handbook p. 28: bond number (B) -- defined as the number of bonds in
        the molecule (edge counting; bond count).
        """
        return len(self._getHeavyStructure(st).bond)

    @cached_topo_descriptor
    def _getRingAtomCount(self, st):
        """
        Molecular Descriptors p. 655: (A_R) -- the total number of atoms
        belonging to rings
        """
        ring_atoms = [
            index for ring in self._getHeavyStructure(st).ring
            for index in ring.getAtomIndices()
        ]
        return len(set(ring_atoms))

    @cached_topo_descriptor
    def _getRingBondCount(self, st):
        """
        Molecular Descriptors p. 655: (B_R) -- the total number of bonds
        belonging to rings
        """
        ring_bonds = [
            sorted([bond_edge.atom1.index, bond_edge.atom2.index])
            for ring in self._getHeavyStructure(st).ring
            for bond_edge in ring.edge
        ]
        return len(set([tuple(bond) for bond in ring_bonds]))

    @cached_topo_descriptor
    def _getNumberRingSystems(self, st):
        """
        Molecular Descriptors p. 655: number of ring systems (NRS)
        """
        return (self._getBondCount(st) - self._getRingBondCount(st)) - \
               (self._getAtomCount(st) - self._getRingAtomCount(st)) + 1

    @cached_topo_descriptor
    def _getNormalizedNumberRingSystems(self, st):
        """
        Molecular Descriptors p. 655: normalized number of ring systems (NRS*)
        by the cyclomatic number.
        """
        C = self._getCyclomaticNumber(st)
        if not C:
            raise TopoError("No ring systems present")
        return old_div(self._getNumberRingSystems(st), float(C))

    @cached_topo_descriptor
    def _getRingFusionDegree(self, st):
        """
        Molecular Descriptors p. 655: ring fusion degree (RFD) -- the
        reciprocal of the normalized number of ring systems.
        """
        return old_div(1.0, self._getNormalizedNumberRingSystems(st))

    @cached_topo_descriptor
    def _getTotalRingSize(self, st):
        """
        Molecular Descriptors p. 656: total ring size (R) -- the sum of the
        ring size of all the single cycles of all the ring systems; in this
        case, the atoms belonging to fused connections are counted twice.
        """
        ring_atoms = [
            index for ring in self._getHeavyStructure(st).ring
            for index in ring.getAtomIndices()
        ]
        return len(ring_atoms)

    @cached_topo_descriptor
    def _getRingPerimeter(self, st):
        """
        Molecular Descriptors p. 656: ring perimeter (R_P) -- represents the
        perimeter of all the rings present in the molecule.
        """
        return 2 * self._getRingBondCount(st) - self._getTotalRingSize(st)

    @cached_topo_descriptor
    def _getRingBridges(self, st):
        """
        Molecular Descriptors p. 656: ring bridges (R_B) -- represents the
        number of bridge edges.
        """
        return self._getTotalRingSize(st) - self._getRingBondCount(st)

    @cached_topo_descriptor
    def _getMolecularCyclizedDegree(self, st):
        """
        Molecular Descriptors p. 656: molecular cyclized degree (MCD) --
        ratio of ring atoms to total atoms.
        """
        return old_div(self._getRingAtomCount(st),
                       float(self._getAtomCount(st)))

    @cached_topo_descriptor
    def _getRingFusionDensity(self, st):
        """
        Molecular Descriptors p. 656: ring fusion density (RF Delta)
        """
        ring_atom_count = self._getRingAtomCount(st)
        if not ring_atom_count:
            raise TopoError("No ring systems present")
        return 2.0 * self._getRingBridges(st) / ring_atom_count

    @cached_topo_descriptor
    def _getRingComplexityIndex(self, st):
        """
        Molecular Descriptors p. 656: ring complexity index (C_R)
        """
        ring_atom_count = self._getRingAtomCount(st)
        if not ring_atom_count:
            raise TopoError("No rings present")
        return old_div(self._getTotalRingSize(st),
                       float(self._getRingAtomCount(st)))

    @cached_topo_descriptor
    def _getTotalVDWSurfaceArea(self, st):
        """
        Molecular Descriptors p. 609: Total VSA for all atoms
        """
        return sum(self._getVDWSurfaceArea(st))

    @cached_topo_descriptor
    def _getPVSAMolarRefractivity(self, st, n):
        """
        Molecular Descriptors p. 611: P_VSA based on molar refractivity.
        Property interval boundaries extracted from Table P16, p. 611
        """
        check_canvas_license()
        mr_typer = canvas_base.ChmAtomTyperMR()
        value_iterator = mr_typer.valueType(self._getChmMol(st), False)

        mr_values = [value.getValue() for value in value_iterator]
        intervals = [0.11, 0.26, 0.35, 0.39, 0.44, 0.485, 0.56]
        return self._propertyRangeVDWSurfaceArea(st, mr_values, intervals, n)

    @cached_topo_descriptor
    def _getPVSAlogP(self, st, n):
        """
        Molecular Descriptors p. 611: P_VSA based on log P values, to capture
        hydrophobic and hydrophillic effect. Property interval boundaries
        extracted from Table P16, p. 611
        """
        check_canvas_license()
        alogp_typer = canvas_base.ChmAtomTyperAlogP()
        value_iterator = alogp_typer.valueType(self._getChmMol(st), False)

        alogp_values = [value.getValue() for value in value_iterator]
        intervals = [-0.4, -0.2, 0.0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4]
        return self._propertyRangeVDWSurfaceArea(st, alogp_values, intervals, n)

    @cached_topo_descriptor
    def _getPVSAPartialCharges(self, st, n):
        """
        Molecular Descriptors p. 611: P_VSA based on Gasteiger charges, ie.
        partial equalization of orbital electronegativity (PEOE). Property
        interval boundaries extracted from Table P16, p. 611
        """
        check_canvas_license()
        gasteiger = canvas_base.ChmGasteigerCharge()
        try:
            gasteiger.calcGasteigerCharges(self._getChmMol(st))
        # TODO: Currently throws RuntimeError, need to wrap ChmException
        except RuntimeError as e:
            raise TopoError(str(e).strip())

        peoe_values = gasteiger.getCharges()
        intervals = [
            -0.3, -0.25, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2,
            0.25, 0.3
        ]
        return self._propertyRangeVDWSurfaceArea(st, peoe_values, intervals, n)

    # =========================================================================
    # Intermediate descriptor methods
    # =========================================================================

    @cached_topo_descriptor
    def _getHeavyStructure(self, st):
        """
        Handbook p. 313: Hydrogen-depleted molecule.
        """
        st_copy = st.copy()
        build.delete_hydrogens(st_copy)
        return st_copy

    @cached_topo_descriptor
    def _getFullStructure(self, st):
        """
        Fully hydrogenated molecule.
        """
        st_copy = self._getHeavyStructure(st).copy()
        build.add_hydrogens(st_copy)
        return st_copy

    @cached_topo_descriptor
    def _getChmMol(self, st):
        check_canvas_license()
        adaptor = canvas_base.ChmMmctAdaptor()
        return adaptor.create(int(self._getFullStructure(st)))

    @cached_topo_descriptor
    def _getHydrogenCount(self, st, heavy_atom_index):
        """
        Handbook p. 474: (h_i) -- the number of hydrogen atoms bonded to a
        given heavy atom.

        :param heavy_atom_index: 1-based index of the heavy atom structure
        :type heavy_atom_index: int
        """
        # This works because hydrogens are added to the HeavyStructure, which
        # appends them and does not change the heavy atom indices
        full_st_atom = self._getFullStructure(st).atom[heavy_atom_index]
        return sum(bond.atom2.atomic_number == 1 for bond in full_st_atom.bond)

    @cached_topo_descriptor
    def _getAtomIndices(self, st):
        """
        Returns list of atom indices, which are 1-based.
        """
        return list(range(1, self._getHeavyStructure(st).atom_total + 1))

    @cached_topo_descriptor
    def _getAtomPairs(self, st):
        """
        Returns list of all pairs of atom indices, which are 1-based.
        """
        n = self._getAtomCount(st)
        return [(i, j) for i in range(1, n + 1) for j in range(i + 1, n + 1)]

    @cached_topo_descriptor
    def _getBondIndices(self, st):
        """
        Returns list of bond index tuples, which are 1-based.
        Sorts indices (for comparision purposes)
        """
        return [
            tuple(sorted([bond.atom1.index, bond.atom2.index]))
            for bond in self._getHeavyStructure(st).bond
        ]

    @cached_topo_descriptor
    def _getAdjacencyMatrix(self, st):
        """
        Handbook p. 2: adjacency matrix (A) -- represents the whole set of
        connections between adjacent pairs of atoms. The entries a_ij of the
        matrix equal one if vertices vi and vj are bonded, and zero otherwise.
        The adjacency matrix is symmetric with dimension A x A , where A is the
        number of atoms and it is derived from an H-depleted molecular graph.
        """
        n = self._getAtomCount(st)
        adjmat = numpy.zeros((n, n), dtype=int)
        for i, j in self._getBondIndices(st):
            adjmat[i - 1][j - 1] = adjmat[j - 1][i - 1] = 1
        return adjmat

    @cached_topo_descriptor
    def _getVertexDegree(self, st, atom_index):
        """
        Handbook p. 474, 2: vertex degree (delta_i) -- the ith row sum of the
        adjacency matrix.

        :param atom_index: 1-based atomic index
        :type atom_index: int
        """
        return sum(self._getAdjacencyMatrix(st)[atom_index - 1])

    @cached_topo_descriptor
    def _getSimpleValenceVertexDegree(self, st, atom_index):
        """
        Handbook p. 474: valence vertex degree (delta^v_i) -- vertex degree
        taking into account all valence electrons of the ith atom. A simplified
        version of the equation below.

        :param atom_index: 1-based atomic index
        :type atom_index: int
        """
        atom = self._getHeavyStructure(st).atom[atom_index]
        Zv_i = mm.mmelement_get_valence_count(atom.atomic_number)
        h_i = self._getHydrogenCount(st, atom_index)
        return float(Zv_i - h_i)

    @cached_topo_descriptor
    def _getValenceVertexDegree(self, st, atom_index):
        """
        Handbook p. 474: valence vertex degree (delta^v_i) -- vertex degree
        taking into account all valence electrons of the ith atom. It encodes
        the electronic identity of the atom in terms of both valence electron
        and core electron counts. Given as:  (Z^v_i - h_i) / (Z_i - Z^v_i - 1)
        For atoms of higher principal quantum levels (P, S, Cl, Br, I), Kier
        and Hall proposed to account for both valence and nonvalence electrons

        Z^v_i = number of valence electrons of ith atom
        h_i = number of hydrogens bonded to atom
        Z_i = total number of electrons of ith atom (atomic number)

        :param atom_index: 1-based atomic index
        :type atom_index: int
        """
        atom = self._getHeavyStructure(st).atom[atom_index]
        Zv_i = mm.mmelement_get_valence_count(atom.atomic_number)
        h_i = self._getHydrogenCount(st, atom_index)
        return old_div(float(Zv_i - h_i), (atom.atomic_number - Zv_i - 1))

    @cached_topo_descriptor
    def _getConnectivityGraph(self, st):
        """
        Build undirected graph corresponding to molecular connectivity.
        """
        graph = networkx.Graph()
        graph.add_edges_from(self._getBondIndices(st))
        return graph

    @cached_topo_descriptor
    def _getConnectivityGraphWithWeights(self, st, carbon_weight, atom_weights):
        """
        Build a graph representation of a ligand structure, weighted according
        to the rules specified for building the multigraph distance matrix
        (inverse bond order).

        :param st: a ligand structure
        :type st: structure.Structure
        :param carbon_weight: weight assigned to carbon atoms
        :type carbon_weight: float
        :param atom_weights: weight assigned to each heavy atom in the ligand;
                should have length equal to the number of heavy atoms in the
                ligand
        :type atom_weights: tuple[float]
        :return: a weighted graph representation of the ligand `st`
        :rtype: networkx.Graph
        """

        graph = networkx.Graph()
        heavy_st = self._getHeavyStructure(st)
        weight_c2 = carbon_weight * carbon_weight
        for idx1, idx2 in self._getBondIndices(st):
            weight1, weight2 = atom_weights[idx1 - 1], atom_weights[idx2 - 1]
            bond_order = self._getConventionalBondOrder(st, idx1, idx2)
            weight = weight_c2 / (bond_order * weight1 * weight2)
            graph.add_edge(idx1, idx2, weight=weight)
        return graph

    @cached_topo_descriptor
    def _getDistanceMatrix(self, st):
        """
        Handbook p. 112: distance matrix (D) -- matrix of all topological
        distances between all the atom pairs. Also known as the vertex distance
        matrix. The topological distance d_ij the number of edges in the
        shortest path between the vertices vi and vi; the off-diagonal entries
        of the distance matrix equal one if vertices vi and v, are adjacent
        (i.e. the atoms i and j are bonded) and are greater than one otherwise.
        The diagonal elements are of course equal to zero. The distance matrix
        is symmetric with dimension A x A, where A is the number of atoms, and
        is derived from the H-depleted molecular graph.
        """
        n = self._getAtomCount(st)
        dist_mat = numpy.zeros((n, n), dtype=int)
        graph = self._getConnectivityGraph(st)
        for i, j in self._getAtomPairs(st):
            path_length = networkx.shortest_path_length(graph, i, j)
            dist_mat[i - 1][j - 1] = dist_mat[j - 1][i - 1] = path_length
        return dist_mat

    @cached_topo_descriptor
    def _getReciprocalDistanceMatrix(self, st):
        """
        Handbook p. 112: reciprocal distance matrix (D^-1) -- a square
        symmetric A x A matrix derived from the distance matrix where each
        off-diagonal element is the reciprocal of the topological distance d
        between the considered vertices (and 0 for d_ii).
        """
        return matrix_reciprocal(self._getDistanceMatrix(st))

    @cached_topo_descriptor
    def _getReciprocalSquareDistanceMatrix(self, st):
        """
        Handbook p. 112: reciprocal square distance matrix (D^-2) -- a square
        symmetric A x A matrix derived from the distance matrix where each
        off-diagonal element is the reciprocal of the topological distance d
        between the considered vertices (and 0 for d_ii).
        """
        return numpy.power(self._getReciprocalDistanceMatrix(st), 2)

    @cached_topo_descriptor
    def _getVertexDistanceDegree(self, st, atom_index):
        """
        Handbook p. 113, vertex distance degree (sigma_i) -- the ith row sum of
        the distance matrix. Also known as the distance number, distance index,
        distance rank, vertex distance sum, or distance of a vertex.

        :param atom_index: 1-based atomic index
        :type atom_index: int
        """
        return sum(self._getDistanceMatrix(st)[atom_index - 1])

    @cached_topo_descriptor
    def _getAtomEccentricity(self, st, atom_index):
        """
        Handbook p. 112, atom eccentricity (nu_i) -- maximum value entry in the
        ith row of the distance matrix, i.e. the maximum distance from theith
        vertex to any other vertices (vertex eccentricity).

        :param atom_index: 1-based atomic index
        :type atom_index: int
        """
        return max(self._getDistanceMatrix(st)[atom_index - 1])

    @cached_topo_descriptor
    def _getDistanceCount(self, st, distance):
        """
        Handbook p. 113, graph distance count (f^k) -- total number of
        distances in the graph equal to k

        :param distance: bonded distance (kth-order)
        :type distance: int
        """
        distance_count = Counter(self._getDistanceMatrix(st).flatten())
        return 0.5 * distance_count[distance]

    @cached_topo_descriptor
    def _getReciprocalDistanceSum(self, st, atom_index):
        """
        Handbook p. 116: reciprocal distance sum (RDS_i) -- sum of the
        reciprocal distance matrix elements in the ith row.

        :param atom_index: 1-based atomic index
        :type atom_index: int
        """
        return sum(self._getReciprocalDistanceMatrix(st)[atom_index - 1])

    @cached_topo_descriptor
    def _getConnectivityPaths(self, st):
        """
        Handbook p. 85, all shortest paths keyed by path lengths. The paths
        are equivalent to mth order subgraphs in the molecular structure.
        """

        connectivity_paths = defaultdict(list)
        # Zero-th order paths are the atoms to themselves: [index, index]
        connectivity_paths[0] = [[i] for i in self._getAtomIndices(st)]
        # Higher order paths are taken from shortest paths
        graph = self._getConnectivityGraph(st)
        for i, j in self._getAtomPairs(st):
            for path_ij in networkx.all_simple_paths(graph, i, j, cutoff=5):
                connectivity_paths[len(path_ij) - 1].append(path_ij)
        return connectivity_paths

    @cached_topo_descriptor
    def _getCyclomaticNumber(self, st):
        """
        Handbook p. 94, cyclomatic number -- the number of independent cycles
        (or rings), and, more exactly, the number of non-overlapping cycles.

        Should be equivalent to len(st.ring)
        """
        return self._getBondCount(st) - self._getAtomCount(st) + 1

    @cached_topo_descriptor
    def _getIntrinsicStates(self, st):
        """
        Handbook p. 159: intrinsic state (I_i) -- calculated from the principal
        quantum number, the number of valence electrons, and the number of
        sigma electrons of the ith atom in the H-depleted molecular graph.

        NOTE: Molecular Descriptors p. 284, Table E11 and DRAGON use the simple valence
              vertex degree when calculating the instrisic state.
        """
        self._assertMultipleHeavyAtoms(st)  # prevent divide by zero
        intrinsic_states = []
        heavy_st = self._getHeavyStructure(st)
        for i in self._getAtomIndices(st):
            atom = heavy_st.atom[i]
            L_i = mm.mmelement_get_period(atom.atomic_number)
            v_i = self._getSimpleValenceVertexDegree(st, i)
            d_i = self._getVertexDegree(st, i)
            intrinsic_states.append(
                old_div((pow(old_div(2.0, L_i), 2) * v_i + 1), d_i))
        return intrinsic_states

    @cached_topo_descriptor
    def _getFieldEffects(self, st, k=2):
        """
        Handbook p. 159: field effect (Delta I_i) -- calculated as perturbation
        of the intrinsic state of ith atom by all other atoms in the molecule.
        The exponent k is a parameter to modify the influence of distant or
        nearby atoms for particular studies. Usually it is taken as k = 2.
        """
        intrinsic_states = self._getIntrinsicStates(st)
        field_effect = []
        atom_count = self._getAtomCount(st)
        dist_matrix = self._getDistanceMatrix(st)
        for i in range(atom_count):
            field_effect_i = 0.0
            for j in range(atom_count):
                I_diff = intrinsic_states[i] - intrinsic_states[j]
                d_ij = dist_matrix[i][j]
                field_effect_i += old_div(float(I_diff), pow(d_ij + 1.0, k))
            field_effect.append(field_effect_i)
        return field_effect

    @cached_topo_descriptor
    def _getLaplacianEigenvalues(self, st):
        """
        Handbook p. 253: laplacian eigenvalues (lambda_i) -- from the Laplacian
        matrix, defined as the difference between the vertex degree matrix and
        the adjacency matrix. The diagonalization of the Laplacian matrix gives
        A real eigenvalues hi which constitute the Laplacian spectrum. All
        eignevalues are (a) non-negative numbers, (b) the last value is always
        zero, and (c) the second to last is great than zero if the graph is
        connected.
        """
        # Construct Laplacian matrix; L = V - A
        lap_matrix = -1 * self._getAdjacencyMatrix(st)
        for i in self._getAtomIndices(st):
            lap_matrix[i - 1, i - 1] = self._getVertexDegree(st, i)
        # Diagonalize the matrix and return sorted eigenvalues
        eigenvalues = list(numpy.linalg.eigh(lap_matrix)[0])
        # Check that A-1 eignevalues exist
        if not eigenvalues[:-1]:
            raise TopoError("No Laplacian eigenvalues available")
        return sorted(eigenvalues, reverse=True)

    @cached_topo_descriptor
    def _getChargeTermMatrix(self, st):
        """
        Handbook p. 445: charge term matrix (CT) -- an unsymmetric matrix of
        charge terms that are graph invariants related to the charge transfer
        between the pair of considered vertices. The diagonal entries of the CT
        matrix represent the topological valence of the atoms; the off-diagonal
        entries CT_ij represent a measure of the net charge transferred from the
        atom j to the atom i.
        """
        # Galvez matrix; M = A * D^-2
        galvex_matrix = numpy.dot(self._getAdjacencyMatrix(st),
                                  self._getReciprocalSquareDistanceMatrix(st))
        # CT = diag(M) + [M - M^T]
        return numpy.diag(numpy.diag(galvex_matrix)) + (
            galvex_matrix - numpy.transpose(galvex_matrix))

    @cached_topo_descriptor
    def _getAromaticBonds(self, st):
        """
        Collects all bonds in the heavy atom structure that are aromatic.

        :return: tuple of aromatic bonds
        :rtype: tuple of `structure.StructureBond`
        """
        adaptor = canvas.ChmMmctAdaptor()
        heavy_atom_st = self._getHeavyStructure(st)
        aromatic_info = adaptor.getAromaticityInfo(heavy_atom_st.handle)

        aromatic_bonds = []
        for i, j in self._getBondIndices(st):
            if aromatic_info[i - 1] and aromatic_info[j - 1]:
                aromatic_bonds.append(tuple(sorted([i, j])))
        return tuple(aromatic_bonds)

    @cached_topo_descriptor
    def _getConventionalBondOrder(self, st, i, j):
        """
        Handbook p. 28: conventional bond order (sigma^*) -- within the
        framework of the graph theory, specifically the multigraph, this is
        defined as being equal to 1, 2, 3, and 1.5 for single, double, triple
        and aromatic bonds, respectively. Define zero-order bonds as 1, as to be
        valid with multigraph distance weight and vdW surface area correction.
        """
        search_indices = tuple(sorted((i, j)))
        for bond in self._getHeavyStructure(st).bond:
            indices = tuple(sorted([bond.atom1.index, bond.atom2.index]))
            if indices == search_indices:
                # Assume aromatic bond are 1.5, as per handbook
                if indices in self._getAromaticBonds(st):
                    return 1.5
                # Return 1 for zero-order bonds
                if not bond.order:
                    return 1
                # Otherwise use the intenger bond order
                return bond.order
        raise TopoError("No bond exists between atoms %d and %d" % (i, j))

    @cached_topo_descriptor
    def _getMultigraphDistanceMatrix(self, st, carbon_weight, atom_weights):
        """
        Handbook p. 488: multigraph distance matrix (``*D``) -- a weighted
        distance matrix where the distance from vertex vi to vertex vj is
        obtained by counting the edges in the shortest path between them, where
        each edge counts as the inverse of the conventional bond order, i.e.
        the relative topological distance, and therefore contributes 1 / bond
        order to the overall path length.

        :param carbon_weight: weight for the carbon atom
        :type carbon_weight: float

        :param atom_weights: tuple of weights for each atom
        :type atom_weights: tuple of float
        """

        atom_count = self._getAtomCount(st)
        dist_mat = numpy.zeros((atom_count, atom_count), dtype=float)

        # Diagonal elements are defined as 1 - (X_carbon / X_i)
        for i, weight_i in enumerate(atom_weights):
            dist_mat[i][i] = 1.0 - (carbon_weight / float(weight_i))

        # Off-diagonal elements are the shortest weighted path length between
        # atoms
        graph = self._getConnectivityGraphWithWeights(st, carbon_weight,
                                                      atom_weights)
        dist_map = dict(networkx.shortest_path_length(G=graph, weight='weight'))
        for idx1, idx2 in self._getAtomPairs(st):
            dist_mat[idx1 - 1][idx2 - 1] = dist_map[idx1][idx2]
            dist_mat[idx2 - 1][idx1 - 1] = dist_map[idx2][idx1]
        return dist_mat

    @cached_topo_descriptor
    def _getBaryszDistanceMatrix(self, st):
        """
        Handbook p. 488: Barysz distance matrix (D^Z) -- a weighted distance
        matrix accounting simultaneously for the presence of heteroatoms and
        multiple bonds in the molecule.
        """
        Z_carbon = 6
        Z_weights = [a.atomic_number for a in self._getHeavyStructure(st).atom]
        return self._getMultigraphDistanceMatrix(st, Z_carbon, tuple(Z_weights))

    @cached_topo_descriptor
    def _getElectronegativityDistanceMatrix(self, st):
        """
        Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
        matrix with Sanderson electronegativities
        See: Handbook p. 488, Barysz distance matrix
        """
        # Sanderson electronegativity (chi^SA) indexed by atomic number
        # MMolecular Descriptors p. 22, Table A3
        electronegativity = {
            5: 2.275,
            6: 2.746,
            7: 3.194,
            8: 3.654,
            9: 4.000,
            13: 1.714,
            14: 2.138,
            15: 2.515,
            16: 2.957,
            17: 3.475,
            26: 2.000,
            27: 2.000,
            28: 2.000,
            29: 2.033,
            30: 2.223,
            35: 3.219,
            50: 2.298,
            53: 2.778,
        }

        w_carbon = electronegativity[6]
        try:
            w_weights = [
                electronegativity[atom.atomic_number]
                for atom in self._getHeavyStructure(st).atom
            ]
        except KeyError as e:
            raise TopoError("Electronegativity unavailable: %s" % e)
        return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))

    @cached_topo_descriptor
    def _getMassDistanceMatrix(self, st):
        """
        Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
        matrix with atomic masses
        See: Handbook p. 488, Barysz distance matrix
        """
        w_carbon = 12.01115
        w_weights = (a.atomic_weight for a in self._getFullStructure(st).atom \
                     if a.atomic_number != 1)
        return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))

    @cached_topo_descriptor
    def _getVDWVolumeDistanceMatrix(self, st):
        """
        Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
        matrix with van der Waals volumes
        See: Handbook p. 488, Barysz distance matrix
        """
        # van der Waals volume (V^vdw) indexed by atomic number
        # Molecular Descriptors p. 22, Table A3
        vdw_volume = {
            5: 17.875,
            6: 22.449,
            7: 15.599,
            8: 11.494,
            9: 9.203,
            13: 36.511,
            14: 31.976,
            15: 26.522,
            16: 24.429,
            17: 23.228,
            26: 41.052,
            27: 35.041,
            28: 17.157,
            29: 11.494,
            30: 38.351,
            35: 31.059,
            50: 45.830,
            53: 38.792,
        }

        w_carbon = vdw_volume[6]
        try:
            w_weights = [
                vdw_volume[atom.atomic_number]
                for atom in self._getHeavyStructure(st).atom
            ]
        except KeyError as e:
            raise TopoError("van der Waals volume unavailable: %s" % e)
        return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))

    @cached_topo_descriptor
    def _getPolarizabilityDistanceMatrix(self, st):
        """
        Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance
        matrix with polarizabilities
        See: Handbook p. 488, Barysz distance matrix
        """
        # polarizability (alpha) indexed by atomic number
        # Molecular Descriptors p. 22, Table A3
        polarizability = {
            5: 3.030,
            6: 1.760,
            7: 1.100,
            8: 0.802,
            9: 0.557,
            13: 6.800,
            14: 5.380,
            15: 3.630,
            16: 2.900,
            17: 2.180,
            26: 8.400,
            27: 7.500,
            28: 6.800,
            29: 6.100,
            30: 7.100,
            35: 3.050,
            50: 7.700,
            53: 5.350,
        }

        w_carbon = polarizability[6]
        try:
            w_weights = [
                polarizability[atom.atomic_number]
                for atom in self._getHeavyStructure(st).atom
            ]
        except KeyError as e:
            raise TopoError("Polarizability unavailable: %s" % e)
        return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights))

    @cached_topo_descriptor
    def _getAtomicWalkCounts(self, st, k):
        """
        Handbook p. 480: atomic walk count (awc) -- the total number of
        equipoise walks of length k starting from vertex vi.
        """
        # Get A^k
        adjacency_k = self._getAdjacencyMatrix(st)
        for i in range(k - 1):
            adjacency_k = numpy.dot(adjacency_k, self._getAdjacencyMatrix(st))
        return numpy.sum(adjacency_k, axis=0)

    @cached_topo_descriptor
    def _getVDWSurfaceArea(self, st):
        """
        Molecular Descriptors p. 609: van der Waals surface area (VSA) --
        calculated from the atomic van der Waals radius, summing over
        contributions from atoms in the adjacency matrix.
        """

        # Correction term depending on the bond order:
        # 0 for single, 0.1 for aromatic, 0.2 for double, 0.3 for triple
        correction = {1: 0, 1.5: 0.1, 2: 0.20, 3: 0.30}
        vdw_surface_areas = []
        full_st = self._getFullStructure(st)
        for i, atom_i in enumerate(full_st.atom, start=1):

            R_i = atom_i.radius
            contribution_sum = 0.0
            for bond in atom_i.bond:
                j = bond.atom2.index
                R_j = bond.atom2.radius
                # Hydrogens are single bonded, otherwise get bond order
                if 1 in (atom_i.atomic_number, bond.atom2.atomic_number):
                    c_ij = correction[1]
                else:
                    c_ij = correction[self._getConventionalBondOrder(st, i, j)]
                b_ij = mm.mmideal_get_stretch(full_st, (i, j)) - c_ij
                g_ij = min(max(abs(R_i - R_j), b_ij), R_i + R_j)
                contribution_sum += old_div((pow(R_j, 2) - pow(R_i - g_ij, 2)),
                                            g_ij)

            # VSA_i = (4 * PI * R_i^2) - (PI * P_I * ij_contributions)
            vsa_i = math.pi * R_i * (4 * R_i - contribution_sum)
            vdw_surface_areas.append(vsa_i)
        return vdw_surface_areas

    # =============================================================================
    # Non-cached helper functions
    # =============================================================================

    def _balbanOperator(self, st, matrix):
        """
        Handbook p. 7, Ivanciuc-Balaban operator IB(M) -- a modified Randic
        operator summing over all bonded pairs for row sums of each
        participant::

            B/(C+1) * sum_bonds[ (R(i) * R(j)) ^ -0.5 ]

        C is the cyclomatic number, B is the number of bonds, and R is the row
        sum operator.
        """
        B = self._getBondCount(st)
        C = self._getCyclomaticNumber(st)
        row_sum = [sum(row) for row in matrix]
        dist_sum = 0.0
        for i, j in self._getBondIndices(st):
            dist_sum += pow(row_sum[i - 1] * row_sum[j - 1], -0.5)
        return (old_div(float(B), (C + 1))) * dist_sum

    def _centricIndexSum(self, st, value_function, centric_function):
        """
        Calculates atom centric index based on an atomic property function and
        a base function of the centric index sum.

        :param value_function: function to extract atomic property
        :type value_function: f(st, atom_index)

        :param centric_function: base function for component value of count
        :type centric_function: f(count)
        """
        values = [value_function(st, i) for i in self._getAtomIndices(st)]
        value_count_tuples = Counter(values).most_common()
        return sum(centric_function(count) for _, count in value_count_tuples)

    def _generalizedConnectivityIndex(self, st, m, vertex_func, exponent):
        """
        Handbook p. 86: formula for generalized connectivity indices, used for
        several descriptors.

        :param m: index order of the connectivity index
        :type m: int

        :param vertex_func: vertex degree function
        :type vertex_func: f(st, index)

        :param exponent: power to raise vertex degree product
        :type exponent: float
        """
        self._assertMultipleHeavyAtoms(st)  # prevent pow(0, exponent)
        self._assertPathLength(st, m)
        connectivity_index = 0.0
        for path in self._getConnectivityPaths(st)[m]:
            vertex_degrees = [vertex_func(st, i) for i in path]
            connectivity_index += pow(numpy.prod(vertex_degrees), exponent)
        return connectivity_index

    def _propertyRangeVDWSurfaceArea(self, st, properties, intervals, n):
        """
        Molecular Descriptors p. 609: P_VSA -- the amount of van der Waals
        surface area (VSA) having a property value in a certain range. These
        descriptors correspond to a partition of the molecular surface area
        conditioned by the atomic values of the property P.

        :param properties: 1-based atomic index
        :type properties: list

        :param intervals: 1-based atomic index
        :type intervals: list

        :param n: 1-based interval
        :type n: int
        """
        surface_areas = self._getVDWSurfaceArea(st)
        if len(properties) > len(surface_areas):
            raise TopoError("Mismatch between atom values and number of atoms")

        surface_area_sum = 0.0
        for value, vsai in zip(properties, surface_areas):
            # Find bin corresponding to a_i <= value < a_i+1
            if bisect.bisect_right(intervals, value) + 1 == n:
                surface_area_sum += vsai
        return surface_area_sum


# =============================================================================
# External helper functions
# =============================================================================


[docs]def check_canvas_license():
    """
    Check that a CANVAS_FULL license is available to use
    """
    CANVAS_LICENSE = canvas_base.ChmLicenseShared()
    if not CANVAS_LICENSE.isValid():
        raise TopoError("VSA descriptors require Canvas license")


[docs]def matrix_reciprocal(matrix):
    """
    Returns a matrix whose elements are the reciprocal of each of the
    elements of the original matrix. The diagonal of the matrix must be
    zeros, and the routine assumes that no other zeros exist in the matrix.
    """
    if numpy.sum(numpy.diag(matrix)) != 0.0:
        raise TopoError("Diagonal of matrix must be zeros.")
    inv_matrix = matrix.copy()
    # Non-zero number for inversion
    numpy.fill_diagonal(inv_matrix, -1)
    inv_matrix = old_div(1.0, inv_matrix)
    # Assign zeros back to diagonal
    numpy.fill_diagonal(inv_matrix, 0.0)
    return inv_matrix