Source code for schrodinger.protein.assignment

"""
Module for optimizing hydroxyl, thiol and water orientiations, Chi-flips of
asparagine, glutamine and histidine, and protonation states of aspartic acid,
glutamic acid, and histidine.

Usage: ProtAssign(st)

Copyright Schrodinger, LLC. All rights reserved.

"""
import copy
import itertools
import json
import math
import operator
import random
import re
import shutil
import sys
import tempfile
from collections import defaultdict
from dataclasses import dataclass
from dataclasses import field
from pathlib import Path
from typing import List
from typing import Tuple
from typing import Union

import numpy as np
from scipy import ndimage

from schrodinger import structure
from schrodinger.application.bioluminate import protein
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import transform
from schrodinger.structutils.interactions import find_pi_cation_interactions
from schrodinger.structutils.interactions import find_pi_pi_interactions
from schrodinger.structutils.measure import create_distance_cell
from schrodinger.utils import mmutil
from schrodinger.utils import subprocess

# Check if toulbar2 is available to solve network exact
TOULBAR2_AVAILABLE = shutil.which('toulbar2') is not None

LOG_NONE = 0
LOG_BASIC = 1
LOG_EXTRA = 2
LOG_DEBUG = 3
LOG_FULL_DEBUG = 4
LOG_SCORE_DEBUG = 5
DEFAULT_LOG_LEVEL = LOG_BASIC
log_level = DEFAULT_LOG_LEVEL
label_color = 13

pH_vlow = 'very_low'
pH_low = 'low'
pH_neutral = 'neutral'
pH_high = 'high'

pka_property = 'r_pa_PropKa_pKa'
# Atom property name containing name of residue state
STATE_PROPERTY = 's_pa_state'
ACCEPTOR_PROPERTY = 'b_pa_acceptor'
DONOR_PROPERTY = 'b_pa_donor'
CLASHER_PROPERTY = 'b_pa_clasher'
ION_PROPERTY = 'b_pa_ion'
# Flag whether a donor or acceptor is static
STATIC_PROPERTY = 'b_pa_static'
# Flag that indicates a Structure is annotated for faster self-scoring.
ANNOTATED_PROPERTY = 'b_pa_is_annotated'

# Max distance between changeables to be clustered together
CLUSTERING_DISTANCE = 4.0
# Distance between the heavies of changeables after which the interaction
# energy drops to 0
MAX_HEAVY_HEAVY_INTERACTION_DISTANCE = 4.5
# Maximum distance between certain changeables to orient its hydrogen for
# adding states.
MAX_ORIENT_HYDROGEN_DISTANCE = 5.0
# H-H clash cutoff involving two polar H. The cutoff is set such that the
# hydrogen bond network of 6GST His-14 is resolved correctly.
POLAR_CLASH_CUTOFF = 1.9
# H-H clash cutoff involving at least one non-polar H
# Unknown how this cutoff was determined originally.
NONPOLAR_CLASH_CUTOFF = 1.77
# Acceptor-acceptor clash cutoff
ACCEPTOR_ACCEPTOR_CLASH_CUTOFF = 2.80

BOND_MSG_TEMPLATE = ('Cannot determine flip state of {resstr} due to unexpected'
                     'covalent bond.')

FLIP = 'Flip'
NO_FLIP = 'No Flip'

RESNAME_FLIPSTATES_MAP = {'ASN': (NO_FLIP, FLIP), 'GLN': (NO_FLIP, FLIP)}
for resname in ('HID', 'HIE', 'HIP'):
    RESNAME_FLIPSTATES_MAP[resname] = (resname, f'{FLIP} {resname}')
RESNAME_FLIPSTATES_MAP['HIS'] = RESNAME_FLIPSTATES_MAP['HID']

ND1 = 'ND1'
ND2 = 'ND2'
NE2 = 'NE2'
CD = 'CD'
CD2 = 'CD2'
CE1 = 'CE1'
CG = 'CG'
HIS_PDBNAMES = {CD2, CE1, CG, ND1, NE2}
AMD_PDBNAME_GROUPS = ({CG, 'OD1', ND2}, {CD, 'OE1', NE2})
HIS_NEIGHBOR_MAP = {
    ND1: {CG, CE1},
    NE2: {CE1, CD2},
    CE1: {ND1, NE2},
    CD2: {NE2, CG}
}

# Properties
PKA_PROPERTY = 'r_pka_prediction'
INTERACTION_PROPERTY_TEMPLATE = 'b_pka_{0}_interaction{1}'
BULK_SOLVENT_ACCESSIBLE_PROPERTY = 'b_pka_bulk_solvent_accessible'

# Strings
METAL = 'metal'
CARBOXYL = 'carboxyl'
PI_CATION = 'pi-cation'
ARGININE = 'arginine'
PI_PI = 'pi-pi'
ACCEPTOR = 'acceptor'
FORCED_ACCEPTOR = 'forced_acceptor'
STATIC_DONOR = 'static_donor'

# Histidine parameters
HISTIDINE_NAMES = {'HIS ', 'HID ', 'HIE ', 'HIP '}
AMIDE_PDBRES = {'ASN ', 'GLN '}
ASPARTIC_ACID_NAMES = {'ASP ', 'ASH '}
GLUTAMIC_ACID_NAMES = {'GLU ', 'GLH '}
# Residue atom name that carries all the annotation for the empircal pKa
# predictions
ANNOTATED_ATOM_PDBNAME = ' CA '

# Chemical entity related
# H C N O F S Se
ORGANIC_ATOMIC_NUMBERS = {1, 6, 7, 8, 9, 16, 34}
CARBOXYLIC_ACID_SMARTS = '[CX3](=O)[OX1H0-]'

# Some distances and angles shared across classes and functions for emperical
# pKa prediction
MAX_INTERACTION_DISTANCE = 5.0
MAX_HYDROGEN_CLASHING_DISTANCE = 1.8
MAX_HBOND_DISTANCE = 3.0
MIN_HBOND_ANGLE = 120


# Simply utility functions for empircal pKa predictions
[docs]def get_annotated_atom(residue):
    """Returns annotated atom of residue  _StructureAtom or None"""
    return residue.getAtomByPdbName(ANNOTATED_ATOM_PDBNAME)


[docs]def get_annotated_atom_property(residue):
    """Returns property dict of annotated atom or an empty dict if atom is not present"""
    atom = get_annotated_atom(residue)
    if atom is None:
        return {}
    return atom.property


[docs]def get_pka(residue):
    """Return predicted pKa of residue"""
    return get_annotated_atom_property(residue).get(PKA_PROPERTY)


[docs]def set_pka(residue, pka):
    """Set predicted pKa of residue"""
    p = get_annotated_atom_property(residue)
    p[PKA_PROPERTY] = pka


[docs]def shift_pka(residue, amount):
    """Shift the predicted pKa of residue"""
    p = get_annotated_atom_property(residue)
    p[PKA_PROPERTY] += amount


[docs]def get_interaction_label(label, suffix):
    """Create property label for a given interaction."""
    if suffix is None:
        suffix = ''
    else:
        suffix = '_' + suffix
    return INTERACTION_PROPERTY_TEMPLATE.format(label, suffix)


[docs]def set_interaction_label(residue, label, suffix=None):
    """Set a label of the annoted atom of residue to True"""
    p = get_annotated_atom_property(residue)
    key = get_interaction_label(label, suffix)
    p[key] = True


[docs]def has_interaction_label(residue, label, suffix=None):
    """Check if the annotated atom of the residue has a label"""
    key = get_interaction_label(label, suffix)
    return bool(get_annotated_atom_property(residue).get(key))


[docs]def residue_to_label(residue_or_atom):
    """Create string from residue or atom."""
    pdbname = ''
    if hasattr(residue_or_atom, 'getResidue'):
        pdbname = ' ' + residue_or_atom.pdbname.strip()
        residue = residue_or_atom.getResidue()
    else:
        residue = residue_or_atom
    return f'{residue.pdbres.strip()} {residue}{pdbname}'


[docs]def log_interaction(residue1, residue2, name, distance=None, angle=None):
    """Log the interaction between two residues and its distance and angle optionally"""
    label1 = residue_to_label(residue1)
    label2 = residue_to_label(residue2)
    msg = [f'Found {name} interaction between {label1} and {label2}']
    if distance is not None:
        msg.append(f'    Distance: {distance:.2f}')
    if angle is not None:
        msg.append(f'    Angle: {angle:.2f}')
    report(LOG_BASIC, '\n'.join(msg))


class _Grid:
    """A simple grid class to detect bulk solvent water"""

    def __init__(self, array, spacing, origin):
        """
        :param array: Array of grid
        :type array: numpy.ndarray with ndim 3
        :param spacing: Spacing between grid points in angstrom
        :type spacing: float
        :param origin: Origin of grid
        :type origin: ndarray of length 3
        """
        self.array = array
        self.spacing = spacing
        self.origin = origin

    @classmethod
    def from_structure(cls, st, spacing=0.75, padding=None):
        """
        Create grid based on structure

        :param spacing: Spacing between grid points in angstrom
        :type spacing: float
        :param padding: Added space to each dimension based on min and max of XYZ coordinates
        :type padding: float
        """
        xyz = st.getXYZ()
        origin = xyz.min(axis=0) - padding
        right_top = xyz.max(axis=0) + padding
        shape = ((right_top - origin) / spacing + 0.5).astype(int)[::-1]
        array = np.ones(shape, bool)
        return cls(array, spacing, origin)


[docs]def get_bulk_solvent_accessible_atoms(st,
                                      radius=2.5,
                                      radius2=3.0,
                                      spacing=0.50,
                                      include_water=False):
    """
    Return bulk solvent accessible atoms.

    Bulk solvent accessible atoms are determined by creating a grid and setting
    occupied voxels to 1 using a fixed radius for each atom and then checking
    if any non-occupied voxels are present within a slightly larger radius.

    :param st: Structure to determine bulk solvent accessible atoms. The atom property BULK_SOLVENT_ACCESSIBLE_PROPERTY will be set for each atom.
    :type st: Structure
    :param radius: Initial radius
    :type radius: float
    :param radius2:
    :type radius2: float
    :param spacing: Spacing of grid. Lower spacing makes the calculation more accurate but requires more compute time and memory.
    :type spacing: float
    :param include_water: Whether to keep waters or not when calculating bulk solvent accessible atoms.
    :type include_water: bool

    :return: List of atom indices that are bulk solvent accessible
    :rtype: List[int]
    """
    grid = _Grid.from_structure(st, spacing=spacing, padding=2 * radius)
    cutoff2 = (radius / grid.spacing)**2
    array = grid.array
    if include_water:
        xyzs = st.getXYZ()
    else:
        xyzs = np.asarray([
            a.xyz for a in st.atom if a.pdbres not in {'HOH ', 'DOD ', 'SPC '}
        ])
    for xyz in xyzs:
        center = (xyz - grid.origin) / grid.spacing
        start = (center - radius / grid.spacing + 0.5).astype(int)
        end = (center + radius / grid.spacing).astype(int) + 1
        for iz in range(start[2], end[2]):
            dz = center[2] - iz
            dz2 = dz * dz
            for iy in range(start[1], end[1]):
                dy = center[1] - iy
                dy2 = dy * dy
                for ix in range(start[0], end[0]):
                    dx = center[0] - ix
                    dx2 = dx * dx
                    distance2 = dz2 + dy2 + dx2
                    if distance2 <= cutoff2:
                        array[iz, iy, ix] = 0

    bulk_solvent, num_features = ndimage.label(array)
    value = bulk_solvent[0, 0, 0]
    bulk_solvent[bulk_solvent != value] = 0

    iatoms = []
    cutoff2 = (radius2 / grid.spacing)**2
    for atom in st.atom:
        center = (atom.xyz - grid.origin) / grid.spacing
        start = (center - radius2 / grid.spacing + 0.5).astype(int)
        end = (center + radius2 / grid.spacing).astype(int) + 1
        value = 0
        atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = False
        for iz in range(start[2], end[2]):
            dz = center[2] - iz
            dz2 = dz * dz
            for iy in range(start[1], end[1]):
                dy = center[1] - iy
                dy2 = dy * dy
                for ix in range(start[0], end[0]):
                    dx = center[0] - ix
                    dx2 = dx * dx
                    distance2 = dz2 + dy2 + dx2
                    if distance2 <= cutoff2:
                        value += bulk_solvent[iz, iy, ix]
            if value > 0:
                iatoms.append(int(atom))
                atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True
                if atom.atomic_number == 1:
                    # Set neighboring heavy atom as bulk solvent accessible
                    for ba in atom.bonded_atoms:
                        ba.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True
                        iatoms.append(ba.index)
                break
    # if a heavy atom is bulk solvent accessible, then also its hydrogens are.
    for iatom in iatoms:
        atom = st.atom[iatom]
        if not atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY]:
            continue
        if atom.atomic_number == 1:
            continue
        for ba in atom.bonded_atoms:
            if ba.atomic_number == 1:
                ba.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True
                iatoms.append(ba.index)

    return sorted(set(iatoms))


[docs]def get_unsatisfied_donors(st, include_bsa=True):
    """
    Return unsatisfied buried donors

    :param st: Structure to analyze
    :type st: Structure
    :param include_bsa: Include bulk solvent accessible atoms
    :type include_bsa: bool

    :return: List of unsatisfied donor atoms
    :rtype: List[_StructureAtom]
    """
    sasa_by_atom = analyze.calculate_sasa_by_atom(st)
    dcell = create_distance_cell(st, MAX_HBOND_DISTANCE, honor_pbc=False)
    unsatisfied = []
    for atom1, sasa in zip(st.atom, sasa_by_atom):
        if atom1.atomic_number != 1:
            continue
        hydrogen = atom1
        heavy = None
        for ba in hydrogen.bonded_atoms:
            heavy = ba
        if heavy is None:
            continue
        if heavy.atomic_number in (1, 6):
            continue
        heavy_sasa = sasa_by_atom[int(heavy) - 1]
        if max(sasa, heavy_sasa) > 3.0:
            continue
        residue1 = hydrogen.getResidue()
        is_hydrogen_bonded = False
        is_clashing = False
        for iatom2 in dcell.query(*hydrogen.xyz):
            atom2 = st.atom[iatom2]
            residue2 = atom2.getResidue()
            if str(residue1) == str(residue2):
                continue
            distance = st.measure(hydrogen, atom2)
            if atom2.atomic_number == 1 and distance < MAX_HYDROGEN_CLASHING_DISTANCE:
                is_clashing = True
            if atom2.atomic_number in (1, 6):
                continue
            if atom2.pdbname == ' N  ':
                continue
            # If the hydrogen is clashing it is also unsatisfied
            angle = st.measure(heavy, hydrogen, atom2)
            if distance <= MAX_HBOND_DISTANCE and angle >= MIN_HBOND_ANGLE:
                is_hydrogen_bonded = True
        if not is_hydrogen_bonded or is_clashing:
            unsatisfied.append(heavy)
    if not include_bsa:
        bsa = set(get_bulk_solvent_accessible_atoms(st))
        tmp = []
        for atom in unsatisfied:
            if atom.index not in bsa:
                tmp.append(atom)
        unsatisfied = tmp
    return unsatisfied


[docs]def get_carboxyl_atoms(residue):
    """
    Get carboxyl atom groups from residue. Returns multiple groups if
    multiple are present. The first atom will be the carbon. It
    specifically adds Glu and Asp even if they are protonated and uses a
    SMARTS pattern otherwise to extract the carboxyl group.

    :param residue: Residue to check
    :type residue: _Residue

    :return: List of carboxyl atoms
    :rtype: List[List[_StructureAtom]]
    """
    carboxyls = []
    # Add neutral carboxyls of glu and asp also to list so that a
    # ProtAssign prepped structure also works, as ProtAssign will protonate
    # all asps and glus while processing the structure in intermediate states.
    is_asp = residue.pdbres in ASPARTIC_ACID_NAMES
    is_glu = residue.pdbres in GLUTAMIC_ACID_NAMES
    c = None
    if is_asp:
        c = residue.getAtomByPdbName(' CG ')
    elif is_glu:
        c = residue.getAtomByPdbName(' CD ')
    if c is not None:
        carboxyl = [c]
        for neighbor in c.bonded_atoms:
            if neighbor.atomic_number == 8:
                carboxyl.append(neighbor)
        if len(carboxyl) == 3:
            # Check if the carboxyl is neutral. If so add it to the
            # pool, otherwise it will be added in the next section
            for oxygen in carboxyl[1:]:
                if len(list(oxygen.bonded_atoms)) == 2:
                    carboxyls.append(carboxyl)
                    break

    # Extract residue structure for fast SMARTS matching
    st = residue.structure
    atom_indices = residue.getAtomIndices()
    residue_st = residue.extractStructure()
    matches = analyze.evaluate_smarts_canvas(residue_st, CARBOXYLIC_ACID_SMARTS)
    for match in matches:
        assert len(match) == 3
        carboxyl = []
        for m in match:
            atom = st.atom[atom_indices[m - 1]]
            assert atom.atomic_number in (6, 8)
            carboxyl.append(atom)
            continue
        carboxyl = sorted(carboxyl, key=lambda a: a.atomic_number)
        carboxyls.append(carboxyl)
    return carboxyls


[docs]def get_residue_neighborhoods(st, residues, distance_cell):
    neighborhoods = defaultdict(list)
    for residue in residues:
        neighboring_residues = {}
        for atom in residue.atom:
            for iatom2 in distance_cell.query(*atom.xyz):
                residue2 = st.atom[iatom2].getResidue()
                if str(residue) == str(residue2):
                    continue
                neighboring_residues[str(residue2)] = residue2
        residues = list(neighboring_residues.values())
        neighborhoods[str(residue)] = residues
    return neighborhoods


[docs]def protonate_histidine(histidine):
    nd1 = histidine.getAtomByPdbName(' ND1')
    ne2 = histidine.getAtomByPdbName(' NE2')
    ce1 = histidine.getAtomByPdbName(' CE1')
    cd2 = histidine.getAtomByPdbName(' CD2')
    cg = histidine.getAtomByPdbName(' CG ')
    nd1.formal_charge = 1
    ne2.formal_charge = 0
    st = histidine.structure
    st.getBond(nd1, ce1).type = structure.BondType.Double
    st.getBond(cd2, ne2).type = structure.BondType.Single
    st.getBond(ce1, ne2).type = structure.BondType.Single
    st.getBond(cg, cd2).type = structure.BondType.Double
    build.add_hydrogens(st)


[docs]def charge_arginine_sidechains(st):
    """
    Make arginine sidechains charged. Assumes bond orders have been assigned correctly.

    Looks for the nitrogen that has a double bond to the CZ atom, and changes
    the formal charge to 1 and retypes atom.
    """
    for residue in st.residue:
        if residue.pdbres != 'ARG ':
            continue
        cz = residue.getAtomByPdbName(' CZ ')
        if cz is None:
            continue
        # Get the nitrogen that has a double bond with its neighboring heavy
        # atom as that is the one that should be charged.
        for nh_pdbname in (' NH2', ' NH1'):
            nh = residue.getAtomByPdbName(nh_pdbname)
            # Nothing to do if atom doesnt exist or formal charge is already 1
            if nh is None or nh.formal_charge == 1:
                continue
            if st.getBond(nh, cz).order == 2:
                # Set formal charges and retype
                nh.formal_charge = 1
                nh.retype()
                break


[docs]class HistidinepKaPredictor:
    """Empirical histidine pKa predictor"""

    # pKa related numbers
    INTERNAL_PKA = 6.5
    # Negative shifts
    METAL_PKA_SHIFT = -4.0
    STATIC_DONOR_PKA_SHIFT = -3.0
    FORCED_ACCEPTOR_PKA_SHIFT = -3.0
    PI_CATION_PKA_SHIFT = -1.0
    # Postive shifts
    PI_PI_PKA_SHIFT = 1.0
    CARBOXYLIC_ACID_PKA_SHIFT = 1.0
    DOUBLE_SIDED_HBOND_PKA_SHIFT = 1.0

    # Distance and angle cutoffs to determine interactions
    MAX_STATIC_DONOR_INTERACTION_DISTANCE = 3.0
    MIN_STATIC_DONOR_INTERACTION_ANGLE = 120
    MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE = 2.5
    MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE = 120
    MAX_METAL_INTERACTION_DISTANCE = 3.5
    MAX_PI_PI_INTERACTION_DISTANCE = 4.5
    MAX_PI_PI_INTERACTION_ANGLE = 30
    MAX_PI_CATION_INTERACTION_DISTANCE = 4.5
    MAX_PI_CATION_INTERACTION_ANGLE = 30
    MAX_PI_ARG_INTERACTION_DISTANCE = 5.5
    MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE = 3.8
    MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE = 140

    NITROGEN_PDBNAMES = {' ND1', ' NE2'}
    CARBON_PDBNAMES = {' CD2', ' CE1'}
    SIDECHAINS_PDBNAMES = NITROGEN_PDBNAMES | CARBON_PDBNAMES | {' CG '}

[docs]    def predict(self, st, protassign=None, flip=False):
        """
        Predict histidine pKa with empirical rules.
        Currently this requires the structure being annotated by ProtAssign.

        :param st: ProtAssign annotated structure
        :type st: Structure
        :param protassign: ProtAssign instance to calculate forced acceptor interactions
        :type protassign: schrodinger.protein.assignment.ProtAssign
        :param flip: Calculate pKa of flipped version of histidne
        :type flip: bool

        Returns None but annotates ANNOTATED_ATOM of each histidine with
        found interactions and predicted pKa
        """
        self._st = st
        # Gather all histidines that have their nitrogen and carbon atoms
        self._histidines = []
        for r in st.residue:
            if r.pdbres not in HISTIDINE_NAMES:
                continue
            for pdbname in self.SIDECHAINS_PDBNAMES:
                atom = r.getAtomByPdbName(pdbname)
                if atom is None:
                    break
            else:
                self._histidines.append(r)
        self._distance_cell = create_distance_cell(st,
                                                   MAX_INTERACTION_DISTANCE,
                                                   honor_pbc=False)
        self._nitrogen_pdbnames = self.CARBON_PDBNAMES if flip else self.NITROGEN_PDBNAMES
        self._neighborhoods = get_residue_neighborhoods(st, self._histidines,
                                                        self._distance_cell)
        for histidine in self._histidines:
            set_pka(histidine, self.INTERNAL_PKA)

        # Lower pKa values
        self.metal_interactions()
        self.identify_forced_amide_states()
        self.static_donor_interactions()
        self.cation_pi_interactions()
        self.arginine_interactions()

        # Increase pKa values
        self.pi_pi_interactions()
        self.carboxyl_interactions()
        self.static_acceptor_interactions()
        # Forced acceptor pKa calculations are expensive and may not be
        # required; it also requires a protassign instance to be provided.
        if protassign is not None:
            report(LOG_BASIC,
                   "Determining pKa penalties for forced acceptor histidines.")
            self.forced_acceptor_interactions(st, protassign)

[docs]    def metal_interactions(self):
        report(LOG_BASIC, "Determining pKa penalties for metal interactions")
        st = self._st
        for his in self._histidines:
            metals = []
            residues = self._neighborhoods[str(his)]
            for residue in residues:
                for atom in residue.atom:
                    if atom.atomic_number not in ORGANIC_ATOMIC_NUMBERS:
                        metals.append(atom)
            if not metals:
                continue
            for pdbname in self._nitrogen_pdbnames:
                nitrogen = his.getAtomByPdbName(pdbname)
                for metal in metals:
                    distance = st.measure(nitrogen, metal)
                    if distance < self.MAX_METAL_INTERACTION_DISTANCE:
                        set_interaction_label(his, METAL, pdbname.strip())
                        log_interaction(nitrogen, metal, METAL, distance)
                        if has_interaction_label(his, METAL):
                            break
                        set_interaction_label(his, METAL)
                        shift_pka(his, self.METAL_PKA_SHIFT)

[docs]    def cation_pi_interactions(self):
        report(LOG_BASIC,
               "Determining pKa penalties for pi-cation interactions")
        st = self._st
        cation_pi_interactions = find_pi_cation_interactions(st)
        for cpi in cation_pi_interactions:
            atom = st.atom[cpi.pi_centroid.atoms[0]]
            if atom.pdbres not in HISTIDINE_NAMES:
                continue
            if cpi.distance > self.MAX_PI_CATION_INTERACTION_DISTANCE:
                continue
            min_angle = self.MAX_PI_CATION_INTERACTION_ANGLE
            max_angle = 180 - min_angle
            if min_angle < abs(cpi.angle) < max_angle:
                continue

            cation = st.atom[cpi.cation_centroid.atoms[0]].getResidue()
            # ProtAssign charges ASN and GLN to do it's stuff...
            if cation.pdbres in AMIDE_PDBRES:
                continue
            histidine = atom.getResidue()
            set_interaction_label(histidine, PI_CATION)
            shift_pka(histidine, self.PI_CATION_PKA_SHIFT)
            log_interaction(histidine, cation, PI_CATION, cpi.distance,
                            cpi.angle)

[docs]    def arginine_interactions(self):
        report(LOG_BASIC, "Determining pKa penalties for arginine interactions")
        for his in self._histidines:
            args = [
                r for r in self._neighborhoods[str(his)] if r.pdbres == 'ARG '
            ]
            if not args:
                continue
            cg = his.getAtomByPdbName(' CG ')
            nd1 = his.getAtomByPdbName(' ND1')
            ne2 = his.getAtomByPdbName(' NE2')
            nd1_xyz = np.asarray(nd1.xyz)
            ne2_xyz = np.asarray(ne2.xyz)
            centroid1 = (nd1_xyz + ne2_xyz) / 2.0
            # Calculate the normal of the histidine plane
            v1 = nd1_xyz - cg.xyz
            v2 = ne2_xyz - cg.xyz
            n1 = np.cross(v1, v2)
            n1 /= np.linalg.norm(n1)
            for arg in args:
                nh2 = arg.getAtomByPdbName(' NH2')
                nh1 = arg.getAtomByPdbName(' NH1')
                ne = arg.getAtomByPdbName(' NE ')
                cz = arg.getAtomByPdbName(' CZ ')
                if None in (nh2, nh1, ne, cz):
                    continue
                centroid2 = np.asarray(cz.xyz)
                distance = np.linalg.norm(centroid1 - centroid2)
                if distance > self.MAX_PI_ARG_INTERACTION_DISTANCE:
                    continue
                v1 = nh2.xyz - centroid2
                v2 = nh1.xyz - centroid2
                n2 = np.cross(v1, v2)
                n2 /= np.linalg.norm(n2)
                theta = np.arccos(np.inner(n1, n2))
                angle = abs(np.rad2deg(theta))
                if self.MAX_PI_CATION_INTERACTION_ANGLE < angle < (
                        180 - self.MAX_PI_CATION_INTERACTION_ANGLE):
                    continue
                if not (has_interaction_label(his, ARGININE) or
                        has_interaction_label(his, PI_CATION)):
                    shift_pka(his, self.PI_CATION_PKA_SHIFT)
                set_interaction_label(his, ARGININE)
                log_interaction(his, arg, ARGININE, distance, angle)

[docs]    def pi_pi_interactions(self):
        """Check if histidine is involved in pi-pi interaction"""
        report(LOG_BASIC, "Determining pKa penalties for pi-pi interactions")
        st = self._st
        pi_pi_interactions = find_pi_pi_interactions(st)
        for ppi in pi_pi_interactions:
            if ppi.distance > self.MAX_PI_PI_INTERACTION_DISTANCE:
                continue
            max_angle = self.MAX_PI_PI_INTERACTION_ANGLE
            min_angle = 180 - max_angle
            if max_angle < abs(ppi.angle) < min_angle:
                continue
            for ring1 in (ppi.ring1, ppi.ring2):
                iatom1 = ring1.atoms[0]
                atom1 = st.atom[iatom1]
                if atom1.pdbres not in HISTIDINE_NAMES:
                    continue
                histidine = atom1.getResidue()
                if not has_interaction_label(histidine, PI_PI):
                    shift_pka(histidine, self.PI_PI_PKA_SHIFT)
                set_interaction_label(histidine, PI_PI)

                # Get other residue to report on
                ring2 = ppi.ring1
                if ring2.atoms[0] == iatom1:
                    ring2 = ppi.ring2
                iatom2 = ring2.atoms[0]
                residue = st.atom[iatom2].getResidue()
                log_interaction(histidine, residue, PI_PI, ppi.distance,
                                ppi.angle)

[docs]    def carboxyl_interactions(self):
        """Check if histidine is interacting with a carboxyl group."""
        report(LOG_BASIC, "Determining pKa penalties for carboxyl interactions")
        st = self._st
        for his in self._histidines:
            carboxyls = []
            for residue in self._neighborhoods[str(his)]:
                carboxyls += get_carboxyl_atoms(residue)
            # Now check for interactions
            for c, o1, o2 in carboxyls:
                o_ave_xyz = (np.asarray(o1.xyz) + o2.xyz) / 2.0
                # Set O1 to average O position to measure distance and angle
                o1_xyz = o1.xyz
                o1.xyz = o_ave_xyz
                carboxyl_residue = c.getResidue()
                # One carboxyl only interacts once with a histidine, but can
                # act as an acceptor for multiple atoms in the histidine.
                is_interacting = False
                for pdbname in self._nitrogen_pdbnames:
                    n = his.getAtomByPdbName(pdbname)
                    distance = st.measure(o1, n)
                    if distance > self.MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE:
                        continue
                    angle = st.measure(c, o1, n)
                    if angle < self.MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE:
                        continue
                    # Do not count the same carboxyl more than once
                    if not is_interacting:
                        set_interaction_label(his, CARBOXYL)
                        shift_pka(his, self.CARBOXYLIC_ACID_PKA_SHIFT)
                        log_interaction(n, carboxyl_residue, CARBOXYL, distance,
                                        angle)
                    set_interaction_label(his, ACCEPTOR, pdbname.strip())
                    is_interacting = True
                o1.xyz = o1_xyz

[docs]    def static_donor_interactions(self):
        """Check if histidine is interacting with a static donor"""
        report(LOG_BASIC, "Determining pKa interactions for static donors")
        st = self._st
        for his in self._histidines:
            static_donors = []
            for residue in self._neighborhoods[str(his)]:
                for atom in residue.atom:
                    if atom.atomic_number == 1:
                        continue
                    # We consider lysines static
                    p = atom.property
                    is_static_donor = bool(
                        p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY))
                    is_static_donor |= atom.pdbres in {
                        'LYS ', 'LYH '
                    } and atom.pdbname == ' NZ '
                    if not is_static_donor:
                        continue
                    for bonded_atom in atom.bonded_atoms:
                        if bonded_atom.atomic_number != 1:
                            continue
                        if not bonded_atom.property.get(DONOR_PROPERTY):
                            continue
                        static_donors.append((atom, bonded_atom))
            iterator = itertools.product(self._nitrogen_pdbnames, static_donors)
            for pdbname, (donor, hydrogen) in iterator:
                nitrogen = his.getAtomByPdbName(pdbname)
                distance = st.measure(hydrogen, nitrogen)
                if distance > self.MAX_STATIC_DONOR_INTERACTION_DISTANCE:
                    continue
                angle = st.measure(nitrogen, hydrogen, donor)
                if angle < self.MIN_STATIC_DONOR_INTERACTION_ANGLE:
                    continue
                if not has_interaction_label(his, STATIC_DONOR):
                    shift_pka(his, self.STATIC_DONOR_PKA_SHIFT)
                set_interaction_label(his, STATIC_DONOR)
                log_interaction(nitrogen, donor, STATIC_DONOR, distance, angle)

[docs]    def identify_forced_amide_states(self):
        """
        Identify amide residues that are forced in a state due to them
        interacting with a static donor. This is done in a single pass and not
        iteratively.
        """
        st = self._st
        for residue in st.residue:
            # ProtAssign resets the flip of the amide so we need to check both
            # the O and N atom to see if it is interacting with a static donor.
            if residue.pdbres == 'ASN ':
                o_pdbname = ' OD1'
                n_pdbname = ' ND2'
            elif residue.pdbres == 'GLN ':
                o_pdbname = ' OE1'
                n_pdbname = ' NE2'
            else:
                continue
            o1 = residue.getAtomByPdbName(o_pdbname)
            n1 = residue.getAtomByPdbName(n_pdbname)
            if o1 is None or n1 is None:
                continue
            # Remove previous assigned property names of protassign to amides
            for a in (o1, n1):
                p = a.property
                p.pop(DONOR_PROPERTY, None)
                p.pop(ACCEPTOR_PROPERTY, None)
            for o, n in [(o1, n1), (n1, o1)]:
                for iatom in self._distance_cell.query(*o.xyz):
                    atom = st.atom[iatom]
                    if str(atom.getResidue()) == str(residue):
                        continue
                    if atom.atomic_number == 1:
                        continue
                    p = atom.property
                    if not (p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)):
                        continue
                    for hydrogen in atom.bonded_atoms:
                        if hydrogen.atomic_number != 1:
                            continue
                        distance = st.measure(o, hydrogen)
                        if distance > self.MAX_STATIC_DONOR_INTERACTION_DISTANCE:
                            continue
                        angle = st.measure(atom, hydrogen, o)
                        if angle < self.MIN_STATIC_DONOR_INTERACTION_ANGLE:
                            continue
                        o.property[ACCEPTOR_PROPERTY] = True
                        o.property[STATIC_PROPERTY] = True
                        n.property[DONOR_PROPERTY] = True
                        n.property[STATIC_PROPERTY] = True
                        report(
                            LOG_BASIC,
                            f"Forced acceptor/donor found for amide {residue.pdbres} {residue}"
                        )
            if o.property.get(ACCEPTOR_PROPERTY) and n.property.get(
                    ACCEPTOR_PROPERTY):
                report(
                    LOG_BASIC,
                    f"Amide {residue.pdbres} {residue} is interacting with static donor on both sides"
                )
                for atom in (o, n):
                    del atom.property[ACCEPTOR_PROPERTY]
                    del atom.property[DONOR_PROPERTY]
                    del atom.property[STATIC_PROPERTY]

[docs]    def static_acceptor_interactions(self):
        """Check if histidine is interacting with a static acceptor"""
        report(LOG_BASIC, "Determining pKa interactions for static acceptors")
        st = self._st
        for his in self._histidines:
            # If histidine is already involved in a static donor interaction,
            # don't bother.
            if any(
                    has_interaction_label(his, label)
                    for label in (METAL, STATIC_DONOR)):
                continue
            # The histidine needs to be protonated in this function. Extract
            # histidine and protonate
            hip_st = his.extractStructure()
            hip = next(iter(hip_st.residue))
            protonate_histidine(hip)
            acceptors = []
            for residue in self._neighborhoods[str(his)]:
                for atom in residue.atom:
                    p = atom.property
                    if p.get(ACCEPTOR_PROPERTY):
                        if str(atom.getResidue()) == str(his):
                            continue
                        acceptor = hip_st.addAtom(atom.element, *atom.xyz)
                        acceptor.pdbres = atom.pdbres
                        acceptor.resnum = atom.resnum
                        acceptor.chain = atom.chain
                        acceptors.append(acceptor)
            iterator = itertools.product(self._nitrogen_pdbnames, acceptors)
            for pdbname, acceptor in iterator:
                nitrogen = hip.getAtomByPdbName(pdbname)
                hydrogen = None
                for ba in nitrogen.bonded_atoms:
                    if ba.atomic_number == 1:
                        hydrogen = ba
                assert hydrogen is not None
                distance = hip_st.measure(hydrogen, acceptor)
                if distance > self.MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE:
                    continue
                angle = hip_st.measure(nitrogen, hydrogen, acceptor)
                if angle < self.MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE:
                    continue
                set_interaction_label(his, ACCEPTOR, pdbname.strip())
                log_interaction(nitrogen, acceptor, ACCEPTOR, distance, angle)
            for nitrogens in self._nitrogen_pdbnames:
                if all(
                        has_interaction_label(his, ACCEPTOR, n)
                        for n in nitrogens):
                    if not has_interaction_label(his, ACCEPTOR):
                        shift_pka(his, self.DOUBLE_SIDED_HBOND_PKA_SHIFT)
                    set_interaction_label(his, ACCEPTOR)
                    report(LOG_BASIC, f"Both nitrogens are donating of {his}")


class _CarboxylicAcidpKaPredictor:

    # pKa related
    INTERNAL_PKA = None
    # Shifts
    METAL_PKA_SHIFT = -3.0
    STATIC_ACCEPTOR_PKA_SHIFT = 4.0
    CARBOXYLIC_ACID_PKA_SHIFT = 4.0

    # Interaction related distances and angles
    MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE = 3.5
    MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE = 150
    MAX_METAL_INTERACTION_DISTANCE = 3.5
    MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE = 3.8
    MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE = 150

    # Some names/strings
    PDBRES_NAMES = None
    CARBON_PDBNAME = None
    OXYGEN_PDBNAMES = None
    SIDECHAIN_PDBNAMES = None

    def predict(self, st):
        self._st = st
        self._residues = []
        for r in st.residue:
            if r.pdbres not in self.PDBRES_NAMES:
                continue
            for pdbname in self.SIDECHAIN_PDBNAMES:
                atom = r.getAtomByPdbName(pdbname)
                if atom is None:
                    break
            else:
                self._residues.append(r)
        for residue in self._residues:
            set_pka(residue, self.INTERNAL_PKA)
        self._distance_cell = create_distance_cell(st,
                                                   MAX_INTERACTION_DISTANCE,
                                                   honor_pbc=False)
        self._neighborhoods = get_residue_neighborhoods(st, self._residues,
                                                        self._distance_cell)

        # Shift pKa down. Favor charged state
        self.metal_interactions()

        # Shift pKa up. Favor neutral state
        self.carboxyl_interactions()
        self.static_acceptor_interactions()

    def static_acceptor_interactions(self):
        """Check if histidine is interacting with a static acceptor"""
        report(LOG_BASIC, "Determining pKa interactions for static acceptors")
        st = self._st
        for carboxylic_residue in self._residues:
            if has_interaction_label(carboxylic_residue, METAL):
                continue
            if has_interaction_label(carboxylic_residue, CARBOXYL):
                continue
            acceptors = []
            for residue in self._neighborhoods[str(carboxylic_residue)]:
                for atom in residue.atom:
                    p = atom.property
                    if p.get(ACCEPTOR_PROPERTY) and p.get(STATIC_PROPERTY):
                        acceptors.append(atom)
            carbon = carboxylic_residue.getAtomByPdbName(self.CARBON_PDBNAME)
            oxygen1 = carboxylic_residue.getAtomByPdbName(
                self.OXYGEN_PDBNAMES[0])
            oxygen2 = carboxylic_residue.getAtomByPdbName(
                self.OXYGEN_PDBNAMES[1])
            oxygen1_xyz = np.asarray(oxygen1.xyz, float)
            ave_oxygen_xyz = (oxygen1_xyz + oxygen2.xyz) / 2.0
            oxygen1.xyz = ave_oxygen_xyz
            for acceptor in acceptors:
                if str(acceptor.getResidue()) == str(carboxylic_residue):
                    continue
                distance = st.measure(oxygen1, acceptor)
                if distance > self.MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE:
                    continue
                angle = st.measure(carbon, oxygen1, acceptor)
                if angle < self.MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE:
                    continue
                if not has_interaction_label(carboxylic_residue, ACCEPTOR):
                    set_interaction_label(carboxylic_residue, ACCEPTOR)
                    shift_pka(carboxylic_residue,
                              self.STATIC_ACCEPTOR_PKA_SHIFT)
                log_interaction(oxygen1, acceptor, ACCEPTOR, distance, angle)
            # Reset oxygen position
            oxygen1.xyz = oxygen1_xyz

    def metal_interactions(self):
        report(LOG_BASIC, "Determining pKa penalties for metal interactions")
        st = self._st
        for coo_residue in self._residues:
            metals = []
            residues = self._neighborhoods[str(coo_residue)]
            for residue in residues:
                for atom in residue.atom:
                    if atom.atomic_number not in ORGANIC_ATOMIC_NUMBERS:
                        metals.append(atom)
            if not metals:
                continue
            for pdbname in self.OXYGEN_PDBNAMES:
                oxygen = coo_residue.getAtomByPdbName(pdbname)
                if oxygen is None:
                    continue
                for metal in metals:
                    distance = st.measure(oxygen, metal)
                    if distance > self.MAX_METAL_INTERACTION_DISTANCE:
                        continue
                    set_interaction_label(coo_residue, METAL)
                    shift_pka(coo_residue, self.METAL_PKA_SHIFT)
                    break

    def carboxyl_interactions(self):
        """Check if histidine is interacting with a carboxyl group."""
        report(LOG_BASIC, "Determining pKa penalties for carboxyl interactions")
        st = self._st
        for coo_residue in self._residues:
            carboxyls = []
            for residue in self._neighborhoods[str(coo_residue)]:
                carboxyls += get_carboxyl_atoms(residue)
            # Now check for interactions
            carbon = coo_residue.getAtomByPdbName(self.CARBON_PDBNAME)
            oxygen1 = coo_residue.getAtomByPdbName(self.OXYGEN_PDBNAMES[0])
            oxygen2 = coo_residue.getAtomByPdbName(self.OXYGEN_PDBNAMES[1])
            oxygen1_xyz = np.asarray(oxygen1.xyz, float)
            ave_oxygen_xyz = (oxygen1_xyz + oxygen2.xyz) / 2.0
            oxygen1.xyz = ave_oxygen_xyz
            for c, o1, o2 in carboxyls:
                carboxyl_residue = c.getResidue()
                for oxygen in (o1, o2):
                    distance = st.measure(oxygen1, oxygen)
                    if distance > self.MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE:
                        continue
                    angle = st.measure(carbon, oxygen1, oxygen)
                    if angle < self.MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE:
                        continue
                    # Do not count the same carboxyl more than once. Also do
                    # not protonate both carboxyls, although it is inclear
                    # whether they should both be protonated or just a single
                    # one.
                    if not has_interaction_label(
                            coo_residue,
                            CARBOXYL) and not has_interaction_label(
                                carboxyl_residue, CARBOXYL):
                        set_interaction_label(coo_residue, CARBOXYL)
                        shift_pka(coo_residue, self.CARBOXYLIC_ACID_PKA_SHIFT)
                        log_interaction(oxygen1, carboxyl_residue, CARBOXYL,
                                        distance, angle)
            oxygen1.xyz = oxygen1_xyz


[docs]class AsparticAcidpKaPredictor(_CarboxylicAcidpKaPredictor):
    """Aspartic acid emperical pKa predictor"""

    # Taken from average of pKa values in cleaned up PKAD database
    INTERNAL_PKA = 3.4

    PDBRES_NAMES = ASPARTIC_ACID_NAMES
    CARBON_PDBNAME = ' CG '
    OXYGEN_PDBNAMES = [' OD1', ' OD2']
    SIDECHAIN_PDBNAMES = OXYGEN_PDBNAMES + [CARBON_PDBNAME]


[docs]class GlutamicAcidpKaPredictor(_CarboxylicAcidpKaPredictor):
    """Glutamic acid emperical pKa predictor"""

    # Taken from average of pKa values in cleaned up PKAD database
    INTERNAL_PKA = 4.2

    PDBRES_NAMES = GLUTAMIC_ACID_NAMES
    CARBON_PDBNAME = ' CD '
    OXYGEN_PDBNAMES = [' OE1', ' OE2']
    SIDECHAIN_PDBNAMES = OXYGEN_PDBNAMES + [CARBON_PDBNAME]


[docs]class PropKaException(Exception):

[docs]    def __init__(self, value):
        self.parameter = value


[docs]def report(message_level=1, message=''):
    if message_level <= log_level:
        print(message)
        sys.stdout.flush()
    return


[docs]def measure(ct,
            atom1=None,
            atom2=None,
            atom3=None,
            atom4=None,
            use_xtal=False,
            max_dist=10.0):
    # A replacement for ct.measure() which also supports xtal symmetry, and can return the coordinates of
    # the nearest xtal copies of each atom when requested.
    def find_nearest_atom_image(ct, atom1, atom2):
        lowest_distance = None
        best_match = None
        atoms = analyze.evaluate_asl(ct, 'atom.i_pa_atomindex %d' % atom2)
        for atom in atoms:
            distance = ct.measure(atom1, atom)
            if lowest_distance is None or distance < lowest_distance:
                lowest_distance = distance
                best_match = int(atom)
        return best_match

    def extract(ct, atoms):
        new_ct = structure.create_new_structure(len(atoms))
        for i, atom in enumerate(atoms):
            new_atom = new_ct.atom[i + 1]
            old_atom = ct.atom[atom]
            new_atom.element = old_atom.element
            new_atom.xyz = old_atom.xyz
            new_atom.color = old_atom.color
            new_atom.atom_type = old_atom.atom_type
            new_atom.property['i_pa_atomindex'] = old_atom.property[
                'i_pa_atomindex']
        for property in [
                's_pdb_PDB_CRYST1_Space_Group', 'r_pdb_PDB_CRYST1_a',
                'r_pdb_PDB_CRYST1_b', 'r_pdb_PDB_CRYST1_c',
                'r_pdb_PDB_CRYST1_alpha', 'r_pdb_PDB_CRYST1_beta',
                'r_pdb_PDB_CRYST1_gamma'
        ]:
            new_ct.property[property] = ct.property[property]
        return new_ct

    def merge_mates(atom_ct, atom_ct_mates):
        atom_ct_mates.insert(0, atom_ct)
        for i, mate in enumerate(atom_ct_mates):
            for atom in mate.atom:
                atom.property['i_pa_xtalatom'] = i
        for i in range(1, len(atom_ct_mates)):
            atom_ct_mates[0].extend(atom_ct_mates[i])
        return atom_ct_mates

    def find_atom1(atom_ct_mates, atom1):
        for atom in atom_ct_mates[0].atom:
            if atom.property['i_pa_xtalatom'] == 0 and atom.property[
                    'i_pa_atomindex'] == atom1:
                xtal_atom1 = int(atom)
                break
        return xtal_atom1

    def create_list(atom1, atom2, atom3, atom4):
        if atom4 is not None:
            atoms = [atom1, atom2, atom3, atom4]
        elif atom3 is not None:
            atoms = [atom1, atom2, atom3]
        else:
            atoms = [atom1, atom2]
        return atoms

    def find_other_atoms(atom_ct_mates, xtal_atom1, atom2, atom3, atom4):
        xtal_atom2 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom1,
                                             atom2)
        if atom3 is not None:
            xtal_atom3 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom2,
                                                 atom3)
        else:
            xtal_atom3 = None
        if atom4 is not None:
            xtal_atom4 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom3,
                                                 atom4)
        else:
            xtal_atom4 = None
        return (xtal_atom2, xtal_atom3, xtal_atom4)

    # Only do the expensive xtal version of measure if it's a surface atom (or if not sure if it's a surface atom).
    if use_xtal and ('i_pa_surfaceatom' not in ct.atom[atom1].property or
                     ct.atom[atom1].property['i_pa_surfaceatom'] == 1):
        atoms = create_list(atom1, atom2, atom3, atom4)
        atom_ct = extract(ct, atoms)
        atom_ct_mates = analyze.generate_crystal_mates(atom_ct, radius=max_dist)
        atom_ct_mates = merge_mates(atom_ct, atom_ct_mates)
        # Get atom1 (in the primary cell).
        xtal_atom1 = find_atom1(atom_ct_mates, atom1)
        # Get atom2 (the closest to atom1), and so on.
        (xtal_atom2, xtal_atom3,
         xtal_atom4) = find_other_atoms(atom_ct_mates, xtal_atom1, atom2, atom3,
                                        atom4)
        return atom_ct_mates[0].measure(xtal_atom1, xtal_atom2, xtal_atom3,
                                        xtal_atom4)
    else:
        value = ct.measure(atom1, atom2, atom3, atom4)
        return value


[docs]def calculate_interaction_matrix(ct, iatoms, distance, use_xtal=False):
    """
    Create an interaction matrix based on the changeable_index atom property

    :param ct: Structure with annotated atoms having set the
        i_pa_changeable_index corresponding the the index of the changeable
    :type ct: structure.Structure
    :param iatoms: List of atom indices which take part in interaction
    :type iatoms: List[int]
    :param distance: Max distance between interacting atoms
    :type distance: float
    :param use_xtal: Take into account crystal symmetry mates
    :param use_xtal: bool

    :return: interaction matrix allowing double indexing: interact[i][j]
    :rtype: defaultdict(lambda: defaultdict(bool))
    """
    # Interacting matrix implemented as a nested dict so we can do
    # interact[i][j] and be consistent with original implementation while
    # saving on memory.
    interact = defaultdict(lambda: defaultdict(bool))
    # Create a distance cell to query containing only the atoms of interest
    if not iatoms:
        return interact
    ct_part = ct.extract(iatoms, copy_props=True)
    if use_xtal:
        mates = analyze.generate_crystal_mates(ct_part, distance)
        for mate in mates[1:]:
            ct_part.extend(mate)
    dcell = create_distance_cell(ct_part, distance, honor_pbc=False)
    for iatom1 in iatoms:
        atom1 = ct.atom[iatom1]
        index1 = atom1.property['i_pa_changeable_index']
        for iatom2 in dcell.query(*atom1.xyz):
            index2 = ct_part.atom[iatom2].property['i_pa_changeable_index']
            # Changeables can currently only interact with other
            # changeables. It is possible that a changeable has
            # self-interaction with its crystal mate, but this is not
            # implemented down-stream, so we ignore that for now.
            if index1 == index2:
                continue
            interact[index1][index2] = True
            interact[index2][index1] = True
    return interact


[docs]def annotate_structure_interactors(ct, acceptors, donors, clashers):
    """
    Set atom property for each interactor class

    :param ct: Structure to annotate
    :type ct: Structure
    :param acceptors: List of acceptor atom indices
    :type acceptors: List[int]
    :param donors: List of donor pair atom indices
    :type donors: List[tuple[int, int]]
    :param clashers: List of clasher atom indices
    :type clashers: List[int]

    :return: None but sets atom properties
    :rtype: NoneType
    """
    for acceptor in acceptors:
        ct.atom[acceptor].property[ACCEPTOR_PROPERTY] = 1
    for heavy, hydrogen in donors:
        if heavy == hydrogen:
            ct.atom[heavy].property[ION_PROPERTY] = 1
        else:
            ct.atom[heavy].property[DONOR_PROPERTY] = 1
            ct.atom[hydrogen].property[DONOR_PROPERTY] = 1
    for clasher in clashers:
        ct.atom[clasher].property[CLASHER_PROPERTY] = 1


[docs]def generate_annotated_ct(ct, donors, acceptors, clashers, use_xtal=False):
    """
    Generate an annotated Structure that contains crystal mates.
    The annotated heavily speeds up the self scoring step for large and xtal structures

    :param ct: Structure to annotate
    :type ct: Structure
    :param donors: List of donor pairs
    :type donors: List[Tuple[int, int]]
    :param acceptors: List of acceptor atom indices
    :type acceptors: List[int]
    :param clashers: List of clasher atom indices
    :type clashers: List[int]
    :param use_xtal: Create crystal mates in annotated output structure
    :type use_xtal: bool

    :return: New annotated structure with property ANNOTATED_PROPERTY set to True
    :rtype: Structure
    """
    # Annotate structure so we can use an efficient distance cell
    # to extract close contacts during self-scoring.
    working_ct = ct.copy()
    annotate_structure_interactors(working_ct, acceptors, donors, clashers)
    # Setup a crystal mate ct if necessary.
    if use_xtal:
        mates = analyze.generate_crystal_mates(
            working_ct, radius=MAX_HEAVY_HEAVY_INTERACTION_DISTANCE)
        # The first one is just a copy of the original
        for mate in mates[1:]:
            working_ct.extend(mate)
    working_ct.property[ANNOTATED_PROPERTY] = True
    return working_ct


[docs]def check_residue_flip_state(res: structure._Residue) -> tuple:
    """
    Determine whether a residue cannot be flipped, is, or is not flipped.

    :param res: a protein residue
    :return: a tuple of `(state, msg)`, where `state` describes whether the
            residue is flipped (`True`), is not flipped (`False`), or cannot be
            flipped (`None`); if `None`, `msg` will contain an explanation
    :rtype: tuple[bool or NoneType, str]
    """
    pdbname_atom_map = {
        atom.pdbname.strip(): atom
        for atom in res.atom
        if atom.atomic_number != 1
    }
    is_amd = any(
        all(n in pdbname_atom_map
            for n in pdbnames)
        for pdbnames in AMD_PDBNAME_GROUPS)
    is_his = all(name in pdbname_atom_map for name in HIS_PDBNAMES)

    is_flipped, msg = None, ''
    if not is_amd and not is_his:
        msg = (f'Cannot determine flip state of {get_residue_string(res)}: only'
               f' {RESNAME_FLIPSTATES_MAP.keys()} can be flipped.')
    elif is_amd:
        # Residue is an amide; verify that there are no unexpected bonds
        nitrogen = pdbname_atom_map.get(ND2) or pdbname_atom_map.get(NE2)
        carbon = pdbname_atom_map.get(CD) or pdbname_atom_map.get(CG)
        bound_atoms = [a for a in get_heavy_neighbors(nitrogen) if a != carbon]
        if bound_atoms:
            msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res))
        else:
            is_flipped = carbon.property.get(STATE_PROPERTY) == FLIP
    else:
        # Residue is a histidine; verify that there are no unexpected bonds
        for pdbname, neighbor_pdbnames in HIS_NEIGHBOR_MAP.items():
            atom = pdbname_atom_map[pdbname]
            exp_neighbors = [
                pdbname_atom_map[name] for name in neighbor_pdbnames
            ]
            bound_atoms = [
                a for a in get_heavy_neighbors(atom) if a not in exp_neighbors
            ]
            if bound_atoms:
                msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res))
                break
        else:
            carbon = pdbname_atom_map.get(CG)
            flip_tag = carbon.property.get(STATE_PROPERTY)
            is_flipped = flip_tag is not None and flip_tag.startswith(FLIP)

    return is_flipped, msg


[docs]def get_residue_flip_state(res: structure._Residue) -> Union[bool, type(None)]:
    """
    Return the flip state of a protein residue.

    A truncated version of `check_residue_flip_state()`.

    :param res: a protein residue
    :return: the flip state of a residue
    """
    flip_state, _ = check_residue_flip_state(res)
    return flip_state


[docs]def get_residue_string(residue_or_atom) -> str:
    """
    Return a string describing a residue from a residue or atom.

    The string will match the format
            <chain>:<residue PDB code> <residue number>[<insertion code>]

    :param residue_or_atom: a residue or atom
    :type residue_or_atom: _Residue or _StructureAtom

    :return: a string describing the residue
    """
    chain = '_' if residue_or_atom.chain == ' ' else residue_or_atom.chain
    pdbres = residue_or_atom.pdbres.strip()
    inscode = residue_or_atom.inscode.strip()
    return f'{chain}:{pdbres} {residue_or_atom.resnum}{inscode}'


[docs]def get_atom_string(atom):
    """Return a string describing atom"""
    return get_residue_string(atom) + ':' + atom.pdbname


[docs]def get_heavy_neighbors(atom: structure._StructureAtom) -> list:
    """
    :param atom: an atom
    :return: a list of heavy (non-H) atoms covalently bound to `atom`
    :rtype: list[structure._StructureAtom]
    """
    return [
        bond.atom2
        for bond in atom.bond
        if bond.atom2.atomic_number != 1 and bond.order != 0
    ]


[docs]def get_residue_from_changeable(ct, changeable):
    heavies = changeable.get_heavies()
    return ct.atom[heavies[0]].getResidue()


@dataclass
class _WaterState:
    """
    Container for water states

    :param coordinates: Hydrogen coordinates of water state
    :type coordinates: Tuple[List[float], List[float]]
    :param donating_to: List of atom indices to which water is donating
    :type donating_to: List[int]
    :param accepting_from: List of atom indices from water is accepting a hydrogen
    :type accepting_from: List[int]
    """
    # hydrogen1 and hydrogen2 coordinates
    coordinates: Tuple[List[float], List[float]]
    donating_to: List[int] = field(default_factory=list)
    accepting_from: List[int] = field(default_factory=list)


[docs]class WaterStateEnumerator:
    """
    Enumerate discrete water states that are hydrogen bonding with nearby acceptors and donors

    The goal is to sample likely states while also limiting the number of
    states as solving the combinatorial problem gets harder with more and more
    states.
    """

    # OH bond lenght of water in angstrom
    OH_LENGTH = 1.000
    # HOH angle of water in degrees
    HOH_ANGLE = 109.5
    # Minimum required hydrogen to non-static heavy donor atom distance for it
    # to be not considered clashing.
    MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE = 2.5

[docs]    def __init__(self, ct, oxygen, acceptors, donors):
        """
        :param ct: Annotated structure with donor/acceptor and static flags.
        :type ct: Structure
        :param oxygen: Oxygen atom of water
        :type oxygen: _StructureAtom
        :param acceptors: List of acceptor atom indices
        :type acceptors: List[int]
        :param donors: List of donor atom indices
        :type donors: List[Tuple[int, int]]
        """
        self.ct = ct
        self.oxygen = oxygen
        self.hydrogen1, self.hydrogen2 = list(oxygen.bonded_atoms)
        self.acceptors = acceptors
        self.donors = donors
        # These attributes will be filled by precalculate and will contain a
        # mapping of atom index to distance or unit vector from oxygen to atom
        self._vector_lookup = {}
        self._distance_lookup = {}
        self._precalculate()
        self._filter_acceptor_donors()
        self._separate_donors()

    def _precalculate(self):
        """
        Pre-calculate all unit vectors and distances from water oxygen to
        acceptor and heavy donor atoms
        """
        oxygen_xyz = np.asarray(self.oxygen.xyz, float)
        for iheavy in itertools.chain(self.acceptors, *self.donors):
            heavy = self.ct.atom[iheavy]
            if heavy.atomic_number == 1:
                continue
            if iheavy == self.oxygen.index:
                continue
            vector = heavy.xyz - oxygen_xyz
            distance = np.linalg.norm(vector)
            if distance < 0.01:
                report(
                    LOG_BASIC,
                    f"Water is severily clashing with {heavy.pdbres} {heavy.resnum}{heavy.inscode} {heavy.chain}"
                )
                continue
            vector /= distance
            self._vector_lookup[iheavy] = vector
            self._distance_lookup[iheavy] = distance

    def _filter_acceptor_donors(self):
        """
        Filter acceptor and donor atoms up to a certain distance and remove
        water from donor list. This is to limit the number of states that are
        enumerated.

        Resets the acceptors and donors attributes
        """
        distance_lookup = self._distance_lookup
        if distance_lookup:
            # The typical max distance between heavy atoms is 4.0 of the
            # acceptors and donors. This is probably too far and we can reduce
            # the distance and filter the acceptors and donors for more
            # efficient state generation.
            # Look for interactors within a certain distance. To limit the number
            # of states generated, stop after at least 2 interactors are found. We
            # start at a distance of 3.5 which is pretty generous.
            distances = np.asarray(list(distance_lookup.values()))
            for max_distance in np.linspace(3.5, 4.0, num=11, endpoint=True):
                ninteractors = (distances <= max_distance).sum()
                if ninteractors > 1:
                    break
        # Filter acceptors
        filtered_acceptors = []
        for iheavy in self.acceptors:
            distance = distance_lookup.get(iheavy)
            if distance is None:
                continue
            if distance <= max_distance:
                filtered_acceptors.append(iheavy)
        # Filter donors
        filtered_donors = []
        for iheavy, ihydrogen in self.donors:
            distance = distance_lookup.get(iheavy)
            if distance is None:
                continue
            if distance <= max_distance:
                filtered_donors.append((iheavy, ihydrogen))
        self.acceptors = filtered_acceptors
        self.donors = filtered_donors

    def _separate_donors(self):
        """Separate the donors into metals and a list of heavy atom"""
        # The donor list also contains metals. Separate these out.
        donor_heavies = []
        for heavy, hydrogen in self.donors:
            # Its a metal if heavy == hydrogen
            if heavy != hydrogen:
                donor_heavies.append(heavy)
        # Some heavies can be in the list multiple times, e.g. other waters
        # have two O-H groups where the O is the same atom.
        self.donor_heavies = sorted(set(donor_heavies))

    def _is_clashing_with_static_donor(self, donor_heavy):
        """
        Check if any of the water hydogens is clashing with a static hydrogen donor

        :param donor_heavy: Donor heavy atom
        :type donor_heavy: _StructureAtom

        :return: True if clashing, False if not
        :rtype: bool
        """
        p = donor_heavy.property
        if not (p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)):
            return False
        for iatom in donor_heavy.bonded_atoms:
            atom = self.ct.atom[iatom]
            if atom.atomic_number != 1:
                continue
            for hydrogen in (self.hydrogen1, self.hydrogen2):
                distance = self.ct.measure(iatom, hydrogen)
                if distance < POLAR_CLASH_CUTOFF:
                    return True
        return False

    def _might_clash_with_nonstatic_donor(self, hydrogen, donor_heavy):
        """
        Check if hydrogen might be clashing with hydrogen of a nonstatic donor

        :param hydrogen: Water hydrogen to check
        :type hydrogen: _StructureAtom
        :param donor_heavy: Donor heavy atom
        :type donor_heavy: _StructureAtom

        :return: True if clashing, False if not
        :rtype: bool
        """
        p = donor_heavy.property
        if not p.get(DONOR_PROPERTY) or p.get(STATIC_PROPERTY):
            return False
        distance = self.ct.measure(hydrogen, donor_heavy)
        return distance < self.MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE

[docs]    def enumerate_acceptor_acceptor_states(self):
        """
        Enumerate states where water is donating to two acceptors at the same time

        :return: List of water states
        :rtype: List[_WaterState]
        """
        # Align waters so it is hydrogen bonded with 2 acceptors.
        # Rename so we don't have self everywhere
        oxygen = self.oxygen
        hydrogen1 = self.hydrogen1
        hydrogen2 = self.hydrogen2
        ct = self.ct
        states = []
        heavies = itertools.chain(self.acceptors, self.donor_heavies)
        for iheavy1, iheavy2 in itertools.combinations(heavies, 2):
            angle = ct.measure(iheavy1, oxygen, iheavy2)
            is_static_donor = False
            for iatom in (iheavy1, iheavy2):
                p = ct.atom[iatom].property
                if p.get(STATIC_PROPERTY) and p.get(DONOR_PROPERTY):
                    is_static_donor = True
            if is_static_donor:
                continue
            # Check if angle falls within a reasonable interval so the
            # water can actually hydrogen bond to both at the same time
            if not (80 < angle <= 170):
                continue
            vector1 = self._vector_lookup[iheavy1]
            vector2 = self._vector_lookup[iheavy2]
            hydrogen1.xyz = oxygen.xyz + vector1 * self.OH_LENGTH
            hydrogen2.xyz = oxygen.xyz + vector2 * self.OH_LENGTH
            # Since the scoring function used during optimization is so
            # dependent on distance, align perfectly with the closest
            # acceptor
            heavy1 = ct.atom[iheavy1]
            heavy2 = ct.atom[iheavy2]
            distance1 = self._distance_lookup[iheavy1]
            distance2 = self._distance_lookup[iheavy2]
            if distance1 <= distance2:
                ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2)
            else:
                ct.adjust(self.HOH_ANGLE, hydrogen2, oxygen, hydrogen1)
            # Add the state if the water is not clashing with a static donor
            clashing = any(
                self._is_clashing_with_static_donor(heavy)
                for heavy in (heavy1, heavy2))
            if clashing:
                continue
            state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
                                donating_to=[iheavy1, iheavy2])
            states.append(state)
        return states

[docs]    def enumerate_donor_donor_states(self):
        """
        Enumerate states where water is accepting from two donors at the same time

        :return: List of water states
        :rtype: List[_WaterState]
        """
        # Generate states where water is accepting from 2 donors
        oxygen = self.oxygen
        hydrogen1 = self.hydrogen1
        hydrogen2 = self.hydrogen2
        states = []
        angle = 180 - self.HOH_ANGLE / 2.0
        for donor1, donor2 in itertools.combinations(self.donor_heavies, 2):
            heavy1 = self.ct.atom[donor1]
            heavy2 = self.ct.atom[donor2]
            average_xyz = (np.asarray(heavy1.xyz) + heavy2.xyz) / 2.0
            vector = -(average_xyz - oxygen.xyz)
            distance = np.linalg.norm(vector)
            if distance < 0.01:
                continue
            vector /= distance
            hydrogen1.xyz = np.asarray(oxygen.xyz) + vector * self.OH_LENGTH
            hydrogen2.xyz = hydrogen1.xyz
            # Temporarily set the heavy coordinate to the average to place
            # the hydrogens
            heavy1_xyz = heavy1.xyz
            heavy1.xyz = average_xyz
            self.ct.adjust(angle, heavy1, oxygen, hydrogen1)
            self.ct.adjust(-angle, heavy1, oxygen, hydrogen2)
            heavy1.xyz = heavy1_xyz
            state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
                                accepting_from=[int(heavy1),
                                                int(heavy2)])
            states.append(state)
        return states

[docs]    def enumerate_acceptor_states(self):
        """
        Enumerate states where water is donating to a single acceptor

        :return: List of water states
        :rtype: List[_WaterState]
        """
        # Generate states where water is donating to a single acceptor,
        # while letting other hydrogen not point towards static donors.
        oxygen = self.oxygen
        hydrogen1 = self.hydrogen1
        hydrogen2 = self.hydrogen2
        states = []
        for iheavy in sorted(
                set(itertools.chain(self.acceptors, self.donor_heavies))):
            vector = self._vector_lookup[iheavy]
            heavy = self.ct.atom[iheavy]
            if heavy.property.get(STATIC_PROPERTY) and heavy.property.get(
                    DONOR_PROPERTY):
                continue
            # Check if angle falls within a reasonable interval so the
            hydrogen1.xyz = oxygen.xyz + vector * self.OH_LENGTH
            # Move hydrogen2 so it makes the correct angle with hydrogen1
            hydrogen2.xyz = hydrogen1.xyz
            if self._is_clashing_with_static_donor(heavy):
                continue
            self.ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2)
            # Keep a list of the other heavy donors to calculate clashes
            other_donor_heavies = list(self.donor_heavies)
            try:
                other_donor_heavies.remove(iheavy)
            except ValueError:
                pass
            # Check if hydrogen2 is interacting with a static donor. If
            # so, generate a state where it is not clashing
            axis = self._vector_lookup[iheavy]
            for angle in [None, 120, 120]:
                if angle is not None:
                    self.rotate_hydrogens_along_axis(axis, angle)
                clashing = any(
                    self._is_clashing_with_static_donor(self.ct.atom[iheavy2])
                    for iheavy2 in other_donor_heavies)
                if clashing:
                    continue
                state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
                                    donating_to=[iheavy])
                states.append(state)
                # Check if the current configuration is interacting with a
                # non-static donor. If so, move on to the next dihedral angle iteration
                # to add another state so the donor could donate to the
                # water in some of its configurations.
                # If not, hydrogen2 is on a stable non-clashing position
                # and we are done.
                clashing = any(
                    self._might_clash_with_nonstatic_donor(
                        hydrogen2, self.ct.atom[iheavy2])
                    for iheavy2 in other_donor_heavies)
                if not clashing:
                    break
        return states

[docs]    def enumerate_donor_states(self):
        """
        Enumerate states where water is accepting from a single donor

        :return: List of water states
        :rtype: List[_WaterState]
        """
        oxygen = self.oxygen
        hydrogen1 = self.hydrogen1
        hydrogen2 = self.hydrogen2
        ct = self.ct
        states = []
        hydrogen_angle = 180 - self.HOH_ANGLE / 2.0
        oxygen_xyz = np.asarray(oxygen.xyz)
        heavies_aligned_to = set()
        for iheavy, ihydrogen in self.donors:
            if iheavy in heavies_aligned_to:
                continue
            # Check if the donor is a static donor, since then we know that the
            # hydrogen position is fixed. If it is not static then we need to
            # assume that the hydrogen will be pointing straight towards the
            # water oxygen and we can use the heavy atom to align the angle.
            iatom_to_alignto = ihydrogen
            if not ct.atom[iheavy].property.get(STATIC_PROPERTY):
                iatom_to_alignto = iheavy
                heavies_aligned_to.add(iheavy)
            ct.adjust(hydrogen_angle, iatom_to_alignto, oxygen, hydrogen1)
            hydrogen2.xyz = hydrogen1.xyz
            ct.adjust(-hydrogen_angle, iatom_to_alignto, oxygen, hydrogen2)
            axis = -self._vector_lookup[iheavy]
            # Keep a list of the other heavy donors to calculate clashes
            other_donor_heavies = list(self.donor_heavies)
            try:
                other_donor_heavies.remove(iheavy)
            except ValueError:
                pass
            for angle in [None, 120, 120]:
                if angle is not None:
                    self.rotate_hydrogens_along_axis(axis, angle)
                # Check if hydrogen is clashing with static donor
                clashing = any(
                    self._is_clashing_with_static_donor(ct.atom[iheavy2])
                    for iheavy2 in other_donor_heavies)
                if clashing:
                    continue
                # State is valid. Add it to the pool
                state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz),
                                    accepting_from=[iheavy])
                states.append(state)
                # Check if this state might be clashing with the hydrogen of a
                # non-static donor. If so, generate another state.
                clashing = any(
                    self._might_clash_with_nonstatic_donor(
                        hydrogen2, ct.atom[iheavy2])
                    for iheavy2 in other_donor_heavies)
                if not clashing:
                    break
        return states

[docs]    def rotate_hydrogens_along_axis(self, axis, angle):
        """
        Rotate the water hydrogens along an axis by an angle. Does not return
        anything but moves hydrogens in place.

        :param axis: Axis along to rotate to
        :type axis: 3 floats
        :param angle: Angle to rotate in degrees
        :type angle: float
        """
        # Rotate water along fixed oxygen - donor axis.
        ct = self.ct
        x, y, z = self.oxygen.xyz
        hydrogens = [int(self.hydrogen1), int(self.hydrogen2)]
        # Move to origin
        transform.translate_structure(ct, -x, -y, -z, atom_index_list=hydrogens)
        # Perform the rotation
        rotation = transform.get_rotation_matrix(axis, angle)
        transform.transform_structure(ct, rotation, atom_index_list=hydrogens)
        # Move back to original position
        transform.translate_structure(ct, x, y, z, atom_index_list=hydrogens)


[docs]class NetworkSolver:
    """Wrapper around toulbar2 that exactly solves the hydrogen bond network problem"""

    # Toulbar2 requires a precision to work with as it is internally working
    # with integers. Specify how many decimals are included.
    PRECISION = 5
    # Maximum time that can be spend on solving the network in seconds. Either
    # toulbar2 finds the optimum quickly or it takes a long-long time.
    MAX_TIME = 120
    # Name of the executable that is called
    TOULBAR2 = 'toulbar2'

[docs]    def __init__(self, cluster, interact, upper_bound):
        """
        :param cluster: Hydrogen bond network cluster that will be optimized
        :type cluster: ProtAssign.hbond_cluster
        :param interact: Changeable interaction lookup table.
        :type interact: Dict[int, Dict[int, bool]]
        :param upper_bound: Upper energy bound for the network energy. Required for toulbar2
        :type upper_bound: float
        """
        self.cluster = cluster
        self.interact = interact
        self.upper_bound = upper_bound
        # Save intermediate files in current directory if we are debugging
        self._dir = None if log_level < LOG_DEBUG else '.'
        self.setup_toulbar2_inputs()

[docs]    def setup_toulbar2_inputs(self):
        """
        Setup the input file for toulbar2. Essentially the file contains data
        on the variables involved, and the self and pair scores, all in a json
        file.
        """
        report(LOG_BASIC, "Setting up network to solve.")
        cluster = self.cluster
        fmt = f".{self.PRECISION}f"
        data = {
            "problem": {
                "name": "hydrogen_network_optimization",
                "mustbe": "<" + format(self.upper_bound, fmt)
            },
            "variables": {
                f"v{changeable.index}": changeable.nstates
                for changeable in cluster.changeables
            },
            "functions": {}
        }
        # Setup the self-cost for each state
        functions = data["functions"]
        for changeable in cluster.changeables:
            n = changeable.index
            costs = [format(cost, fmt) for cost in cluster.self_scores[n]]
            functions[f"c{n}"] = {"scope": [f"v{n}"], "costs": costs}

        global_to_local_index = {
            changeable.index: n
            for n, changeable in enumerate(cluster.changeables)
        }
        # Setup the pair interaction for each state
        for changeable1 in cluster.changeables:
            for index2 in self.interact[changeable1.index]:
                if changeable1.index >= index2:
                    continue
                # local index might not be available if the cluster size has
                # been capped in the ProtAssign workflow
                m = global_to_local_index.get(index2)
                if m is None:
                    continue
                changeable2 = cluster.changeables[m]
                key = (changeable1.index, changeable2.index)
                pair_scores = cluster.pair_scores.get(key)
                if pair_scores is None:
                    continue
                costs = [format(cost, fmt) for cost in pair_scores.ravel()]
                scope = [f"v{changeable1.index}", f"v{changeable2.index}"]
                key = f"c_{changeable1.index}_{changeable2.index}"
                functions[key] = {"scope": scope, "costs": costs}
        self.toulbar2_input = tempfile.NamedTemporaryFile(suffix='.cfn',
                                                          dir=self._dir).name
        with open(self.toulbar2_input, 'w') as f:
            json.dump(data, f)

    def __del__(self):
        """Clean up the json/cfn toulbar2 input file"""
        if log_level < LOG_DEBUG:
            Path(self.toulbar2_input).unlink(missing_ok=True)

    def _run_toulbar2(self, options=None):
        """
        Run toulbar2 silently

        :param options: Additional options passed to toulbar2
        :type options: Dict[str, str]

        :return: Output file name of toulbar2
        :rtype: str
        """
        cmd = [self.TOULBAR2, self.toulbar2_input, f'-timer={self.MAX_TIME}']
        if options:
            for key, value in options.items():
                cmd.append(f'-{key}={value}')
        out = tempfile.NamedTemporaryFile(dir=self._dir).name
        cmd.append(f'-w={out}')
        if log_level < LOG_DEBUG:
            stderr = stdout = subprocess.DEVNULL
        else:
            stdout = open("toulbar2.log", "a")
            stderr = open("toulbar2.err", "a")
        # Be very forgiving when toulbar2 fails, as ProtAssign is a central
        # technology.
        try:
            subprocess.run(cmd, stdout=stdout, stderr=stderr)
        except Exception:
            report(LOG_BASIC, "Toulbar2 invocation failed")
        return out

[docs]    def parse_and_delete_output_file(self, out):
        """
        Parse and then delete toulbar2 output file that contains the chosen
        state for each changeable

        :param out: File name of output file
        :type out: str
        :return: List of chosen state of each changeable
        :rtype: List[List[int]]
        """
        solutions = []
        fpath = Path(out)
        if not fpath.is_file():
            return solutions
        with fpath.open() as f:
            for line in f:
                try:
                    sol = [int(x) for x in line.strip().split()]
                except ValueError:
                    report(LOG_BASIC,
                           "Could not interpret line in toulbar2 output file")
                    report(LOG_BASIC, f"Offending line: {line}")
                    continue
                # Also check if lenght of solution is similar to number of
                # changeables just in case...
                if len(sol) != len(self.cluster.changeables):
                    report(
                        LOG_BASIC,
                        "Number of changeables does not match solution length from toulbar2"
                    )
                    continue
                solutions.append(sol)
        if log_level < LOG_DEBUG:
            fpath.unlink(missing_ok=True)
        return solutions

[docs]    def optimal_solution(self):
        """
        Run toulbar2 to get the optimal solution

        :return: Optimal state combination. If toulbar2 fails returns None
        :rtype: List[int] or None
        """
        out = self._run_toulbar2()
        solutions = self.parse_and_delete_output_file(out)
        if solutions:
            return solutions[0]

[docs]    def explore_solutions(self, upper_bound=None, number=1000):
        """
        Run toulbar2 to greedily obtain a number of solutions with an energy
        below a certain upper bound. Note that there are no guarantees here
        about diversity, though each solution will be unique.

        :param upper_bound: upper bound energy. All solutions will have an energy lower than this
        :type upper_bound: float
        :param number: Max number of solutions to find
        :type number: int

        :return: List of state combinations
        :rtype: List[List[int]]
        """
        report(LOG_BASIC, "Solving network.")
        options = {'a': f'{number}'}
        if upper_bound is not None:
            options['ub'] = format(upper_bound, f'.{self.PRECISION}f')
        out = self._run_toulbar2(options=options)
        solutions = self.parse_and_delete_output_file(out)
        return solutions


[docs]class ProtAssign:

[docs]    class changeable:

        asl = 'none'
        max_hbond_distance = 3.5
        hbond_min_angle = 150.0
        hbond_heavy_min_angle = 80.0
        hbond_heavy_max_angle = 140.0

[docs]        def __init__(self, ct, iatom):
            self.index = 0
            self.current_state = 0  #determine
            self.initial_state = 0  #determine
            self.treated = False
            self.locked = False
            self.valid = True
            return

[docs]        def pre_treat_1(self, ct):
            return

[docs]        def pre_treat_2(self, ct):
            return

[docs]        def pre_treat(self, ct):
            self.pre_treat_1(ct)
            build.add_hydrogens(ct, atom_list=self.get_heavies())
            self.pre_treat_2(ct)
            self.treated = True
            return

[docs]        def enumerate_states(self,
                             ct,
                             acceptors,
                             donors,
                             pH,
                             do_flips=True,
                             include_initial=False):
            return

[docs]        def lock_protonation(self):
            return

[docs]        def add_current_to_states(self, ct):
            return

[docs]        def assign_state(self,
                         ct,
                         istate,
                         add_labels=True,
                         label_pkas=False,
                         state_gap=None,
                         verbose=False):
            if not self.treated:
                self.pre_treat(ct)
            self.treated = False
            return []

[docs]        def assign_state_gap(self, atom, state_gaps, report_gaps=True):
            """
            Write the Gap in energy between the lowest energy state and the
            state with different protonation states or heavy atom positions to
            the output ct

            :param atom: The atom that should have properties written to it
            :type atom: structure.StructureAtom

            :param state_gaps: The energy gaps between states for a given
                changeable position.
            :type state_gaps: A dictionary where the keys are strings (state
                names) and the values are floats (energy in kcals of the lowest
                energy combination that has that state)

            :param report_gaps: Whether to report the gaps to the log file as
                well
            :type report_gaps: bool
            """
            for p in "r_pa_state_gap", :
                if p in atom.property:
                    del atom.property[p]
            if state_gaps is None or len(state_gaps) <= 1:
                return
            min_gap = sorted(state_gaps[state] for state in state_gaps)[1]
            atom.property["r_pa_state_gap"] = min_gap
            if log_level >= LOG_EXTRA:
                cutoff = 100
            else:
                cutoff = 2.5
            if report_gaps and min_gap <= cutoff:
                report(LOG_BASIC, (
                    "      Alternative States for {0:10s} Gap {1:5.2f} kcals/mol"
                    .format(self.name, min_gap)))
                for state in sorted(state_gaps, key=lambda k: state_gaps[k]):
                    if state_gaps[state] <= cutoff:
                        report(
                            LOG_BASIC,
                            "        {0:8s} {1:5.2f} kcals/mol".format(
                                state, state_gaps[state]))

[docs]        def update_atom_indices(self, ct, new_indices):
            return

[docs]        def get_new_index(self, ct, atom_index, new_indices):
            if atom_index in new_indices:
                return new_indices[atom_index]
            else:
                return None

[docs]        def get_view_atoms(self):
            return []

[docs]        def swap_atoms(self, ct, atom1, atom2):
            (temp_x, temp_y, temp_z) = ct.atom[atom1].xyz
            ct.atom[atom1].xyz = ct.atom[atom2].xyz
            ct.atom[atom2].xyz = (temp_x, temp_y, temp_z)
            return

[docs]        def get_penalty(self, istate):
            return 0.0

[docs]        def get_adjustable_atoms(self):
            return []

[docs]        def change_pka(self, pka, propka_pH):
            return False

[docs]        def change_empirical_pka(self, pH):
            pass

[docs]        def get_dihedral_atoms(self, ct, h):
            # Borrowed from rotH.py.
            ret_list = []
            # The first atom will be H itself:
            ret_list.append(h)
            # Now find the O or S attached to the H. Note that bonds (like everything
            # associated with structures) are indexed from "1" so we are actually
            # looking for the first bond:
            Xatom = ct.atom[h].bond[1].atom2
            ret_list.append(int(Xatom))
            # Now find a suitable non-H atom bonded to the X:
            C1atom = -1
            for b in Xatom.bond:
                conn_atom = b.atom2
                # This is probably the easiest way to find a non-H atom:
                if not conn_atom.atomic_number == 1:
                    C1atom = int(conn_atom)
                    ret_list.append(C1atom)
                    # Leave the loop:
                    break
            # Check to see that we did find a C1 atom:
            if C1atom < 0:
                return ret_list
            # Now look for the final atom we need:
            for b in ct.atom[C1atom].bond:
                conn_atom = b.atom2
                if int(conn_atom) != ret_list[1]:
                    # This is probably the easiest way to find a non-H atom:
                    #if not conn_atom.atomic_number == 1 :
                    C2atom = int(conn_atom)
                    ret_list.append(C2atom)
                    # Leave the loop:
                    break
            return ret_list

[docs]        def get_close_interactors(self, ct, dcell):
            """
            Return acceptors, donors and clashers that are close to this changeable
            heavy atoms.

            :param ct: Structure with annotated atoms signfying interaction class
            :type ct: Structure
            :param dcell: Distance cell to query for neighboring atoms
            :type dcell: DistanceCell

            :returns: List of acceptors, donor heavy-hydrogen pairs, and clashers atom indices
            :rtype: tuple[list[int], list[tuple[int, int]], list[int]]
            """
            acceptors = set()
            donors = set()
            clashers = set()
            for iatom1 in self.get_heavies():
                for iatom2 in dcell.query(*ct.atom[iatom1].xyz):
                    p = ct.atom[iatom2].property
                    if p.get(ACCEPTOR_PROPERTY):
                        acceptors.add(iatom2)
                    if p.get(DONOR_PROPERTY):
                        atom2 = ct.atom[iatom2]
                        if atom2.atomic_number != 1:
                            for ba in atom2.bonded_atoms:
                                if ba.atomic_number != 1:
                                    continue
                                if not ba.property.get(DONOR_PROPERTY):
                                    continue
                                donor = (iatom2, int(ba))
                                donors.add(donor)
                    if p.get(ION_PROPERTY):
                        donors.add((iatom2, iatom2))
                    if p.get(CLASHER_PROPERTY):
                        clashers.add(iatom2)
            return sorted(acceptors), sorted(donors), sorted(clashers)

[docs]    class amide_changeable(changeable):
        """
        This is the primary amide -NH2 group of ASN and GLN residues.
        """
        asl = '((res.ptype \"ASN \" AND atom.ptype \" CG \") OR (res.ptype \"GLN \" AND atom.ptype \" CD \"))'
        OXYGEN_PDBNAMES = [" OD1", " OE1"]
        NITROGEN_PDBNAMES = [" ND2", " NE2"]
        CARBON_PDBNAMES = [" CG ", " CD "]

[docs]        def __init__(self, ct, iatom):
            self.pka = None
            self.index = 0
            self.name = ''
            self.nstates = 0
            self.num_user_states = 0
            self.state_names = []
            self.carbon = None
            self.oxygen = None
            self.nitrogen = None
            self.hydrogen1 = None
            self.hydrogen2 = None
            self.treated = False
            self.locked = False
            self.valid = True
            self.name = get_residue_string(ct.atom[iatom])
            # Will contain coordinates of oxygen, nitrogen and the two
            # hydrogens for the two states
            self.state_coordinates = []
            self.type = 'AMIDE'
            residue_atoms = ct.getResidueAtoms(iatom)
            for atom in residue_atoms:
                name = atom.pdbname
                if name in self.OXYGEN_PDBNAMES:
                    self.oxygen = int(atom)
                elif name in self.NITROGEN_PDBNAMES:
                    self.nitrogen = int(atom)
                elif name in self.CARBON_PDBNAMES:
                    self.carbon = int(atom)
            if self.carbon is None or self.oxygen is None or self.nitrogen is None:
                report(LOG_EXTRA,
                       'Rejecting ' + self.name + ' due to missing atoms.')
                self.valid = False
                return
            # Make sure the nitrogen is covalently bound ONLY to the carbon
            # CG (for ASN) or CD (for GLN).
            bound_atoms = []
            for bond in ct.atom[self.nitrogen].bond:
                if bond.atom2.atomic_number != 1 and bond.order != 0:
                    bound_atoms.append(bond.atom2.pdbname)

            if len(bound_atoms
                  ) > 1 or bound_atoms[0] not in self.CARBON_PDBNAMES:
                report(LOG_EXTRA,
                       'Rejecting ' + self.name + ' due to covalent bond.')
                self.valid = False
                return

            state_name = ct.atom[self.carbon].property.get(STATE_PROPERTY)
            if state_name == 'Flip':
                self.current_state = 1
                self.initial_state = 1
            else:
                self.current_state = 0
                self.initial_state = 0

[docs]        def pre_treat_2(self, ct):
            hydrogens = []
            for ba in ct.atom[self.nitrogen].bonded_atoms:
                if ba.atomic_number != 1:
                    continue
                ba.property['i_pa_atomindex'] = ba.index
                hydrogens.append(ba.index)
            self.hydrogen1, self.hydrogen2 = hydrogens

[docs]        def enumerate_states(self,
                             ct,
                             acceptors,
                             donors,
                             pH,
                             do_flips=True,
                             include_initial=False):
            """
            Generate states for amides

            Max 2 states are generated by swapping the oxygen and nitrogen and
            readjusting the hydrogens to the nitrogen.

            :param ct: Structure to generate states for
            :type ct: Structure
            :param do_flips: Include the flipped state of the amide
            :type do_flips: bool

            Other parameters are not used
            """
            n_xyz = ct.atom[self.nitrogen].xyz
            o_xyz = ct.atom[self.oxygen].xyz
            h1_xyz = ct.atom[self.hydrogen1].xyz
            h2_xyz = ct.atom[self.hydrogen2].xyz
            self.state_coordinates = [[o_xyz, n_xyz, h1_xyz, h2_xyz]]
            self.state_names = ['No Flip']
            if do_flips or self.initial_state == 1:
                # Extract residue structure to determine hydrogen positions in
                # flipped state
                self.swap_atoms(ct, self.nitrogen, self.oxygen)
                amide_st = ct.atom[
                    self.nitrogen].getResidue().extractStructure()
                self.swap_atoms(ct, self.nitrogen, self.oxygen)
                # Remove hydrogens and rebuild them at the nitrogen to obtain
                # idealized coordinates
                build.delete_hydrogens(amide_st)
                amide = next(iter(amide_st.residue))
                n1, n2 = self.NITROGEN_PDBNAMES
                nitrogen = amide.getAtomByPdbName(n1)
                if nitrogen is None:
                    nitrogen = amide.getAtomByPdbName(n2)
                build.add_hydrogens(amide_st, atom_list=[nitrogen])
                coordinates = [n_xyz, o_xyz]
                for ba in nitrogen.bonded_atoms:
                    if ba.atomic_number == 1:
                        coordinates.append(ba.xyz)
                self.state_coordinates.append(coordinates)
                self.state_names.append('Flip')
            # If the state of the amide is determined in a previous run, we
            # want to retain the state name and its corresponding state
            # coordinates. To accomplish this simply reverse the state
            # coordinate list.
            if self.initial_state == 1:
                self.state_coordinates = self.state_coordinates[::-1]
                # In case there are no flips included, reduce the states
                if not do_flips:
                    self.state_coordinates = self.state_coordinates[:1]
            self.nstates = len(self.state_coordinates)

[docs]        def set_state_coordinates(self, ct, istate):
            """
            Set coordinates of nitrogen, oxygen, and hydrogens to
            pre-calculated state coordinates.
            """
            o_xyz, n_xyz, h1_xyz, h2_xyz = self.state_coordinates[istate]
            ct.atom[self.oxygen].xyz = o_xyz
            ct.atom[self.nitrogen].xyz = n_xyz
            ct.atom[self.hydrogen1].xyz = h1_xyz
            ct.atom[self.hydrogen2].xyz = h2_xyz

[docs]        def assign_state(self,
                         ct,
                         istate,
                         add_labels=True,
                         label_pkas=False,
                         state_gaps=None,
                         verbose=False):
            """
            Assign state to amide residue.

            The state is changed by setting the coordinates of the nitrogen,
            oxygen and 2 hydrogens to pre-calculated positions.

            :param ct: Structure to change
            :type ct: Structure
            :param istate: State index to assign
            :type istate: int
            :param add_labels: Add labels to the carbon atom to indicate flip
            :type add_labels: bool
            :param state_gaps: List of state gaps
            :type state_gaps: List[float]
            :param verbose: Does nothing
            :param verbose: bool
            """
            if not self.treated:
                self.pre_treat(ct)
            self.set_state_coordinates(ct, istate)
            # Annotate carbon atom
            carbon = ct.atom[self.carbon]
            carbon.label_format = "%UT"
            carbon.label_color = label_color
            state_name = self.state_names[istate]
            carbon.property[STATE_PROPERTY] = state_name
            if add_labels:
                label = ''
                if istate == 1:
                    label = state_name
                carbon.label_user_text = label
            self.assign_state_gap(carbon, state_gaps, report_gaps=verbose)
            self.current_state = istate
            return []

[docs]        def update_atom_indices(self, ct, new_indices):
            # Gotta be a better way...
            self.carbon = self.get_new_index(ct, self.carbon, new_indices)
            self.oxygen = self.get_new_index(ct, self.oxygen, new_indices)
            self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices)
            self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices)
            self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices)

[docs]        def get_heavies(self):
            return [self.oxygen, self.nitrogen]

[docs]        def get_state_sites(self, ct, istate):
            """
            Return state sites consisting of acceptors, donors, clashers and
            charge.

            :return: List of acceptor, donor, clasher atom indices and charge
            :rtype: Tuple[List[int], List[int, int], List[int], float]
            """
            self.set_state_coordinates(ct, istate)
            return ([self.oxygen], [(self.nitrogen, self.hydrogen1),
                                    (self.nitrogen, self.hydrogen2)], [], 0.0)

[docs]        def get_view_atoms(self):
            return [self.carbon, self.oxygen, self.nitrogen]

[docs]        def get_penalty(self, istate):
            return 0.0

[docs]        def get_adjustable_atoms(self):
            return [self.oxygen, self.nitrogen, self.hydrogen1, self.hydrogen2]

[docs]    class histidine_changeable(changeable):
        """
        Imidazole group of Histidine residues.
        """
        asl = '((res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \")) AND ((atom.ptype \" CG \"))'

[docs]        def __init__(self, ct, iatom):
            # Calculated pKa of histidine and when flipped
            self.pka = None
            self.pka_flip = None
            # Flags to indicate whther this histidine is forced to be neutral
            # or charged for pKa prediction
            self.force_neutral = False
            self.force_charged = False
            self.index = 0
            self.name = ''
            self.nstates = 0
            self.num_user_states = 0
            self.state_names = []
            self.penalties = []
            self.CG = None
            self.ND1 = None
            self.HD1 = None
            self.NE2 = None
            self.HE2 = None
            self.CD2 = None
            self.HD2 = None
            self.CE1 = None
            self.HE1 = None
            self.treated = False
            self.locked = False
            self.valid = True
            self.name = get_residue_string(ct.atom[iatom])
            self.type = 'HISTIDINE'

            # Find hydrogen atoms:
            residue_atoms = ct.getResidueAtoms(iatom)
            for atom in residue_atoms:
                name = atom.pdbname.strip()
                if name in self.__dict__:
                    self.__dict__[name] = int(atom)
                # Catch non-standard hydrogen names.
                elif atom.atomic_number == 1:
                    for heavy in atom.bonded_atoms:
                        bond = ct.getBond(heavy, atom)
                        if bond.order == 0:
                            continue
                        if int(heavy) == self.ND1:
                            self.HD1 = int(atom)
                        elif int(heavy) == self.NE2:
                            self.HE2 = int(atom)
                        elif int(heavy) == self.CD2:
                            self.HD2 = int(atom)
                        elif int(heavy) == self.CE1:
                            self.HE1 = int(atom)
            if self.CG is None or self.ND1 is None or self.NE2 is None or self.CD2 is None or self.CE1 is None:
                report(LOG_EXTRA,
                       'Rejecting ' + self.name + ' due to missing atoms.')
                self.valid = False
                return

            # Check for something covalently bound (outside of the ring):
            allowed_bonds = {
                self.ND1: {' CG ', ' CE1'},
                self.NE2: {' CE1', ' CD2'},
                self.CE1: {' ND1', ' NE2'},
                self.CD2: {' NE2', ' CG '},
            }
            for heavy, expected_names in allowed_bonds.items():
                bound_names = []
                for bond in ct.atom[heavy].bond:
                    if bond.atom2.atomic_number != 1 and bond.order != 0:
                        bound_names.append(bond.atom2.pdbname)
                if len(bound_names) > 2 or set(bound_names) != expected_names:
                    report(LOG_EXTRA,
                           'Rejecting ' + self.name + ' due to covalent bond.')
                    self.valid = False
                    return
            flip = 0
            protonate = 0
            flip_tag = re.compile('Flip')
            if STATE_PROPERTY in ct.atom[self.CG].property and \
             flip_tag.match(ct.atom[self.CG].property[STATE_PROPERTY]):
                flip = 1
            else:
                flip = 0
            if self.HD1 is not None:
                if self.HE2 is not None:
                    protonation = 2  # HIP
                else:
                    protonation = 0  # HID
            else:
                protonation = 1  # HIE
            self.current_state = protonation + flip * 3
            self.initial_state = protonation + flip * 3
            return

[docs]        def pre_treat_1(self, ct):
            ct.atom[self.ND1].formal_charge = 1
            ct.atom[self.NE2].formal_charge = 0
            ct.getBond(self.ND1, self.CE1).type = structure.BondType.Double
            ct.getBond(self.CD2, self.NE2).type = structure.BondType.Single
            ct.getBond(self.CE1, self.NE2).type = structure.BondType.Single
            ct.getBond(self.CG, self.CD2).type = structure.BondType.Double

[docs]        def pre_treat_2(self, ct):
            residue_atoms = ct.getResidueAtoms(self.CG)
            for hyd in residue_atoms:
                if hyd.atomic_number != 1:
                    continue
                for heavy in hyd.bonded_atoms:
                    bond = ct.getBond(heavy, hyd)
                    if bond.order == 0:
                        continue
                    if int(heavy) == self.ND1:
                        self.HD1 = int(hyd)
                        hyd.property['i_pa_atomindex'] = int(hyd)
                    elif int(heavy) == self.NE2:
                        self.HE2 = int(hyd)
                        hyd.property['i_pa_atomindex'] = int(hyd)
            if self.current_state > 2 and not self.locked:
                # Needs to be flipped back, for bookkeeping to be consistent.
                atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2),
                                 (self.NE2, self.CE1), (self.HE2, self.HE1)]
                for atoms in atoms_to_swap:
                    self.swap_atoms(ct, atoms[0], atoms[1])
                self.current_state -= 3
            self.treated = True
            return

[docs]        def enumerate_states(self,
                             ct,
                             acceptors,
                             donors,
                             pH,
                             do_flips=True,
                             include_initial=False):
            """
            Enumerate histidine states and create penalties for certain states.

            :param ct: Structure to generate states for
            :type ct: Structure
            :param acceptors: List of acceptor atom indices
            :type acceptors: List[int]
            :param donors: List of donor pair atom indices
            :type donors: List[Tuple[int, int]]
            :param pH: pH of system to determine pH-related penalties
            :type pH: float
            :param do_flips: Include flipped histidine states in pool
            :type do_flips: bool
            :param include_initial: Does nothing here
            """
            if do_flips:
                self.nstates = 6
                self.state_names = [
                    'HID', 'HIE', 'HIP', 'Flip HID', 'Flip HIE', 'Flip HIP'
                ]
            else:
                self.nstates = 3
                self.state_names = ['HID', 'HIE', 'HIP']

            if pH is None:
                self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            elif pH in [pH_vlow, pH_low]:
                self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0]
            else:
                self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0]
            self.contact_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            for iatom in [self.HD1, self.HD2, self.HE1, self.HE2]:
                assert iatom is not None
                for iacceptor in acceptors:
                    assert iacceptor is not None
                    if ct.measure(iatom, iacceptor) < 2.1:
                        formal_charge = 0.0
                        if ct.atom[iacceptor].formal_charge < 0.0:
                            formal_charge = ct.atom[iacceptor].formal_charge
                        # Check if it's a carboxylate.
                        elif ct.atom[iacceptor].atomic_number == 8:
                            connected_atoms = analyze.evaluate_asl(
                                ct,
                                'not (atom.num %d) and withinbonds 2 (atom.num %d)'
                                % (iacceptor, iacceptor))
                            for jatom in connected_atoms:
                                if ct.atom[jatom].atomic_number == 8 and ct.atom[
                                        jatom].formal_charge < 0.0:
                                    formal_charge = ct.atom[jatom].formal_charge
                        if iatom == self.HD1:
                            report(
                                LOG_DEBUG, self.name + ' : HD1 is near ' +
                                str(ct.atom[iacceptor].resnum) + ':' +
                                ct.atom[iacceptor].pdbname)
                            self.contact_penalties[1] += 10.0
                            self.contact_penalties[3] += 10.0
                            self.contact_penalties[4] += 10.0
                            self.contact_penalties[5] += 10.0
                        if iatom == self.HD2:
                            report(
                                LOG_DEBUG, self.name + ' : HD2 is near ' +
                                str(ct.atom[iacceptor].resnum) + ':' +
                                ct.atom[iacceptor].pdbname)
                            self.contact_penalties[0] += 10.0
                            self.contact_penalties[1] += 10.0
                            self.contact_penalties[2] += 10.0
                            self.contact_penalties[4] += 10.0
                        if iatom == self.HE1:
                            report(
                                LOG_DEBUG, self.name + ' : HE1 is near ' +
                                str(ct.atom[iacceptor].resnum) + ':' +
                                ct.atom[iacceptor].pdbname)
                            self.contact_penalties[0] += 10.0
                            self.contact_penalties[1] += 10.0
                            self.contact_penalties[2] += 10.0
                            self.contact_penalties[3] += 10.0
                        if iatom == self.HE2:
                            report(
                                LOG_DEBUG, self.name + ' : HE2 is near ' +
                                str(ct.atom[iacceptor].resnum) + ':' +
                                ct.atom[iacceptor].pdbname)
                            self.contact_penalties[0] += 10.0
                            self.contact_penalties[3] += 10.0
                            self.contact_penalties[4] += 10.0
                            self.contact_penalties[5] += 10.0
                        # Only want to enforce this rule if not using PropKa.
                        if (formal_charge < 0.0 or ct.atom[iacceptor].pdbname
                                in [' OD1', ' OD2', ' OE1', ' OE2'
                                   ]) and (pH is not None):
                            report(
                                LOG_DEBUG, self.name + ' salt-bridged to ' +
                                ct.atom[iacceptor].pdbres +
                                str(ct.atom[iacceptor].resnum))
                            self.contact_penalties[0] += 10.0
                            self.contact_penalties[1] += 10.0
                            self.contact_penalties[3] += 10.0
                            self.contact_penalties[4] += 10.0
                        break
                for donor in donors:
                    # ASP and GLU can also be in donors, if they're being predicted.
                    if ct.atom[donor[0]].pdbres in [
                            "ASP ", "GLU "
                    ] and ct.atom[donor[0]].pdbname in [
                            ' OD1', ' OD2', ' OE1', ' OE2'
                    ]:
                        # Must be in contact, and we aren't using PropKa.
                        if ct.measure(iatom, donor[0]) < 2.0 and pH is not None:
                            report(
                                LOG_DEBUG, self.name + ' salt-bridged to ' +
                                ct.atom[donor[0]].pdbres +
                                str(ct.atom[donor[0]].resnum))
                            self.contact_penalties[0] += 10.0
                            self.contact_penalties[1] += 10.0
                            self.contact_penalties[3] += 10.0
                            self.contact_penalties[4] += 10.0
                    # Metal will force a particular state, which overrides all else.
                    if donor[0] == donor[1]:
                        if (ct.atom[donor[0]].atomic_number not in [
                                7, 8
                        ]) and (ct.measure(iatom, donor[0]) < 2.5):
                            if iatom == self.HD1:
                                self.contact_penalties = [
                                    10.0, 0.0, 10.0, 10.0, 10.0, 10.0
                                ]
                            elif iatom == self.HD2:
                                self.contact_penalties = [
                                    10.0, 10.0, 10.0, 10.0, 0.0, 10.0
                                ]
                            elif iatom == self.HE1:
                                self.contact_penalties = [
                                    10.0, 10.0, 10.0, 0.0, 10.0, 10.0
                                ]
                            elif iatom == self.HE2:
                                self.contact_penalties = [
                                    0.0, 10.0, 10.0, 10.0, 10.0, 10.0
                                ]
                            report(
                                LOG_DEBUG, self.name +
                                ' Metal ion detected, restricting state.')
                            break
            self.penalties = []
            for i in range(6):
                self.penalties.append(self.pH_penalties[i] +
                                      self.contact_penalties[i])
            report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties))
            return

[docs]        def lock_protonation(self):
            if self.initial_state in [2, 5]:
                self.contact_penalties = [
                    1000.0, 1000.0, 0.0, 1000.0, 1000.0, 0.0
                ]
            else:
                self.contact_penalties = [0.0, 0.0, 1000.0, 0.0, 0.0, 1000.0]
            self.penalties = [
                self.pH_penalties[i] + self.contact_penalties[i]
                for i in range(6)
            ]
            return

[docs]        def assign_state(self,
                         ct,
                         istate,
                         add_labels=True,
                         label_pkas=False,
                         state_gaps=None,
                         verbose=False):
            if not self.treated:
                self.pre_treat(ct)
            if (istate > 2 and self.current_state <= 2) or \
               (istate <= 2 and self.current_state > 2):
                atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2),
                                 (self.NE2, self.CE1), (self.HE2, self.HE1)]
                for atoms in atoms_to_swap:
                    self.swap_atoms(ct, atoms[0], atoms[1])
            protonation = istate - (istate // 3) * 3
            atoms_to_delete = []
            protonation_name = ''
            if protonation == 0:
                ct.atom[self.ND1].formal_charge = 0
                ct.getBond(self.ND1, self.CE1).type = structure.BondType.Single
                ct.getBond(self.NE2, self.CE1).type = structure.BondType.Double
                protonation_name = 'HIS'
                atoms_to_delete = [self.HE2]
            elif protonation == 1:
                ct.atom[self.ND1].formal_charge = 0
                protonation_name = 'HIE'
                atoms_to_delete = [self.HD1]
            elif protonation == 2:
                protonation_name = 'HIP'
            ct.atom[self.CG].label_format = "%UT"
            ct.atom[self.CG].label_color = label_color
            if istate > 2:
                prot_label = 'Flip ' + protonation_name
                ct.atom[self.CG].property[
                    STATE_PROPERTY] = 'Flip ' + protonation_name
            else:
                prot_label = protonation_name
                ct.atom[self.CG].property[STATE_PROPERTY] = protonation_name
            self.assign_state_gap(ct.atom[self.CG],
                                  state_gaps,
                                  report_gaps=verbose)
            if add_labels:
                label = prot_label
            else:
                label = ''
            if self.pka is not None and label_pkas:
                pka = self.pka
                if istate > 2 and self.pka_flip is not None:
                    pka = self.pka_flip
                label += ' %.2f' % pka
                ct.atom[self.CG].property[pka_property] = pka
            ct.atom[self.CG].label_user_text = label
            residue_atoms = ct.getResidueAtoms(self.CG)
            for atom in residue_atoms:
                atom.pdbres = protonation_name
            self.current_state = istate
            self.treated = False
            return (atoms_to_delete)

[docs]        def update_atom_indices(self, ct, new_indices):
            # Gotta be a better way...
            self.CG = self.get_new_index(ct, self.CG, new_indices)
            self.ND1 = self.get_new_index(ct, self.ND1, new_indices)
            self.HD1 = self.get_new_index(ct, self.HD1, new_indices)
            self.NE2 = self.get_new_index(ct, self.NE2, new_indices)
            self.HE2 = self.get_new_index(ct, self.HE2, new_indices)
            self.CD2 = self.get_new_index(ct, self.CD2, new_indices)
            self.HD2 = self.get_new_index(ct, self.HD2, new_indices)
            self.CE1 = self.get_new_index(ct, self.CE1, new_indices)
            self.HE1 = self.get_new_index(ct, self.HE1, new_indices)
            return

[docs]        def get_heavies(self):
            return [self.ND1, self.NE2, self.CD2, self.CE1]

[docs]        def get_state_sites(self, ct, istate):
            if istate == 0:
                return ([self.NE2], [(self.ND1, self.HD1)],
                        [self.HD2, self.HE1], 0.0)
            elif istate == 1:
                return ([self.ND1], [(self.NE2, self.HE2)],
                        [self.HD2, self.HE1], 0.0)
            elif istate == 2:
                return ([], [(self.ND1, self.HD1),
                             (self.NE2, self.HE2)], [self.HD2, self.HE1], 1)
            elif istate == 3:
                return ([self.CE1], [(self.CD2, self.HD2)],
                        [self.HD1, self.HE2], 0.0)
            elif istate == 4:
                return ([self.CD2], [(self.CE1, self.HE1)],
                        [self.HD1, self.HE2], 0.0)
            elif istate == 5:
                return ([], [(self.CD2, self.HD2),
                             (self.CE1, self.HE1)], [self.HD1, self.HE2], 1)
            return

[docs]        def get_view_atoms(self):
            return [self.CG, self.ND1, self.NE2, self.CD2, self.CE1]

[docs]        def get_penalty(self, istate):
            return self.penalties[istate]

[docs]        def get_adjustable_atoms(self):
            return [
                self.ND1, self.HD1, self.NE2, self.HE2, self.CD2, self.HD2,
                self.CE1, self.HE1
            ]

[docs]        def change_pka(self, pka, propka_pH):
            if pka is None:
                reference_pka = 6.10
            else:
                reference_pka = pka
                self.pka = pka
            # If the pKa is within 0.5 of the pH, let the scoring function
            # determine the protonation state
            if math.fabs(propka_pH - reference_pka) <= 0.5:
                self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            elif propka_pH < reference_pka:
                self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0]
            else:
                self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0]
            new_penalties = []
            something_changed = False
            for i in range(6):
                new_penalties.append(self.pH_penalties[i] +
                                     self.contact_penalties[i])
                if self.penalties[i] != new_penalties[i]:
                    something_changed = True
            self.penalties = new_penalties
            return something_changed

[docs]        def change_empirical_pka(self, pH):
            if self.pka is None:
                return
            penalty = 4.0
            states_to_penalize = []
            delta = self.pka - pH
            if delta > 0.3:
                states_to_penalize += [0, 1]
            elif delta < -0.3:
                states_to_penalize.append(2)
            if self.pka_flip is not None:
                delta = self.pka_flip - pH
                if delta > 0.3:
                    states_to_penalize += [3, 4]
                elif delta < -0.3:
                    states_to_penalize.append(5)

            pH_penalties = [0.0] * 6
            for state in states_to_penalize:
                pH_penalties[state] += penalty
            if self.force_neutral:
                pH_penalties[2] += 100 * penalty
                pH_penalties[5] += 100 * penalty
            if self.force_charged:
                for i in (0, 1, 3, 4):
                    pH_penalties[i] += 100 * penalty
            self.pH_penalties = pH_penalties
            self.penalties = [
                p1 + p2
                for p1, p2 in zip(self.contact_penalties, self.pH_penalties)
            ]

[docs]    class carboxyl_changeable(changeable):
        asl = '(res.ptype \"ASP \",\"ASH \" AND atom.ptype \" CG \") OR (res.ptype \"GLU \",\"GLH \" AND atom.ptype \" CD \")'

[docs]        def __init__(self, ct, iatom):
            self.pka = None
            self.index = 0
            self.name = ''
            self.nstates = 0
            self.num_user_states = 0
            self.state_names = []
            self.carbon = None
            self.oxygen1 = None
            self.oxygen2 = None
            self.hydrogen1 = None
            self.hydrogen2 = None
            self.treated = False
            self.locked = False
            self.valid = True
            self.name = get_residue_string(ct.atom[iatom])
            self.type = 'CARBOXYL'
            residue_atoms = ct.getResidueAtoms(iatom)
            for atom in residue_atoms:
                name = atom.pdbname
                if (atom.pdbres in ["ASP ", "ASH "] and
                        name == " CG ") or (atom.pdbres in ["GLU ", "GLH "] and
                                            name == " CD "):
                    self.carbon = int(atom)
                elif name in [' OD1', ' OE1']:
                    self.oxygen1 = int(atom)
                    for bonded_atom in atom.bonded_atoms:
                        if bonded_atom.atomic_number == 1:
                            self.hydrogen1 = int(bonded_atom)
                elif name in [' OD2', ' OE2']:
                    self.oxygen2 = int(atom)
                    for bonded_atom in atom.bonded_atoms:
                        if bonded_atom.atomic_number == 1:
                            self.hydrogen2 = int(bonded_atom)
            if self.carbon is None or self.oxygen1 is None or self.oxygen2 is None:
                report(LOG_EXTRA,
                       'Rejecting ' + self.name + ' due to missing atoms.')
                self.valid = False
                return
            # Check for something covalently bound.
            for oxygen in [self.oxygen1, self.oxygen2]:
                for bond in ct.atom[oxygen].bond:
                    if bond.atom2.atomic_number != 1 and bond.atom2.pdbname not in [
                            ' CG ', ' CD '
                    ] and bond.order != 0:
                        report(
                            LOG_EXTRA,
                            'Rejecting ' + self.name + ' due to covalent bond.')
                        self.valid = False
                        return
            if self.hydrogen1 is not None:
                self.current_state = 1
                self.initial_state = 1
            elif self.hydrogen2 is not None:
                self.current_state = 2
                self.initial_state = 2
            else:
                self.current_state = 0
                self.initial_state = 0
            return

[docs]        def pre_treat_1(self, ct):
            ct.atom[self.oxygen1].formal_charge = 1
            ct.atom[self.oxygen2].formal_charge = 0
            ct.getBond(self.carbon,
                       self.oxygen1).type = structure.BondType.Double
            ct.getBond(self.carbon,
                       self.oxygen2).type = structure.BondType.Single

[docs]        def pre_treat_2(self, ct):
            residue_atoms = ct.getResidueAtoms(self.carbon)
            for hyd in residue_atoms:
                if hyd.atomic_number != 1:
                    continue
                for heavy in hyd.bonded_atoms:
                    bond = ct.getBond(heavy, hyd)
                    if bond.order == 0:
                        continue
                    if int(heavy) == self.oxygen1:
                        self.hydrogen1 = int(hyd)
                        hyd.property['i_pa_atomindex'] = int(hyd)
                    elif int(heavy) == self.oxygen2:
                        self.hydrogen2 = int(hyd)
                        hyd.property['i_pa_atomindex'] = int(hyd)
            self.treated = True
            return

[docs]        def enumerate_states(self,
                             ct,
                             acceptors,
                             donors,
                             pH,
                             do_flips=True,
                             include_initial=False):
            self.contact_penalties = [0.0, 0.0, 0.0]
            self.nstates = 3
            self.state_names = ['Charged', 'Neutral 1', 'Neutral 2']
            if pH is None:
                self.pH_penalties = [0.0, 0.0, 0.0]
            elif pH in [pH_vlow]:
                self.pH_penalties = [5.0, 0.0, 0.0]
            else:
                self.pH_penalties = [0.0, 5.0, 5.0]
            for iatom in [self.hydrogen1, self.hydrogen2]:
                # If paired with another ASP/GLU, remove protonation penalties (but don't force protonation)
                for donor in donors:
                    if ct.atom[donor[0]].pdbname in [
                            ' OD1', ' OD2', ' OE1', ' OE2'
                    ] and ct.atom[donor[0]].resnum != ct.atom[iatom].resnum:
                        if ct.measure(iatom, donor[0]) < 2.0:
                            if iatom == self.hydrogen1:
                                report(
                                    LOG_DEBUG,
                                    self.name + ' : hydrogen1 is near ' +
                                    str(ct.atom[donor[0]].resnum) + ':' +
                                    ct.atom[donor[0]].pdbname)
                                self.contact_penalties = [0.0, 0.0, 0.0]
                            elif iatom == self.hydrogen2:
                                report(
                                    LOG_DEBUG,
                                    self.name + ' : hydrogen2 is near ' +
                                    str(ct.atom[donor[0]].resnum) + ':' +
                                    ct.atom[donor[0]].pdbname)
                                self.contact_penalties = [0.0, 0.0, 0.0]
                    # Check for a metal
                    elif donor[0] == donor[1]:
                        if (ct.atom[donor[0]].atomic_number not in [7, 8]) and (
                            (ct.measure(self.oxygen1, donor[0]) < 3.0) or
                            (ct.measure(self.oxygen2, donor[0]) < 3.0)):
                            self.contact_penalties = [0.0, 10.0, 10.0]
                            report(
                                LOG_DEBUG, self.name +
                                ' Metal ion detected, restricting state.')
                            break
                for iacceptor in acceptors:
                    if ct.measure(iatom, iacceptor) < 2.0:
                        if iatom == self.hydrogen1:
                            report(
                                LOG_DEBUG, self.name + ' : hydrogen1 is near ' +
                                str(ct.atom[iacceptor].resnum) + ':' +
                                ct.atom[iacceptor].pdbname)
                            self.contact_penalties[2] += 10.0
                        elif iatom == self.hydrogen2:
                            report(
                                LOG_DEBUG, self.name + ' : hydrogen2 is near ' +
                                str(ct.atom[iacceptor].resnum) + ':' +
                                ct.atom[iacceptor].pdbname)
                            self.contact_penalties[1] += 10.0
                        break
            self.penalties = []
            for i in range(3):
                self.penalties.append(self.pH_penalties[i] +
                                      self.contact_penalties[i])
            report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties))
            return

[docs]        def lock_protonation(self):
            if self.initial_state == 0:
                self.contact_penalties = [0.0, 1000.0, 1000.0]
            else:
                self.contact_penalties = [1000.0, 0.0, 0.0]
            self.penalties = [
                self.pH_penalties[i] + self.contact_penalties[i]
                for i in range(3)
            ]
            return

[docs]        def assign_state(self,
                         ct,
                         istate,
                         add_labels=True,
                         label_pkas=False,
                         state_gaps=None,
                         verbose=False):
            if not self.treated:
                self.pre_treat(ct)
            atoms_to_delete = []
            ct.atom[self.carbon].label_format = "%UT"
            ct.atom[self.carbon].label_color = label_color
            if (istate == 1 and
                    self.current_state != 1) or (istate != 1 and
                                                 self.current_state == 1):
                atoms_to_swap = [(self.oxygen1, self.oxygen2),
                                 (self.hydrogen1, self.hydrogen2)]
                for atoms in atoms_to_swap:
                    self.swap_atoms(ct, atoms[0], atoms[1])
                # Unlike His/Asn/Gln, we aren't preserving the flip info, so update these.
                self.oxygen1, self.oxygen2 = self.oxygen2, self.oxygen1
                self.hydrogen1, self.hydrogen2 = self.hydrogen2, self.hydrogen1
            if istate == 0:
                protonated = False
                ct.atom[self.oxygen1].formal_charge = 0
                ct.atom[self.oxygen2].formal_charge = -1
                ct.getBond(self.carbon,
                           self.oxygen1).type = structure.BondType.Double
                ct.getBond(self.carbon,
                           self.oxygen2).type = structure.BondType.Single
                atoms_to_delete = [self.hydrogen1, self.hydrogen2]
                prot_label = ''
            elif istate == 1:
                protonated = True
                ct.atom[self.oxygen1].formal_charge = 0
                ct.atom[self.oxygen2].formal_charge = 0
                ct.getBond(self.carbon,
                           self.oxygen1).type = structure.BondType.Single
                ct.getBond(self.carbon,
                           self.oxygen2).type = structure.BondType.Double
                atoms_to_delete = [self.hydrogen2]
                prot_label = 'Neutral'
            elif istate == 2:
                protonated = True
                ct.atom[self.oxygen1].formal_charge = 0
                ct.atom[self.oxygen2].formal_charge = 0
                ct.getBond(self.carbon,
                           self.oxygen1).type = structure.BondType.Double
                ct.getBond(self.carbon,
                           self.oxygen2).type = structure.BondType.Single
                atoms_to_delete = [self.hydrogen1]
                prot_label = 'Neutral'
            if add_labels:
                label = prot_label
            else:
                label = ''
            if self.pka is not None and label_pkas:
                label += ' %.2f' % self.pka
                ct.atom[self.carbon].property[pka_property] = self.pka
            ct.atom[self.carbon].label_user_text = label
            # Set the residue name.
            if ct.atom[self.carbon].pdbres[0:2] == 'AS':
                if protonated:
                    protonation_name = 'ASH '
                else:
                    protonation_name = 'ASP '
            if ct.atom[self.carbon].pdbres[0:2] == 'GL':
                if protonated:
                    protonation_name = 'GLH '
                else:
                    protonation_name = 'GLU '
            self.assign_state_gap(ct.atom[self.carbon],
                                  state_gaps,
                                  report_gaps=verbose)
            residue_atoms = ct.getResidueAtoms(self.carbon)
            for atom in residue_atoms:
                atom.pdbres = protonation_name
            self.current_state = istate
            self.treated = False
            return atoms_to_delete

[docs]        def update_atom_indices(self, ct, new_indices):
            # Gotta be a better way...
            self.carbon = self.get_new_index(ct, self.carbon, new_indices)
            self.oxygen1 = self.get_new_index(ct, self.oxygen1, new_indices)
            self.oxygen2 = self.get_new_index(ct, self.oxygen2, new_indices)
            self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices)
            self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices)
            return

[docs]        def get_heavies(self):
            return [self.oxygen1, self.oxygen2]

[docs]        def get_state_sites(self, ct, istate):
            if istate == 0:
                return ([self.oxygen1, self.oxygen2], [], [], 0.0)
            elif istate == 1:
                return ([self.oxygen1, self.oxygen2],
                        [(self.oxygen1, self.hydrogen1)], [], 0.0)
            elif istate == 2:
                return ([self.oxygen1, self.oxygen2],
                        [(self.oxygen2, self.hydrogen2)], [], 0.0)
            return

[docs]        def get_view_atoms(self):
            return [self.carbon, self.oxygen1, self.oxygen2]

[docs]        def get_penalty(self, istate):
            return self.penalties[istate]

[docs]        def get_adjustable_atoms(self):
            return [self.oxygen1, self.oxygen2]

[docs]        def change_pka(self, pka, propka_pH):
            if pka is None:
                reference_pka = 4.00
            else:
                reference_pka = pka
                self.pka = pka
            # propka sometimes has some very large pKas for ASP and GLU
            # so we never want to force carboxlyate to be protonated.  If the
            # propKa is lower than the reference then we should (almost)
            # always deprotonate.  If the pKa is larger than the reference
            # then we should allow the hbonding pattern to determine whether
            # the system should be protonated.  Unlike other residues, a
            # very large pKa will never (almost) force a protonated state.
            if (reference_pka >= propka_pH):
                self.pH_penalties = [0.0, 0.0, 0.0]
            else:
                self.pH_penalties = [0.0, 4.0, 4.0]

            new_penalties = []
            something_changed = False
            for i in range(3):
                new_penalties.append(self.pH_penalties[i] +
                                     self.contact_penalties[i])
                if self.penalties[i] != new_penalties[i]:
                    something_changed = True
            self.penalties = new_penalties
            return something_changed

[docs]        def change_empirical_pka(self, pH):
            if self.pka is None:
                return
            delta = self.pka - pH
            if delta < 0.3:
                # We really do not want to protonate the Glu/Asp unless the pKa
                # is significanlty lower than the pH
                pH_penalties = [0, 100, 100]
            elif delta > 0.3:
                # Only add a modest pH penalty to the charged state in case a
                # clash occurs in the protonated state.
                pH_penalties = [10, 0, 0]
            self.pH_penalties = pH_penalties
            self.penalties = [
                p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties)
            ]

[docs]    class rotatable_changeable(changeable):
        asl = '((res.ptype \"CYS \",\"CYT \") AND (atom.ptype \" SG \") AND (atom.formal -1)) OR ((res.ptype \"TYR \") AND (atom.ptype \" OH \") AND (atom.formal -1)) OR (( atom.ele H AND not /C0-H0/ AND not /N0-H0/ ) AND NOT (res.ptype \"HOH\",\"DOD\",\"SPC\",\"ASH\",\"GLH\",\"ASP\",\"GLU\" ))'

[docs]        def __init__(self, ct, iatom):
            self.pka = None
            self.index = 0
            self.nstates = 0
            self.num_user_states = 0
            self.state_names = []
            self.dihedrals = []
            self.valid = True
            self.name = get_atom_string(ct.atom[iatom])
            self.type = 'ROTATABLE'
            self.ionizable = False
            self.current_state = 0
            self.initial_state = 0
            # If it's a deprotonated CYS/TYR
            if ct.atom[iatom].pdbname == " SG " or (
                    ct.atom[iatom].pdbres == "TYR " and
                    ct.atom[iatom].pdbname == " OH "):
                # Abort if it's coordinated to a metal
                for heavy_bond in ct.atom[iatom].bond:
                    if heavy_bond.order == 0:
                        report(
                            LOG_EXTRA, 'Rejecting ' + self.name +
                            ' - coordinated to a metal.')
                        self.valid = False
                        return
                # Set state to -1 now, and map it to the last state later in enumerate_states.
                self.current_state = -1
                self.initial_state = -1
                ct.atom[iatom].formal_charge = 0
                build.add_hydrogens(ct, atom_list=[iatom])
                for bonded_atom in ct.atom[iatom].bonded_atoms:
                    if bonded_atom.atomic_number == 1:
                        self.hydrogen = int(bonded_atom)
                        break
            else:
                self.hydrogen = iatom
            try:
                dihedral_atoms = self.get_dihedral_atoms(ct, self.hydrogen)
            except:
                self.valid = False
                return
            if len(dihedral_atoms) < 4:
                report(
                    LOG_EXTRA,
                    'Rejecting ' + self.name + ' due to lack of rotatability.')
                self.valid = False
                return
            if ct.atom[dihedral_atoms[1]].bond_total != 2:
                report(
                    LOG_EXTRA,
                    'Rejecting ' + self.name + ' - parent has too many bonds.')
                self.valid = False
                return
            self.parent = dihedral_atoms[1]
            self.parentparent = dihedral_atoms[2]
            self.parentparentparent = dihedral_atoms[3]
            self.treated = True
            self.locked = False
            nbonds = ct.atom[self.parentparent].bond_total
            if nbonds == 3:
                self.periodicity = 2
            elif nbonds == 4:
                self.periodicity = 3
                # Whether or not the atom at a given dihedral position is heavy.
                self.eclipse_is_heavy = []
                # Find out which are heavy/hydrogen.
                for atom in ct.atom[self.parentparent].bonded_atoms:
                    if atom.atomic_number == 1:
                        heavy = False
                    else:
                        heavy = True
                    if int(atom) == self.parentparentparent:
                        # 0 position.
                        self.eclipse_is_heavy.append((0.0, heavy))
                    elif int(atom) != self.parent:
                        dihedral = ct.measure(self.parentparentparent,
                                              self.parentparent, self.parent,
                                              atom)
                        self.eclipse_is_heavy.append((dihedral, heavy))
            else:
                self.periodicity = 0

            # Never ionize a rotatable as we don't have data a dataset to test
            # our accuracy
            #if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ', 'TYR ']:
            #    self.ionizable = True
            return

[docs]        def pre_treat_1(self, ct):
            ct.atom[self.parent].formal_charge = 0
            return

[docs]        def pre_treat_2(self, ct):
            residue_atoms = ct.getResidueAtoms(self.parent)
            for hyd in residue_atoms:
                if hyd.atomic_number != 1:
                    continue
                for heavy in hyd.bonded_atoms:
                    bond = ct.getBond(heavy, hyd)
                    if bond.order == 0:
                        continue
                    if int(heavy) == self.parent:
                        self.hydrogen = int(hyd)
                        hyd.property['i_pa_atomindex'] = int(hyd)
            self.treated = True
            return

[docs]        def enumerate_states(self,
                             ct,
                             acceptors,
                             donors,
                             pH,
                             do_flips=True,
                             include_initial=False):
            distance_check = self.max_hbond_distance
            if include_initial:
                # Include initial state.
                dihedral = ct.measure(self.parentparentparent,
                                      self.parentparent, self.parent,
                                      self.hydrogen)
                self.dihedrals.append(dihedral)
                self.state_names.append('Initial')
                self.nstates = 1
            initial_nstates = self.nstates
            # Keep trying until at least one state is found, increasing the distance each time.
            while self.nstates == initial_nstates:
                for (atom1, atom2) in [
                    (acceptor, None) for acceptor in acceptors
                ] + donors:
                    if atom1 == self.parent or atom1 == self.parentparent:
                        continue
                    # Trim any states pointing head-to-head with an N-H.
                    distance = ct.measure(self.parent, atom1)
                    if distance <= distance_check:
                        angle = ct.measure(self.parentparent, self.parent,
                                           atom1)
                        if (angle <= self.hbond_heavy_max_angle) and (
                                angle >= self.hbond_heavy_min_angle):
                            # Measure what the angle to the atom is.
                            dihedral = ct.measure(self.parentparentparent,
                                                  self.parentparent,
                                                  self.parent, atom1)
                            self.nstates += 1
                            self.dihedrals.append(dihedral)
                            self.state_names.append("Orientation")
                used_donor_heavies = set()
                for pair in donors:
                    if pair[0] == self.parent or pair[0] == self.parentparent:
                        continue
                    if pair[0] in used_donor_heavies:
                        continue
                    used_donor_heavies.add(pair[0])
                    distance = ct.measure(self.parent, pair[0])
                    if distance <= distance_check:
                        # Measure what the angle to the atom is (should it be from the heavy or the H?).
                        dihedral = ct.measure(self.parentparentparent,
                                              self.parentparent, self.parent,
                                              pair[0])
                        # Add +/-120 to it.
                        dihedral += 120.0
                        if dihedral > 180.0:
                            dihedral -= 360.0
                        elif dihedral < -180.0:
                            dihedral += 360.0
                        self.nstates += 1
                        self.dihedrals.append(dihedral)
                        self.state_names.append("Orientation")
                        dihedral -= 240.0
                        if dihedral > 180.0:
                            dihedral -= 360.0
                        elif dihedral < -180.0:
                            dihedral += 360.0
                        self.nstates += 1
                        self.dihedrals.append(dihedral)
                        self.state_names.append("Orientation")
                distance_check += 0.25
                if distance_check >= 50.0:
                    self.nstates += 1
                    self.dihedrals.append(0.0)
                    self.state_names.append("Orientation")
                    break
            # Add one state that doesn't touch anything, just in case.
            if ct.atom[self.hydrogen].pdbres != "TYR ":
                dihedral_save = ct.measure(self.parentparentparent,
                                           self.parentparent, self.parent,
                                           self.hydrogen)
                for idihedral in range(36):
                    ct.adjust(
                        float(idihedral) * 10.0, self.parentparentparent,
                        self.parentparent, self.parent, self.hydrogen)
                    nclashes = analyze.evaluate_asl(
                        ct,
                        '(not (atom.num %d) and within 2 (atom.num %d)) and not (withinbonds 2 (atom.num %d))'
                        % (self.hydrogen, self.hydrogen, self.hydrogen))
                    if len(nclashes) == 0:
                        self.nstates += 1
                        self.dihedrals.append(float(idihedral) * 10.0)
                        self.state_names.append("Default")
                        break
                ct.adjust(dihedral_save, self.parentparentparent,
                          self.parentparent, self.parent, self.hydrogen)
            # Trim any disallowed states.
            for istate in range(len(self.dihedrals) - 1, -1, -1):
                idihedral = self.dihedrals[istate]
                iname = self.state_names[istate]
                # Don't trim original state
                if (istate == 0) and include_initial:
                    continue
                if ct.atom[self.hydrogen].pdbres == "TYR " and math.fabs(
                        idihedral) > 30.0 and math.fabs(idihedral) < 150.0:
                    if math.fabs(idihedral) < 40.0:
                        report(
                            LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' +
                            str(idihedral) + ') to be more planar.')
                        if idihedral < 0.0:
                            self.dihedrals[istate] = -30.0
                            report(LOG_FULL_DEBUG, '  Adjusted to -30.0')
                        else:
                            self.dihedrals[istate] = 30.0
                            report(LOG_FULL_DEBUG, '  Adjusted to 30.0')
                    elif math.fabs(idihedral) > 140.0:
                        report(
                            LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' +
                            str(idihedral) + ') to be more planar.')
                        if idihedral < 0.0:
                            self.dihedrals[istate] = -150.0
                            report(LOG_FULL_DEBUG, '  Adjusted to -150.0')
                        else:
                            self.dihedrals[istate] = 150.0
                            report(LOG_FULL_DEBUG, '  Adjusted to 150.0')
                    else:
                        report(
                            LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' +
                            str(idihedral) + ') - too far out of plane.')
                        self.dihedrals.remove(idihedral)
                        self.state_names.remove(iname)
                        self.nstates -= 1
                elif self.periodicity == 3:
                    for (angle, heavy) in self.eclipse_is_heavy:
                        if heavy:
                            cutoff = 30.0
                        else:
                            cutoff = 10.0
                        if math.fabs(idihedral - angle) < cutoff:
                            report(
                                LOG_FULL_DEBUG,
                                'Removing state ' + iname + ' (' +
                                str(idihedral) + ') - too close to eclipsed.')
                            try:
                                self.dihedrals.remove(idihedral)
                                self.state_names.remove(iname)
                                self.nstates -= 1
                            except:
                                report(
                                    LOG_NONE,
                                    'ERROR: Failure removing state for ' +
                                    self.name +
                                    ' - Please check this residue for structural inconsistencies.'
                                )
            if ct.atom[self.hydrogen].pdbres == "TYR ":
                self.nstates += 1
                self.dihedrals.append(0.0)
                self.state_names.append("Default 1")
                self.nstates += 1
                self.dihedrals.append(180.0)
                self.state_names.append("Default 2")
            # Add standard staggered states.
            elif self.periodicity == 3:
                self.nstates += 1
                self.dihedrals.append(60.0)
                self.state_names.append("Stagger 1")
                self.nstates += 1
                self.dihedrals.append(180.0)
                self.state_names.append("Stagger 2")
                self.nstates += 1
                self.dihedrals.append(300.0)
                self.state_names.append("Stagger 3")
            # Trim any redundant states.
            for istate in range(len(self.dihedrals) - 1, 0, -1):
                idihedral = self.dihedrals[istate]
                iname = self.state_names[istate]
                for jstate in range(istate - 1, -1, -1):
                    jdihedral = self.dihedrals[jstate]
                    jname = self.state_names[jstate]
                    if abs(idihedral - jdihedral) < 10.0:
                        report(
                            LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' +
                            str(idihedral) + ') - too similar to ' + jname +
                            '(' + str(idihedral) + ')')
                        self.dihedrals.remove(idihedral)
                        self.state_names.remove(iname)
                        self.nstates -= 1
                        break
            # Name the states
            if include_initial:
                for istate in range(1, len(self.state_names)):
                    self.state_names[istate] += ' %d/%d' % (istate,
                                                            self.nstates - 1)
            else:
                for istate in range(0, len(self.state_names)):
                    self.state_names[istate] += ' %d/%d' % (istate + 1,
                                                            self.nstates)
            # TODO Do not create a neutral state for these, as
            # there we have no insight into the accuracy of pKa prediction for
            # these.
            #if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']:
            #    self.state_names.append('CYT ')
            #    self.dihedrals.append(None)
            #    self.nstates += 1
            #elif ct.atom[self.parent].pdbres in ['TYR ']:
            #    self.state_names.append('Charged')
            #    self.dihedrals.append(None)
            #    self.nstates += 1
            self.pH_penalties = []
            for istate in range(self.nstates):
                if self.ionizable:
                    # Penalties will come later if using PropKa.
                    if pH is None:
                        self.pH_penalties.append(0.0)
                        continue
                    # Not using PropKa.
                    if self.dihedrals[istate] is None:
                        if pH is None:
                            self.pH_penalties.append(0.0)
                        elif pH == pH_high:
                            self.pH_penalties.append(0.0)
                        else:
                            # Really don't want to deprotonate at normal pH
                            self.pH_penalties.append(200.0)
                    else:
                        if pH is None:
                            self.pH_penalties.append(0.0)
                        elif pH == pH_high:
                            # Really don't want to deprotonate at normal pH
                            self.pH_penalties.append(200.0)
                        else:
                            self.pH_penalties.append(0.0)
                else:
                    self.pH_penalties.append(0.0)
            if self.current_state == -1:
                self.current_state = self.nstates - 1
            if self.initial_state == -1:
                self.initial_state = self.nstates - 1
            self.contact_penalties = [0.0 for i in range(self.nstates)]
            self.penalties = [
                self.pH_penalties[i] + self.contact_penalties[i]
                for i in range(self.nstates)
            ]
            return

[docs]        def lock_protonation(self):
            if not self.ionizable:
                return
            if self.initial_state == (self.nstates - 1):
                self.contact_penalties = [1000.0 for i in range(self.nstates)]
                self.contact_penalties[-1] = 0.0
            else:
                self.contact_penalties = [0.0 for i in range(self.nstates)]
                self.contact_penalties[-1] = 1000.0
            self.penalties = [
                self.pH_penalties[i] + self.contact_penalties[i]
                for i in range(self.nstates)
            ]
            return

[docs]        def add_current_to_states(self, ct):
            dihedral = ct.measure(self.parentparentparent, self.parentparent,
                                  self.parent, self.hydrogen)
            self.dihedrals.append(dihedral)
            self.nstates += 1
            self.num_user_states += 1
            self.state_names.append("User %d" % self.num_user_states)
            self.penalties.append(0.0)

[docs]        def assign_state(self,
                         ct,
                         istate,
                         add_labels=True,
                         label_pkas=False,
                         state_gaps=None,
                         verbose=False):
            if not self.treated:
                self.pre_treat(ct)
            atoms_to_delete = []
            ct.atom[self.parent].label_format = "%UT"
            ct.atom[self.parent].label_color = label_color
            if self.dihedrals[istate] is None:
                ct.atom[self.parent].formal_charge = -1
                atoms_to_delete = [self.hydrogen]
                if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']:
                    residue_name = 'CYT '
                    prot_label = 'CYT'
                elif ct.atom[self.parent].pdbres == 'TYR ':
                    residue_name = 'TYR '
                    prot_label = 'Charged'
            else:
                ct.adjust(self.dihedrals[istate], self.parentparentparent,
                          self.parentparent, self.parent, self.hydrogen)
                if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']:
                    residue_name = 'CYS '
                elif ct.atom[self.parent].pdbres == 'TYR ':
                    residue_name = 'TYR '
                prot_label = ''
            if add_labels:
                label = prot_label
            else:
                label = ''
            if self.pka is not None and label_pkas:
                label += ' %.2f' % self.pka
                ct.atom[self.parent].property[pka_property] = self.pka
            ct.atom[self.parent].label_user_text = label
            self.assign_state_gap(ct.atom[self.parent],
                                  state_gaps,
                                  report_gaps=verbose)
            if self.ionizable:
                residue_atoms = ct.getResidueAtoms(self.parent)
                for atom in residue_atoms:
                    atom.pdbres = residue_name
            self.current_state = istate
            self.treated = False
            return atoms_to_delete

[docs]        def update_atom_indices(self, ct, new_indices):
            # Gotta be a better way...
            self.hydrogen = self.get_new_index(ct, self.hydrogen, new_indices)
            self.parent = self.get_new_index(ct, self.parent, new_indices)
            self.parentparent = self.get_new_index(ct, self.parentparent,
                                                   new_indices)
            self.parentparentparent = self.get_new_index(
                ct, self.parentparentparent, new_indices)
            return

[docs]        def get_heavies(self):
            return [self.parent]

[docs]        def get_state_sites(self, ct, istate):
            if self.dihedrals[istate] is None:
                return ([self.parent], [], [], 0.0)
            else:
                ct.adjust(self.dihedrals[istate], self.parentparentparent,
                          self.parentparent, self.parent, self.hydrogen)
                return ([self.parent], [(self.parent, self.hydrogen)], [], 0.0)

[docs]        def get_view_atoms(self):
            return [self.parent]

[docs]        def get_penalty(self, istate):
            return self.penalties[istate]

[docs]        def get_adjustable_atoms(self):
            return [self.hydrogen, self.parent]

[docs]        def change_pka(self, pka, propka_pH):
            if pka is None:
                reference_pka = 15.00
            else:
                reference_pka = pka
                self.pka = pka
            if not self.ionizable:
                return False
            if reference_pka < propka_pH:
                ionized = True
            else:
                ionized = False
            new_penalties = []
            changed = False
            for istate, dihedral in enumerate(self.dihedrals):
                if dihedral is None:
                    if ionized:
                        new_penalties.append(0.0 +
                                             self.contact_penalties[istate])
                    else:
                        # Really don't want to deprotonate at normal pH
                        new_penalties.append(200.0 +
                                             self.contact_penalties[istate])
                else:
                    if ionized:
                        new_penalties.append(200.0 +
                                             self.contact_penalties[istate])
                    else:
                        new_penalties.append(0.0 +
                                             self.contact_penalties[istate])
                if self.penalties[istate] != new_penalties[istate]:
                    changed = True
            self.penalties = new_penalties
            return changed

[docs]        def change_empirical_pka(self, pH):
            # Never deprotonate
            penalty = 200.0
            pH_penalties = []
            for dihedral in self.dihedrals:
                # Deprotonated state
                if dihedral is None:
                    pH_penalties.append(penalty)
                else:
                    pH_penalties.append(0)
            self.pH_penalties = pH_penalties
            self.penalties = [
                p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties)
            ]

[docs]    class amine_changeable(changeable):
        # Could be used for all primary amines, but for now let's stick to LYS sidechains.
        asl = '((res.ptype \"LYS \",\"LYN \") AND (atom.ptype \" NZ \"))'

[docs]        def __init__(self, ct, iatom):
            self.pka = None
            self.index = 0
            self.nstates = 0
            self.num_user_states = 0
            self.state_names = []
            self.nitrogen = None
            self.carbon = None
            self.hyds = []
            self.hyd_coords = []
            self.initial_coords = []
            self.locked = False
            self.valid = True
            self.name = get_residue_string(ct.atom[iatom])
            self.type = 'AMINE'
            self.initial_coords = []

            for atom in ct.atom[iatom].bonded_atoms:
                if atom.atomic_number == 1:
                    self.hyds.append(int(atom))
                elif self.carbon is None:
                    self.carbon = int(atom)
                else:
                    report(LOG_EXTRA,
                           'Rejecting ' + self.name + ' due to covalent bond.')
                    self.valid = False
                    return
            if len(self.hyds) not in [2, 3]:
                report(
                    LOG_EXTRA,
                    'Rejecting ' + self.name + ' due to incorrect bound count.')
                self.valid = False
                return
            self.nitrogen = iatom
            for hyd in self.hyds:
                self.initial_coords.append(ct.atom[hyd].xyz)
            self.current_state = 0
            self.initial_state = 0
            return

[docs]        def pre_treat_1(self, ct):
            ct.atom[self.nitrogen].formal_charge = 1
            return

[docs]        def pre_treat_2(self, ct):
            self.hyds = []
            for hyd in ct.atom[self.nitrogen].bonded_atoms:
                if hyd.atomic_number == 1:
                    self.hyds.append(int(hyd))
                    hyd.property['i_pa_atomindex'] = int(hyd)
            self.treated = True
            return

[docs]        def enumerate_states(self,
                             ct,
                             acceptors,
                             donors,
                             pH,
                             do_flips=True,
                             sample_neutral_states=False,
                             include_initial=False):
            """
            Generate states for lysines.

            States are generated by rotating hydrogens for acceptor/donor
            interactions and by optionally including the neutral state.

            :param ct: Structure to generate states for
            :type ct: Structure
            :param acceptors: List of acceptor atom indices
            :type acceptors: List[int]
            :param donors: List of donor atom indices
            :type donors: List[(int, int)]
            :param pH: pH of system
            :type pH: float
            :param do_flips: Does nothing
            :type do_flips: bool
            :param sample_neutral_states: Include neutral states. Since
                PROPKA's pKa prediction is unreliable for Lys, currently we
                have no method of confidently assess whether it is neutral. So
                it's turned off by default.
            :type sample_neutral_states: bool
            :param include_initial: Include the initial state of the Lys
            :type include_initial: bool
            """
            dihedral_atoms = self.get_dihedral_atoms(ct, self.hyds[0])
            distance_check = self.max_hbond_distance
            if include_initial:
                self.hyd_coords.append(self.initial_coords)
                self.state_names.append('Initial')
                self.nstates += 1
            initial_nstates = self.nstates
            while self.nstates == initial_nstates:
                for (atom1, atom2) in [
                    (acceptor, None) for acceptor in acceptors
                ] + donors:
                    if atom1 == self.nitrogen:
                        continue
                    distance = ct.measure(self.nitrogen, atom1)
                    if distance <= distance_check:
                        angle = ct.measure(self.carbon, self.nitrogen, atom1)
                        if (angle <= self.hbond_heavy_max_angle) and (
                                angle >= self.hbond_heavy_min_angle):
                            dihedral = ct.measure(atom1, dihedral_atoms[1],
                                                  dihedral_atoms[2],
                                                  dihedral_atoms[3])
                            eclipsed = False
                            for eclipse_ang in [0.0, 120.0, 240.0]:
                                delta = math.fabs(
                                    math.fabs(dihedral) - eclipse_ang)
                                if delta >= 360.0:
                                    delta -= 360.0
                                if delta < 30.0:
                                    eclipsed = True
                                    break
                            if not eclipsed:
                                ct.adjust(dihedral, dihedral_atoms[3],
                                          dihedral_atoms[2], dihedral_atoms[1],
                                          dihedral_atoms[0])
                                coords = []
                                for hyd in self.hyds:
                                    coords.append(ct.atom[hyd].xyz)
                                self.hyd_coords.append(coords)
                                self.state_names.append("Orientation")
                                self.nstates += 1
                distance_check += 0.25
                if distance_check > MAX_ORIENT_HYDROGEN_DISTANCE:
                    break
            # Add indexes to the names
            if include_initial:
                for istate in range(1, len(self.state_names)):
                    self.state_names[istate] += ' %d/%d' % (istate,
                                                            self.nstates - 1)
            else:
                for istate in range(len(self.state_names)):
                    self.state_names[istate] += ' %d/%d' % (istate + 1,
                                                            self.nstates)
            # Add a standard staggered state.
            ct.adjust(60.0, dihedral_atoms[3], dihedral_atoms[2],
                      dihedral_atoms[1], dihedral_atoms[0])
            coords = []
            for hyd in self.hyds:
                coords.append(ct.atom[hyd].xyz)
            self.hyd_coords.append(coords)
            self.state_names.append("Stagger")
            self.nstates += 1
            self.pH_penalties = []
            # Protonation penalties for charged forms.
            for istate in range(self.nstates):
                self.pH_penalties.append(0.0)
            # After generating the charged states, now generate 3 neutral
            # states for each charged state.
            if sample_neutral_states:
                first_to_expand = 1 if include_initial else 0
                for istate in range(first_to_expand, self.nstates):
                    coords = self.hyd_coords[istate]
                    self.hyd_coords.append([coords[0], coords[1]])
                    self.state_names.append(self.state_names[istate] + ' LYN1')
                    self.hyd_coords.append([coords[0], coords[2]])
                    self.state_names.append(self.state_names[istate] + ' LYN2')
                    self.hyd_coords.append([coords[1], coords[2]])
                    self.state_names.append(self.state_names[istate] + ' LYN3')
                    self.nstates += 3
                    if pH is None:
                        self.pH_penalties.extend([0.0, 0.0, 0.0])
                    else:
                        self.pH_penalties.extend([100.0, 100.0, 100.0])
            self.contact_penalties = [0.0 for i in range(self.nstates)]
            self.penalties = [
                self.pH_penalties[i] + self.contact_penalties[i]
                for i in range(self.nstates)
            ]

[docs]        def lock_protonation(self):
            nhyd_desired = len(self.initial_coords)
            for istate in range(self.nstates):
                if len(self.hyd_coords[istate]) == nhyd_desired:
                    self.contact_penalties[istate] = 0.0
                else:
                    self.contact_penalties[istate] = 1000.0
            self.penalties = [
                self.pH_penalties[i] + self.contact_penalties[i]
                for i in range(self.nstates)
            ]
            return

[docs]        def assign_state(self,
                         ct,
                         istate,
                         add_labels=True,
                         label_pkas=False,
                         state_gaps=None,
                         verbose=False):
            if not self.treated:
                self.pre_treat(ct)
            atoms_to_delete = []
            ct.atom[self.nitrogen].label_format = "%UT"
            ct.atom[self.nitrogen].label_color = label_color
            residue_name = 'LYS '
            prot_label = ''
            if len(self.hyd_coords[istate]) == 2:
                residue_name = 'LYN '
                prot_label = 'Neutral'
                atoms_to_delete.append(self.hyds[2])
                self.hyds[2] = None
                ct.atom[self.nitrogen].formal_charge = 0
            else:
                ct.atom[self.nitrogen].formal_charge = 1
            for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]):
                ct.atom[self.hyds[ihyd]].xyz = hyd_coords
            if add_labels:
                label = prot_label
            else:
                label = ''
            if self.pka is not None and label_pkas:
                label += ' %.2f' % self.pka
                ct.atom[self.nitrogen].property[pka_property] = self.pka
            ct.atom[self.nitrogen].label_user_text = label
            self.assign_state_gap(ct.atom[self.nitrogen],
                                  state_gaps,
                                  report_gaps=verbose)
            residue_atoms = ct.getResidueAtoms(self.nitrogen)
            for atom in residue_atoms:
                atom.pdbres = residue_name
            self.current_state = istate
            self.treated = False
            return atoms_to_delete

[docs]        def update_atom_indices(self, ct, new_indices):
            # Gotta be a better way...
            self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices)
            self.carbon = self.get_new_index(ct, self.carbon, new_indices)
            for i in range(len(self.hyds)):
                if self.hyds[i] is not None:
                    self.hyds[i] = self.get_new_index(ct, self.hyds[i],
                                                      new_indices)
            return

[docs]        def get_heavies(self):
            return [self.nitrogen]

[docs]        def get_state_sites(self, ct, istate):
            acceptors = []
            donors = []
            for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]):
                ct.atom[self.hyds[ihyd]].xyz = hyd_coords
                donors.append((self.nitrogen, self.hyds[ihyd]))
            if len(self.hyd_coords[istate]) == 2:
                acceptors = [self.nitrogen]
            return (acceptors, donors, [], 0.0)

[docs]        def get_view_atoms(self):
            return [self.nitrogen]

[docs]        def get_penalty(self, istate):
            return self.penalties[istate]

[docs]        def change_pka(self, pka, propka_pH):
            if pka is None:
                reference_pka = 15.00
            else:
                reference_pka = pka
                self.pka = pka
            neutral = reference_pka < propka_pH
            new_penalties = []
            changed = False
            for istate, hyd_coords in enumerate(self.hyd_coords):
                nhyds = len(hyd_coords)
                pH_penalty = 0
                if (nhyds == 2 and not neutral) or (nhyds == 3 and neutral):
                    pH_penalty = 100
                new_penalty = pH_penalty + self.contact_penalties[istate]
                new_penalties.append(new_penalty)
                if self.penalties[istate] != new_penalty:
                    changed = True
            self.penalties = new_penalties
            return changed

[docs]        def change_empirical_pka(self, pH):
            # Never deprotonate
            pH_penalties = []
            penalty = 100
            for hyd_coords in self.hyd_coords:
                nhyds = len(hyd_coords)
                if nhyds == 2:
                    pH_penalties.append(penalty)
                else:
                    pH_penalties.append(0)
            self.pH_penalties = pH_penalties
            self.penalties = [
                p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties)
            ]

[docs]    class water_changeable(changeable):
        asl = "(water) AND (atom.ele O)"
        redundancy_tolerance = 0.5

[docs]        def __init__(self, ct, iatom):
            self.pka = None
            self.index = 0
            self.name = ''
            self.num_user_states = 0
            self.state_names = []
            self.state_coordinates = []
            self.oxygen = iatom
            self.hydrogen1 = None
            self.hydrogen2 = None
            self.valid = True
            self.name = get_residue_string(ct.atom[iatom])
            self.type = 'WATER'
            self.interacts_with_nonwater = False
            for atom in ct.atom[iatom].bonded_atoms:
                if atom.atomic_number != 1:
                    continue
                if self.hydrogen1 is None:
                    self.hydrogen1 = int(atom)
                elif self.hydrogen2 is None:
                    self.hydrogen2 = int(atom)
                else:
                    report(
                        LOG_EXTRA,
                        'Rejecting ' + self.name + ' due to excess hydrogens.')
                    self.valid = False
                    return
            if self.hydrogen1 is None or self.hydrogen2 is None:
                report(LOG_EXTRA,
                       'Rejecting ' + self.name + ' due to lack of hydrogens.')
                self.valid = False
                return
            self.treated = True
            self.locked = False
            self.current_state = 0
            self.initial_state = 0

        @property
        def nstates(self):
            """Return number of enumerated states"""
            return len(self.state_coordinates)

[docs]        def enumerate_states(self,
                             ct,
                             acceptors,
                             donors,
                             pH,
                             do_flips=True,
                             include_initial=False):
            """
            Generate discrete states for water, where a state is defined by the
            coordinates of its two hydrogens.

            :param ct: Structure
            :type ct: Structure
            :param acceptors: Acceptor atom indices
            :type acceptors: List[int]
            :param donors: Donor heavy-hydrogen atom indices
            :type donors: List[Tuple(int, int)]
            :param pH: Does nothing here
            :type pH: float
            :param do_flips: Does nothing here
            :type do_flips: bool
            :param include_initial: Include the current water orientation in the state list
            :type include_initial: bool
            """
            # Get the atom objects for easy access to attributes
            oxygen = ct.atom[self.oxygen]
            hydrogen1 = ct.atom[self.hydrogen1]
            hydrogen2 = ct.atom[self.hydrogen2]
            # Include initial state.
            if include_initial:
                self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz))
                self.state_names.append("Initial")

            enumerator = WaterStateEnumerator(ct, ct.atom[self.oxygen],
                                              acceptors, donors)
            states = enumerator.enumerate_donor_donor_states()
            states += enumerator.enumerate_acceptor_acceptor_states()
            # Only enumerate additional less favorable states if the current
            # pool is not too large.
            if len(states) < 30:
                states += enumerator.enumerate_donor_states()
                states += enumerator.enumerate_acceptor_states()
            for state in states:
                self.state_coordinates.append(state.coordinates)
                self.state_names.append("Orientation")
            # In case no acceptors or donors are nearby and no states are
            # generated, just include the initial state.
            if self.nstates == 0:
                self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz))
                self.state_names.append("Initial")

            # Name the states
            if include_initial:
                for istate in range(1, len(self.state_names)):
                    self.state_names[istate] += ' %d/%d' % (istate,
                                                            self.nstates - 1)
            else:
                for istate in range(0, len(self.state_names)):
                    self.state_names[istate] += ' %d/%d' % (istate + 1,
                                                            self.nstates)

[docs]        def add_current_to_states(self, ct):
            xyz1 = ct.atom[self.hydrogen1].xyz
            xyz2 = ct.atom[self.hydrogen2].xyz
            self.state_coordinates.append((xyz1, xyz2))
            self.num_user_states += 1
            self.state_names.append("User %d" % self.num_user_states)

[docs]        def assign_state(self,
                         ct,
                         istate,
                         add_labels=True,
                         label_pkas=False,
                         state_gaps=None,
                         verbose=False):
            xyz1, xyz2 = self.state_coordinates[istate]
            ct.atom[self.hydrogen1].xyz = xyz1
            ct.atom[self.hydrogen2].xyz = xyz2
            self.current_state = istate
            return []

[docs]        def update_atom_indices(self, ct, new_indices):
            # Gotta be a better way...
            self.oxygen = self.get_new_index(ct, self.oxygen, new_indices)
            self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices)
            self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices)
            return

[docs]        def get_heavies(self):
            return [self.oxygen]

[docs]        def get_state_sites(self, ct, istate):
            xyz1, xyz2 = self.state_coordinates[istate]
            ct.atom[self.hydrogen1].xyz = xyz1
            ct.atom[self.hydrogen2].xyz = xyz2
            return ([self.oxygen], [(self.oxygen, self.hydrogen1),
                                    (self.oxygen, self.hydrogen2)], [], 0.0)

[docs]        def get_view_atoms(self):
            return [self.oxygen, self.hydrogen1, self.hydrogen2]

[docs]        def get_penalty(self, istate):
            return 0.0

[docs]        def get_adjustable_atoms(self):
            return [self.oxygen, self.hydrogen1, self.hydrogen2]

[docs]    class hbond_cluster:

[docs]        def __init__(self):
            self.changeables = []
            self.self_scores = {}
            self.pair_scores = {}
            self.self_charges = {}
            self.combinations = []
            self.static_acceptors = []
            self.static_donors = []
            self.static_clashers = []
            self.max_keep = 10
            self.abort_sampling = False
            self.num_scored = 0
            self.max_score = 100000
            self.use_xtal = False
            self.trouble = False
            self.failed = False
            # Save the cutoff values as an attribute as these can be changed
            # during optimization.
            self.polar_clash_cutoff = POLAR_CLASH_CUTOFF
            self.nonpolar_clash_cutoff = NONPOLAR_CLASH_CUTOFF
            self.acceptor_acceptor_clash_cutoff = ACCEPTOR_ACCEPTOR_CLASH_CUTOFF

[docs]        def setup_xtal(self, ct, interact, clustering_distance):
            # Use a nested defaultdict to be compatible with original
            # implementation while saving on memory. This defaults to False for
            # every input
            self.need_xtal = defaultdict(bool)
            if not self.use_xtal:
                return
            for ichangeable in range(len(self.changeables)):
                changeable1 = self.changeables[ichangeable]
                for jchangeable in range(ichangeable + 1,
                                         len(self.changeables)):
                    changeable2 = self.changeables[jchangeable]
                    if not interact[changeable1.index][changeable2.index]:
                        continue
                    # See if they interact without xtal
                    heavies1 = self.changeables[ichangeable].get_heavies()
                    heavies2 = self.changeables[jchangeable].get_heavies()
                    for iatom, jatom in itertools.product(heavies1, heavies2):
                        if ct.measure(iatom, jatom) <= clustering_distance:
                            # We don't need crystal mate for this changeable
                            break
                    else:
                        self.need_xtal[(changeable1.index,
                                        changeable2.index)] = True
                        self.need_xtal[(changeable2.index,
                                        changeable1.index)] = True
                        report(
                            LOG_EXTRA, self.changeables[ichangeable].name +
                            ' and ' + self.changeables[jchangeable].name +
                            ' interact via crystal symmetry only')

[docs]        def optimize(self,
                     ct,
                     interact,
                     static_donors,
                     static_acceptors,
                     static_clashers,
                     max_comb,
                     num_sequential_cycles,
                     use_propka,
                     propka_pH=7.0,
                     annotated_ct=None):
            """
            Optimize hydrogen bond network and protonation states of
            changeables in this cluster

            :param ct: Structure containing changeables to optimize
            :type ct: Structure
            :param interact: Interaction lookup dict for changeable-changeable interactions
            :type interact: Dict[Dict[bool]]
            :param static_donors: List of static donor atom indices
            :type static_donors: List[Tuple[int, int]]
            :param static_acceptors: List of static acceptor atom indices
            :type static_donors: List[int]
            :param static_clashers: List of static clasher atom indices
            :type static_donors: List[int]
            :param max_comb: Maximum number of combinations at which an exhaustive search is performed.
            :type max_comb: int
            :param num_sequential_cycles: Number of optimization cycles when using heuristic search.
            :type num_sequential_cycles: int
            :param use_propka: Use PROPKA to determine pKa values of changeables
            :type use_propka: bool
            :param propka_pH: pH of system when using PROPKA for pKa determination
            :type propka_pH: float
            :param annotated_ct: Annotated structure that may contain xtal mates for faster optimization. Passing this, skips the creation of crystal mates and structure annotation during the self-scoring phase.
            :type annotated_ct: Structure
            """
            for changeable in self.changeables:
                if not changeable.treated:
                    changeable.pre_treat(ct)
            self.static_acceptors = static_acceptors
            self.static_donors = static_donors
            self.static_clashers = static_clashers
            potential_combinations = 1
            for changeable in self.changeables:
                potential_combinations *= changeable.nstates
            if annotated_ct is None:
                annotated_ct = generate_annotated_ct(ct, static_donors,
                                                     static_acceptors,
                                                     static_clashers,
                                                     self.use_xtal)

            # Initialize and pre-calculate scores
            self.combinations = []
            report(LOG_DEBUG, "    Prescoring self")
            self.pre_score_self(annotated_ct)
            self.pre_score_pairs(ct, interact)
            report(LOG_BASIC)

            if potential_combinations <= max_comb:
                report(
                    LOG_BASIC,
                    '  Scoring exhaustively (%d potential combinations)...' %
                    potential_combinations)
                self.score_exhaustively(ct,
                                        interact,
                                        find_all_solutions=True,
                                        tolerate_clashes=True)
            else:
                if TOULBAR2_AVAILABLE:
                    report(LOG_BASIC, "  Trying exact network solver")
                    report(LOG_BASIC,
                           "    Stage1: Solving network exact and diversify")
                    combinations = self.solve_network_exact_and_diversify(
                        ct, interact, ncombinations=1)
                    for combination in combinations:
                        energy = self.score_combination(ct, interact,
                                                        combination)
                        self.combinations.append((combination, 0.0, energy))
                if not self.combinations:
                    if TOULBAR2_AVAILABLE:
                        report(
                            LOG_BASIC,
                            "   Exact network solver failed. Falling back to stochastic optimization"
                        )
                    report(
                        LOG_BASIC,
                        '  Scoring via sequential iteration and hybridization (%d potential combinations)...'
                        % potential_combinations)
                    report(LOG_BASIC, '    Beginning sequential iteration...')
                    self.score_sequentially(ct, interact, num_sequential_cycles)
                    self.expand_solutions(ct, interact)
                    self.recombine_solutions(ct, interact)

            self.trim_redundant_combinations()

            # Log some results
            report(LOG_BASIC)
            report(LOG_BASIC, '  Cluster results:')
            for i, changeable in enumerate(self.changeables):
                state_name = changeable.state_names[self.combinations[0][0][i]]
                if changeable.pka is None:
                    msg = f'    {changeable.name} : {state_name}'
                else:
                    msg = f'    {changeable.name} : {state_name} (pKa = {changeable.pka:.2f})'
                report(LOG_BASIC, msg)
            report(LOG_EXTRA, '  Top %d results:' % len(self.combinations))
            for combination in self.combinations:
                report(LOG_EXTRA, '    ' + str(combination))
            report(LOG_BASIC)

[docs]        def solve_network_exact_and_diversify(self,
                                              ct,
                                              interact,
                                              ncombinations,
                                              delta_energy=2.0):
            """
            Solve the network exact

            :param ct: Structure
            :type ct: structure.Structure
            :param interact: Changeable interaction lookup table
            :type interact: Dict[int, Dict[int, bool]]
            :param ncombinations: Number of maximum combinations to return
            :type ncombinations: int
            :param delta_energy: Maximum energy difference of combinations generated compared to the global minimum
            :type delta_energy: float

            :return: List of combinations with first element being the global
                optimum combination or empty list if no optimum found
            :rtype: List[List[int]]
            """
            # Get an upper bound energy by just calculating the ennergy of the
            # current combination and add 1 to it.
            current_combination = [c.current_state for c in self.changeables]
            upper_bound = self.score_combination(ct, interact,
                                                 current_combination) + 1
            solver = NetworkSolver(self, interact, upper_bound)
            optimal_combination = solver.optimal_solution()
            if optimal_combination is None:
                return []
            optimal_energy = self.score_combination(ct, interact,
                                                    optimal_combination)
            report(
                LOG_BASIC,
                f"    Optimal network combination found with energy: {optimal_energy:.5f}"
            )
            if ncombinations <= 1:
                return [optimal_combination]
            # Generate additional combinations for diversity To increase
            # diversity, reduce the set down by picking some random
            # combinations. We reduce here, because toulbar2 is greedy in its
            # generation of combinations and subsequent combinations might be
            # very similar to previous ones.
            combinations = solver.explore_solutions(upper_bound=optimal_energy +
                                                    delta_energy,
                                                    number=1000)
            # Make sure the global minimum is part of the list.
            if combinations:
                k = max(min(len(combinations), ncombinations - 1), 0)
                combinations = random.sample(combinations, k=k)
            return [optimal_combination] + combinations

[docs]        def score_combination(self, ct, interact, states):
            # This is for calculating the energy of a single set of states, and assumes prescoring of self terms.
            energy = 0.0
            for ichangeable, istate in enumerate(states):
                changeable = self.changeables[ichangeable]
                energy += self.self_scores[changeable.index][istate]
                (iacceptors, idonors, iclashers,
                 icharge) = changeable.get_state_sites(ct, istate)
                for jchangeable in range(ichangeable + 1,
                                         len(self.changeables)):
                    changeable2 = self.changeables[jchangeable]
                    if not interact[changeable.index][changeable2.index]:
                        continue
                    jstate = states[jchangeable]
                    key = (changeable.index, changeable2.index)
                    pair_scores = self.pair_scores.get(key)
                    if pair_scores is not None:
                        energy += pair_scores[istate, jstate]
            return energy

[docs]        def single_point(self,
                         ct,
                         interact,
                         static_donors,
                         static_acceptors,
                         static_clashers,
                         xtal_ct=None):
            # This is for calling from the GUI, and assumes nothing is precalculated.
            combination = []
            for changeable in self.changeables:
                combination.append(changeable.current_state)
                if not changeable.treated:
                    changeable.pre_treat(ct)
            if xtal_ct is None:
                self.setup_local_static(ct, static_acceptors, static_donors,
                                        static_clashers)
            else:
                self.setup_local_static_alt(xtal_ct, static_acceptors,
                                            static_donors, static_clashers)
            self.single_point_score = 0.0
            for ichangeable in range(len(self.changeables)):
                changeable1 = self.changeables[ichangeable]
                #istate = self.changeables[ichangeable].current_state
                istate = combination[ichangeable]
                (iacceptors, idonors, iclashers,
                 icharge) = changeable1.get_state_sites(ct, istate)
                self.single_point_score += self.score_pair(
                    ct,
                    iacceptors,
                    idonors,
                    iclashers,
                    icharge,
                    self.static_acceptors,
                    self.static_donors,
                    self.static_clashers,
                    0.0,
                    use_xtal=self.use_xtal)
                self.single_point_score += changeable1.get_penalty(istate)
                for jchangeable in range(ichangeable + 1,
                                         len(self.changeables)):
                    changeable2 = self.changeables[jchangeable]
                    jstate = combination[jchangeable]
                    (jacceptors, jdonors, jclashers,
                     jcharge) = changeable2.get_state_sites(ct, jstate)
                    self.single_point_score += self.score_pair(
                        ct,
                        iacceptors,
                        idonors,
                        iclashers,
                        icharge,
                        jacceptors,
                        jdonors,
                        jclashers,
                        jcharge,
                        use_xtal=self.need_xtal[(changeable1.index,
                                                 changeable2.index)])
            # Restore to proper state
            atoms_to_delete = []
            for i, changeable in enumerate(self.changeables):
                atoms = changeable.assign_state(ct, combination[i], False)
                atoms_to_delete.extend(atoms)
            return atoms_to_delete

[docs]        def setup_local_static_alt(self, ct, static_acceptors, static_donors,
                                   static_clashers):

            self.static_acceptors = []
            self.static_donors = []
            self.static_clashers = []
            base_distance = 4.0

            heavies = []
            for changeable in self.changeables:
                heavies.extend(changeable.get_heavies())

            max_interaction_distance = base_distance
            while static_acceptors and static_donors:
                atoms = analyze.evaluate_asl(ct, 'within %d atom.i_pa_atomindex %s' % \
                         (max_interaction_distance, ','.join(str(i) for i in heavies)))
                for acceptor in static_acceptors:
                    if acceptor in atoms and acceptor not in self.static_acceptors:
                        self.static_acceptors.append(acceptor)
                for donor in static_donors:
                    if donor[0] in atoms and donor not in self.static_donors:
                        self.static_donors.append(donor)
                for clasher in static_clashers:
                    if clasher in atoms and clasher not in self.static_clashers:
                        self.static_clashers.append(clasher)
                if len(self.static_acceptors) > 0 and len(
                        self.static_donors) > 0:
                    break
                max_interaction_distance += 0.5
            return

[docs]        def setup_local_static(self, ct, static_acceptors, static_donors,
                               static_clashers):
            self.static_acceptors = []
            self.static_donors = []
            self.static_clashers = []
            base_distance = 4.0
            heavies = []
            for changeable in self.changeables:
                for atom in changeable.get_heavies():
                    heavies.append(atom)
            if len(static_acceptors) > 0:
                max_interaction_distance = base_distance
                while len(self.static_acceptors) == 0:
                    for acceptor in static_acceptors:
                        for atom in heavies:
                            if measure(ct,
                                       atom1=atom,
                                       atom2=acceptor,
                                       use_xtal=self.use_xtal
                                      ) <= max_interaction_distance:
                                self.static_acceptors.append(acceptor)
                                break
                    max_interaction_distance += 0.5
            if len(static_donors) > 0:
                max_interaction_distance = base_distance
                while len(self.static_donors) == 0:
                    for donor in static_donors:
                        for atom in heavies:
                            if measure(ct,
                                       atom1=atom,
                                       atom2=donor[0],
                                       use_xtal=self.use_xtal
                                      ) <= max_interaction_distance:
                                self.static_donors.append(donor)
                                break
                    max_interaction_distance += 0.5
            if len(static_clashers) > 0:
                max_interaction_distance = base_distance
                for clasher in static_clashers:
                    for atom in heavies:
                        if measure(ct,
                                   atom1=atom,
                                   atom2=clasher,
                                   use_xtal=self.use_xtal
                                  ) <= max_interaction_distance:
                            self.static_clashers.append(clasher)
                            break
                    max_interaction_distance += 0.5
            return

[docs]        def pre_score_self(self, ct):
            """
            Calculate the self score for each state. Interactions are
            calculated between the changeable and its static environment.
            """
            if ct.property.get(ANNOTATED_PROPERTY):
                working_ct = ct
            else:
                working_ct = generate_annotated_ct(ct,
                                                   self.static_donors,
                                                   self.static_acceptors,
                                                   self.static_clashers,
                                                   use_xtal=self.use_xtal)
            dcell = create_distance_cell(working_ct,
                                         MAX_HEAVY_HEAVY_INTERACTION_DISTANCE,
                                         honor_pbc=False)
            for changeable in self.changeables:
                self.self_scores[changeable.index] = np.zeros(
                    changeable.nstates, dtype=np.double)
                self.self_charges[changeable.index] = np.zeros(
                    changeable.nstates, dtype=np.double)
                if changeable.locked:
                    continue
                for istate in range(changeable.nstates):
                    # Should be okay to use working_ct here, even though
                    # get_state_sites alters coordinates for rotatables. The
                    # alteration only need to persist through score_pair().
                    (iacceptors, idonors, iclashers,
                     icharge) = changeable.get_state_sites(working_ct, istate)
                    static_acceptors, static_donors, static_clashers = changeable.get_close_interactors(
                        working_ct, dcell)
                    # Get the score (don't need use_xtal even if xtal is on,
                    # because we have explicit crystalmates.
                    score = self.score_pair(working_ct,
                                            iacceptors,
                                            idonors,
                                            iclashers,
                                            icharge,
                                            static_acceptors,
                                            static_donors,
                                            static_clashers,
                                            0.0,
                                            use_xtal=False)
                    penalty = changeable.get_penalty(istate)
                    total_score = score + penalty
                    self.self_scores[changeable.index][istate] = total_score
                    self.self_charges[changeable.index][istate] = icharge

[docs]        def pre_score_pairs(self, ct, interact):
            mapper = {
                changeable.index: changeable for changeable in self.changeables
            }
            for changeable1 in self.changeables:
                if changeable1.locked:
                    istates = [changeable1.current_state]
                else:
                    istates = list(range(changeable1.nstates))
                for index2 in interact[changeable1.index]:
                    if index2 <= changeable1.index:
                        continue
                    changeable2 = mapper.get(index2)
                    if changeable2 is None:
                        continue
                    if changeable2.locked:
                        jstates = [changeable2.current_state]
                    else:
                        jstates = list(range(changeable2.nstates))
                    key1 = (changeable1.index, changeable2.index)
                    key2 = (changeable2.index, changeable1.index)
                    need_xtal = self.need_xtal[key1]
                    pair_scores = np.zeros(
                        (changeable1.nstates, changeable2.nstates),
                        dtype=np.double)
                    for istate in istates:
                        (iacceptors, idonors, iclashers,
                         icharge) = changeable1.get_state_sites(ct, istate)
                        for jstate in jstates:
                            (jacceptors, jdonors, jclashers,
                             jcharge) = changeable2.get_state_sites(ct, jstate)
                            score = self.score_pair(ct,
                                                    iacceptors,
                                                    idonors,
                                                    iclashers,
                                                    icharge,
                                                    jacceptors,
                                                    jdonors,
                                                    jclashers,
                                                    jcharge,
                                                    use_xtal=need_xtal)
                            pair_scores[istate, jstate] = score
                    self.pair_scores[key1] = pair_scores
                    self.pair_scores[key2] = pair_scores.T

[docs]        def score_pair(self,
                       ct,
                       iacceptors,
                       idonors,
                       iclashers,
                       icharge,
                       jacceptors,
                       jdonors,
                       jclashers,
                       jcharge,
                       use_xtal=False):
            score = 0.0
            # Clasher-clasher interaction
            for clash1, clash2 in itertools.product(iclashers, jclashers):
                distance = measure(ct,
                                   atom1=clash1,
                                   atom2=clash2,
                                   use_xtal=use_xtal)
                clashing_term = self.calculate_clash_term(
                    distance, self.nonpolar_clash_cutoff, base=30)
                score += clashing_term

            # Clasher-donor interaction
            for clashers, donors in [(iclashers, jdonors),
                                     (jclashers, idonors)]:
                for donor, clasher in itertools.product(donors, clashers):
                    hydrogen = donor[1]
                    distance = measure(ct,
                                       atom1=clasher,
                                       atom2=hydrogen,
                                       use_xtal=use_xtal)
                    clashing_term = self.calculate_clash_term(
                        distance, self.nonpolar_clash_cutoff, base=30)
                    score += clashing_term

            # Clashing part of donor-donor interaction
            for donor1, donor2 in itertools.product(idonors, jdonors):
                h1 = donor1[1]
                h2 = donor2[1]
                distance = measure(ct, atom1=h1, atom2=h2, use_xtal=use_xtal)
                clashing_term = self.calculate_clash_term(
                    distance, self.polar_clash_cutoff, base=50)
                score += clashing_term
                # Metals are part of the donor list by convention. Penalize
                # interaction if a donor is pointing to a metal. We do not know
                # upfront whether a donor is a metal, so we need to calculate
                # both combinations.
                metal_term = self.score_metal_donor(ct,
                                                    donor1,
                                                    donor2,
                                                    use_xtal=use_xtal)
                metal_term += self.score_metal_donor(ct,
                                                     donor2,
                                                     donor1,
                                                     use_xtal=use_xtal)
                score += metal_term

            # Acceptor-acceptor repulsion. Only counts for static acceptors,
            # i.e. acceptors that cannot donate.
            donor_acceptors = {d[0] for d in itertools.chain(idonors, jdonors)}
            for iacceptor, jacceptor in itertools.product(
                    iacceptors, jacceptors):
                if iacceptor in donor_acceptors or jacceptor in donor_acceptors:
                    continue
                score += self.score_acceptor_acceptor(ct,
                                                      iacceptor,
                                                      jacceptor,
                                                      use_xtal=use_xtal)

            # Donor-acceptor interactions. Donors can accept from other donors
            acceptor_donor_pairs = ((idonors, jacceptors), (jdonors,
                                                            iacceptors))
            for donors, acceptors in acceptor_donor_pairs:
                for (heavy, hydrogen), acceptor in itertools.product(
                        donors, acceptors):
                    # Do not score metal - acceptor interactions
                    if heavy == hydrogen:
                        continue
                    score += self.score_donor_acceptor(ct,
                                                       heavy,
                                                       hydrogen,
                                                       acceptor,
                                                       use_xtal=use_xtal)
            return score

[docs]        def score_acceptor_acceptor(self,
                                    ct: structure.Structure,
                                    iacceptor: int,
                                    jacceptor: int,
                                    use_xtal=False) -> float:
            """
            Scoring function for acceptor-acceptor interactions.

            These interactions are considered worse than hydrogen clashes, and
            its base penalty is thus higher. It is set to 70 as it makes the
            scoring function more robust for the Asn and Gln state of 2X9E and
            3UZA that interact with the ligand. No other optimization has been
            performed.

            :return: Score
            """
            distance = measure(ct,
                               atom1=iacceptor,
                               atom2=jacceptor,
                               use_xtal=use_xtal)
            score = self.calculate_clash_term(
                distance, self.acceptor_acceptor_clash_cutoff, base=40)
            return score

[docs]        def score_donor_acceptor(self,
                                 ct,
                                 donor_heavy,
                                 donor_hydrogen,
                                 acceptor_heavy,
                                 use_xtal=False):
            heavy_heavy_distance = measure(ct,
                                           atom1=donor_heavy,
                                           atom2=acceptor_heavy,
                                           use_xtal=use_xtal)
            if heavy_heavy_distance > MAX_HEAVY_HEAVY_INTERACTION_DISTANCE:
                return 0.0
            if heavy_heavy_distance == 0.0:
                name1 = get_atom_string(ct.atom[acceptor_heavy])
                name2 = get_atom_string(ct.atom[donor_heavy])
                report(LOG_BASIC,
                       f'WARNING: Overlapping atoms found: {name1} - {name2}')
                return 0.0
            # Acceptor-metal interaction score is only based on distance
            if donor_heavy == donor_hydrogen:
                return self.calculate_distance_term(heavy_heavy_distance)
            angle = measure(ct,
                            atom1=donor_hydrogen,
                            atom2=donor_heavy,
                            atom3=acceptor_heavy,
                            use_xtal=use_xtal)
            angle_term = self.calculate_angle_term(angle)
            if angle_term == 0:
                return 0.0
            distance_term = self.calculate_distance_term(heavy_heavy_distance)
            score = distance_term * angle_term
            return score

[docs]        @staticmethod
        def score_metal_donor(ct: structure.Structure,
                              metal: Tuple[int, int],
                              donor: Tuple[int, int],
                              use_xtal=False) -> float:
            """
            Score a metal-donor interaction. Donors should not be pointing
            towards metals. If so, this will result in a penalty
            """
            # Check if metal is actually a metal. It is a metal if the heavy
            # atom index is the same as the hydrogen index.
            m1, m2 = metal
            if m1 != m2:
                return 0.0
            # Check that the donor is NOT a metal.
            dheavy, dhydrogen = donor
            if dheavy == dhydrogen:
                return 0.0
            distance = measure(ct, atom1=m1, atom2=dheavy, use_xtal=use_xtal)
            if distance >= 3.0:
                return 0.0
            angle = measure(ct, atom1=m1, atom2=dheavy, atom3=dhydrogen)
            if angle < 95:
                # Return a score of +5 that is between the value of a clash but
                # bigger than an attractive potential.
                return 5.0
            return 0.0

[docs]        @staticmethod
        def calculate_distance_term(distance):
            """Return distance dependent part of the hydrogen-bond potential functions."""
            return -(3.0 / distance)**3

[docs]        @staticmethod
        def calculate_angle_term(angle):
            """
            Return angle dependent part of the hydrogen bond potential.

            :param angle: Angle in degrees formed by H-D-A, with Hydrogen, Donor and Acceptor
            :param angle: float

            :return: Score
            :rtype: float
            """
            if angle >= 90.0:
                return 0
            return math.cos(math.radians(angle))**2

[docs]        @staticmethod
        def calculate_clash_term(distance, cutoff, base=50):
            """Return clash term"""
            if distance < cutoff:
                # Distinghuish between bad and very bad clashes
                return base * (1 + (cutoff - distance))
            return 0.0

[docs]        def score_exhaustively(self,
                               ct,
                               interact,
                               find_all_solutions=True,
                               tolerate_clashes=False):

            def recurse(changeables, combination, energy, charge, ct, interact,
                        find_all_solutions, tolerate_clashes):
                if len(combination) > 0:
                    # Abort this branch if we've picked up any clashes with the static.
                    ichangeable = len(combination) - 1
                    istate = combination[-1]
                    changeable1 = self.changeables[ichangeable]
                    (iacceptors, idonors, iclashers,
                     icharge) = changeable1.get_state_sites(ct, istate)
                    self_energy = self.self_scores[changeable1.index][istate]
                    if self_energy > 10.0 and not tolerate_clashes:
                        report(LOG_DEBUG, '         Self clash.')
                        return -1
                    energy += self_energy
                    charge += self.self_charges[changeable1.index][istate]
                    # Abort this branch if we've picked up any clashes with an existing changeable.
                    for jchangeable in range(len(combination) - 2, -1, -1):
                        changeable2 = self.changeables[jchangeable]
                        if not interact[changeable1.index][changeable2.index]:
                            continue
                        jstate = combination[jchangeable]
                        key = (changeable1.index, changeable2.index)
                        pair_energy = self.pair_scores[key][istate, jstate]
                        if pair_energy > 10.0 and not tolerate_clashes:
                            report(
                                LOG_DEBUG,
                                '         Clash with changeable %d - %s.' %
                                (jchangeable, changeable2.name))
                            return jchangeable
                        else:
                            energy += pair_energy
                if not find_all_solutions:
                    report(LOG_DEBUG,
                           '       Current combination: %s' % str(combination))
                if changeables:
                    # Delve another level down.
                    clashes = []
                    if changeables[0].locked:
                        new_combination = combination + [
                            changeables[0].current_state
                        ]
                        self.num_scored += 1
                        if not find_all_solutions and self.num_scored > self.max_score:
                            report(
                                LOG_BASIC,
                                '      Failed to find valid initial combination in alotted time.'
                            )
                            self.abort_sampling = True
                            return
                        clash = recurse(changeables[1:], new_combination,
                                        energy, charge, ct, interact,
                                        find_all_solutions, tolerate_clashes)
                        clashes.append(clash)
                        if self.abort_sampling:
                            return None
                    else:
                        for state in range(changeables[0].nstates):
                            new_combination = combination + [state]
                            self.num_scored += 1
                            if not find_all_solutions and self.num_scored > self.max_score:
                                report(
                                    LOG_DEBUG,
                                    '      Failed to find valid initial combination in alotted time.'
                                )
                                self.abort_sampling = True
                                return
                            report(
                                LOG_DEBUG,
                                '      Trying state %d (%s) for changeable %d (%s).'
                                % (state, changeables[0].state_names[state],
                                   len(combination), changeables[0].name))
                            clash = recurse(changeables[1:], new_combination,
                                            energy, charge, ct, interact,
                                            find_all_solutions,
                                            tolerate_clashes)
                            clashes.append(clash)
                            if self.abort_sampling:
                                return None
                            if self.abort_to_level is not None:
                                if self.abort_to_level == len(combination):
                                    report(LOG_DEBUG, '      Ceasing abort.')
                                    self.abort_to_level = None
                                else:
                                    return
                    if None not in clashes:
                        highest_clash = 0
                        for clash in clashes:
                            if clash > highest_clash:
                                highest_clash = clash
                        self.abort_to_level = highest_clash
                        report(
                            LOG_DEBUG,
                            '      Aborting to level %d' % self.abort_to_level)
                else:
                    if energy > 10.0 and not tolerate_clashes:
                        return
                    # Finished.
                    report(
                        LOG_DEBUG, '        Final score = ' + str(energy) +
                        ' (' + str(combination) + ')')
                    self.combinations.append((combination, charge, energy))
                    if not find_all_solutions:
                        # Only wanted one.
                        self.abort_sampling = True
                        return None
                    if len(self.combinations) > self.max_keep:
                        self.combinations.sort(key=operator.itemgetter(-1))
                        self.combinations = self.combinations[0:self.max_keep]

            self.abort_sampling = False
            self.abort_to_level = None
            self.num_scored = 0
            energy = 0.0
            charge = 0.0
            recurse(self.changeables, [], energy, charge, ct, interact,
                    find_all_solutions, tolerate_clashes)
            # Final sort.
            self.combinations.sort(key=lambda item: item[-1])

[docs]        def score_sequentially(self, ct, interact, num_sequential_cycles):
            """
            This routine uses an algorithm similar to Prime's iteration to
            convergence.  Starting from a random configuration, each species is
            optimized in turn, keeping the others fixed in their current state.
            This continues until the system reaches convergence (no more
            changes in the most optimal state for all residues).

            :param ct: input/output structure, will be modified
            :type ct: schrodigner.Structure
            :param interact: ??
            :type interact: ??
            :param num_sequential_cycles: Number of cycles of randomization
                and optimization to conduct
            :type num_sequential_cycles: int
            """
            report(LOG_BASIC,
                   '      Stage 1: Sequential iteration with hybridization')
            local_combinations = []
            randomize = True
            num_failed_hybrids = 0
            # Passes with different random starting points
            for iter in range(num_sequential_cycles):
                # Whether or not to create a random starting point, or just work with the current one
                if randomize:
                    comb = [
                        random.randint(0, changeable.nstates - 1)
                        for changeable in self.changeables
                    ]
                # Main iteration routine.
                (total_energy, problem_children) = self.iterate_to_convergence(
                    ct, interact, comb)
                # Append the converged solution
                local_combinations.append(
                    ((comb, 0.0, total_energy), problem_children))
                report(
                    LOG_EXTRA,
                    '        Energy for this pass: ' + str(total_energy) +
                    ' (' + str(len(problem_children)) + ' problematic members)')
                for problem in problem_children:
                    report(LOG_EXTRA,
                           '          ' + self.changeables[problem].name)
                #for combi in local_combinations:
                #    print str(combi)
                # Successfully found a good solution.  Append it to the list and start again.
                if len(problem_children) == 0:
                    report(
                        LOG_EXTRA,
                        '    No problematic regions for this structure, storing it and starting again.'
                    )
                    self.combinations.append((comb, 0.0, total_energy))
                    local_combinations = []
                    randomize = True
                    num_failed_hybrids = 0
                    continue
                # Try to fix the best solution with pieces of the other solutions.
                if (iter >= 3) and (iter % 2 == 1):
                    comb = self.create_hybrid(local_combinations, interact)
                    if comb is None:
                        num_failed_hybrids += 1
                        if num_failed_hybrids == 5:
                            report(
                                LOG_EXTRA,
                                '        Too many failed attempts at hybrid.  Storing this solution and starting again.'
                            )
                            local_combinations.sort(key=lambda x: x[0][-1])
                            self.combinations.append(local_combinations[0][0])
                            local_combinations = []
                            randomize = True
                            num_failed_hybrids = 0
                        report(LOG_EXTRA, '          Randomizing...')
                        randomize = True
                    else:
                        num_failed_hybrids = 0
                        randomize = False
                else:
                    randomize = True
            # If none were found, at least add the current ones.
            if len(self.combinations) == 0:
                for combination in local_combinations:
                    self.combinations.append(combination[0])
            self.combinations.sort(key=operator.itemgetter(-1))
            report(
                LOG_BASIC, '        Best solution after Stage 1: ' +
                str(self.combinations[0][2]))
            report(LOG_EXTRA, '        ' + str(self.combinations[0][0]))
            return

[docs]        def expand_solutions(self, ct, interact):
            """
            This takes an existing set of good solutions and generates more by
            deconverging them and then iterating them back to convergence.
            Generates at least 10 new solutions.
            """
            report(LOG_BASIC,
                   '      Stage 2: Diversification of existing solutions')
            new_combinations = []
            while len(new_combinations) < 10:
                for comb in self.combinations:
                    (local_comb, charge, initial_energy) = copy.deepcopy(comb)
                    report(
                        LOG_EXTRA,
                        '        Diverging solution with initial energy of ' +
                        str(initial_energy))
                    self.deconverge(ct, interact, local_comb)
                    report(LOG_EXTRA, '        Now reconverging...')
                    (final_energy,
                     problem_children) = self.iterate_to_convergence(
                         ct, interact, local_comb)
                    report(
                        LOG_EXTRA, '        Energy for reconverged solution: ' +
                        str(final_energy))
                    new_combinations.append((local_comb, 0.0, final_energy))
            self.combinations.extend(new_combinations)
            self.combinations.sort(key=operator.itemgetter(-1))
            report(
                LOG_BASIC, '        Best solution after Stage 2: ' +
                str(self.combinations[0][2]))
            report(LOG_EXTRA, '        ' + str(self.combinations[0][0]))
            return

[docs]        def recombine_solutions(self, ct, interact):
            """
            This is similar to score_sequentially, but begins with some
            pre-existing good solutions in self.combinations, and then creates
            hybrids to try to improve on them.
            """
            report(LOG_BASIC,
                   '      Stage 3: Recombination of existing solutions')
            # Set up the list of combinations, plus their problem children.
            # Iterations should do nothing.
            local_combinations = []
            for (comb, charge, energy) in self.combinations:
                (total_energy, problem_children) = self.iterate_to_convergence(
                    ct, interact, comb, problem_cutoff=0.0)
                local_combinations.append(
                    ((comb, 0.0, total_energy), problem_children))
            for iter in range(30):
                comb = self.create_hybrid(local_combinations,
                                          interact,
                                          random_scaffold=True)
                if comb is None:
                    continue
                (total_energy, problem_children) = self.iterate_to_convergence(
                    ct, interact, comb, problem_cutoff=-0.5)
                report(
                    LOG_EXTRA,
                    '        Energy for this pass: ' + str(total_energy) +
                    ' (' + str(len(problem_children)) + ' problematic members)')
                local_combinations.append(
                    ((comb, 0.0, total_energy), problem_children))
            for comb in local_combinations:
                self.combinations.append(comb[0])
            self.combinations.sort(key=operator.itemgetter(-1))
            report(
                LOG_BASIC, '        Best solution after Stage 3: ' +
                str(self.combinations[0][2]))
            report(LOG_EXTRA, '          ' + str(self.combinations[0][0]))
            return

[docs]        def deconverge(self, ct, interact, comb, problem_cutoff=50.0):
            """
            This starts with what is assumed to be a good solution, and then
            randomizes the states, but not to anything that produces a problem.
            """
            global_to_local_index = {
                c.index: n for n, c in enumerate(self.changeables)
            }
            for (ichangeable, changeable) in enumerate(self.changeables):
                states = [i for i in range(changeable.nstates)]
                random.shuffle(states)
                for istate in states:
                    state_energy = self.self_scores[changeable.index][istate]
                    for changeable2_index in interact[changeable.index]:
                        jchangeable = global_to_local_index.get(
                            changeable2_index)
                        # local index might not be available if the cluster size has
                        # been capped in the ProtAssign workflow
                        if jchangeable is None:
                            continue
                        jstate = comb[jchangeable]
                        key = (changeable.index, changeable2_index)
                        pair_scores = self.pair_scores.get(key)
                        if pair_scores is not None:
                            state_energy += pair_scores[istate, jstate]
                    if state_energy < problem_cutoff:
                        comb[ichangeable] = istate
                        break

[docs]        def iterate_to_convergence(self,
                                   ct,
                                   interact,
                                   comb,
                                   problem_cutoff=50.0):
            """
            This iterates the combination 'comb' to convergence.  Maximum of 10
            cycles.
            """
            # Create a list of indices for shuffling later. Using a different
            # order each time may help avoid a few local minima.
            global_to_local_index = {
                c.index: n for n, c in enumerate(self.changeables)
            }
            changeable_list = list(range(len(self.changeables)))
            for ipass in range(10):
                random.shuffle(changeable_list)
                problem_children = []
                nchanges = 0
                #for ichangeable,changeable in enumerate(self.changeables):
                for ichangeable in changeable_list:
                    changeable = self.changeables[ichangeable]
                    if changeable.locked:
                        comb[ichangeable] = changeable.current_state
                        continue
                    best_state = -1
                    best_state_energy = 1000000
                    for istate in range(changeable.nstates):
                        state_energy = self.self_scores[
                            changeable.index][istate]
                        for changeable2_index in interact[changeable.index]:
                            jchangeable = global_to_local_index.get(
                                changeable2_index)
                            # local index might not be available if the cluster size has
                            # been capped in the ProtAssign workflow
                            if jchangeable is None:
                                continue
                            jstate = comb[jchangeable]
                            key = (changeable.index, changeable2_index)
                            pair_scores = self.pair_scores.get(key)
                            if pair_scores is None:
                                continue
                            state_energy += pair_scores[istate, jstate]
                        if state_energy < best_state_energy:
                            best_state = istate
                            best_state_energy = state_energy
                    if best_state != comb[ichangeable]:
                        nchanges += 1
                    comb[ichangeable] = best_state
                    report(
                        LOG_DEBUG, '        Best score for ' + changeable.name +
                        ' is ' + str(best_state_energy) + ' for state ' +
                        changeable.state_names[best_state])
                    if best_state_energy > problem_cutoff:
                        problem_children.append(ichangeable)
                if nchanges == 0:
                    break
            total_energy = self.score_combination(ct, interact, comb)
            return (total_energy, problem_children)

[docs]        def create_hybrid(self,
                          local_combinations,
                          interact,
                          random_scaffold=False):
            """
            This takes the lowest energy solution, and for each problematic
            region it searches other solutions (in random order) for any which
            may have had better luck for just that part of the overall cluster.
            It then splices those solutions into the lowest energy one. If
            random_scaffold, then it selects a random solution as the basis in
            stead of the lowest energy one.
            """
            # Try to fix the best solution with pieces of the other solutions
            report(LOG_EXTRA, '        Creating hybrid solution...')
            local_combinations.sort(key=lambda x: x[0][-1])
            fixes = {}
            neighbours = {}
            if random_scaffold:
                iscaffold = random.randint(0, len(local_combinations) - 1)
            else:
                iscaffold = 0
            for problem_child in local_combinations[iscaffold][1]:
                # Randomize the order in which solutions are searched
                randomized_combinations = copy.deepcopy(local_combinations)
                random.shuffle(randomized_combinations)
                for solution in randomized_combinations:
                    if problem_child not in solution[1]:
                        fixes[problem_child] = solution[0][0][problem_child]
                        for ichangeable in range(len(self.changeables)):
                            if interact[self.changeables[problem_child].index][
                                    self.changeables[ichangeable].index]:
                                neighbours[ichangeable] = solution[0][0][
                                    ichangeable]
                        break
            fixes.update(neighbours)
            if len(fixes) != 0:
                comb = copy.deepcopy(local_combinations[0][0][0])
                for (problem_child, istate) in fixes.items():
                    comb[problem_child] = istate
                randomize = False
                return comb
            else:
                report(LOG_EXTRA, '    No valid hybrid found.')
                return None

[docs]        def trim_redundant_combinations(self):
            nonredund_combs = []
            existing_scores = set()
            for comb in self.combinations:
                if comb[2] not in existing_scores:
                    nonredund_combs.append(comb)
                    existing_scores.add(comb[2])
            self.combinations = nonredund_combs
            return

[docs]        def assign_combination(self,
                               ct,
                               icombination,
                               add_labels,
                               label_pkas,
                               verbose=False):
            """
            Assign a given combination to this cluster

            :param ct: The structure to operate on
            :type ct: schrodinger.Structure

            :param icombination: The index of the combination to assign or if
               this number is larger then the stored combinations, just keep
               the current state
            :type icombinations: integer

            :param add_labels: Whether to add labels to atoms to be seen in
                maestro with the current protonation state
            :type add_labels: bool

            :param label_pka: Whether to add labels for the pKa of each residue
            :type label_pka: bool

            :param verbose: Whether to report additional information to the log
                file about the combination chosen
            :type verbose: bool
            """
            atoms_to_delete = []
            if icombination < len(self.combinations):
                for ichangeable in range(len(self.changeables)):
                    state_gaps = self.determine_gap(icombination, ichangeable)
                    atoms = self.changeables[ichangeable].assign_state(
                        ct,
                        self.combinations[icombination][0][ichangeable],
                        add_labels,
                        label_pkas,
                        state_gaps=state_gaps,
                        verbose=verbose)
                    atoms_to_delete.extend(atoms)
            else:
                for ichangeable in range(len(self.changeables)):
                    atoms = self.changeables[ichangeable].assign_state(
                        ct, self.changeables[ichangeable].current_state,
                        add_labels, label_pkas)
                    atoms_to_delete.extend(atoms)
            return atoms_to_delete

[docs]        def determine_gap(self, icombination, ichangeable):
            """
            Create a dictionary with the energy gaps to each of the various states.
            States that differ by only a hydrogen rotation are not considered
            unique

            :type icombination: integer
            :param icombination: the combination to use as the zero
                point.  In most situations this will be the lowest energy
                combination ( 0 when sorted)
            :type ichangeable: integer
            :param ichangeable: The residue number ( or position number)
                within the cluster which will be analyzed
            :rparam: dictionary where the key is the name of the state
                or "Default" when the state is one of the staggers
            :rtype: dictionary with a key of string and value of a float
            """

            def dedup_state(state):
                if (re.match(r"Stagger \d+ \d+/\d+", state) or
                        re.match("Stagger", state) or
                        re.match(r"Orientation \d+/\d+", state) or
                        re.match(r"Default \d+ \d+/\d+", state) or
                        re.match(r"Default \d+/\d+", state)):
                    return "Default"
                return state

            changeable = self.changeables[ichangeable]
            state_gaps = {}
            min_score = self.combinations[icombination][2]
            for combination in self.combinations:
                state_name = dedup_state(
                    changeable.state_names[combination[0][ichangeable]])
                if state_name not in state_gaps:
                    state_gaps[state_name] = combination[2] - min_score
                else:
                    state_gaps[state_name] = min(combination[2] - min_score,
                                                 state_gaps[state_name])
            return state_gaps

    # ProtAssign class start
[docs]    def __init__(
            self,
            ct,
            interactive=False,
            do_flips=True,
            asl='',
            noprot_asl='',
            atoms=[],  # noqa: M511
            use_xtal=False,
            sample_waters=True,
            sample_acids=True,
            freeze_existing=False,
            include_initial=False,
            max_comb=10000,
            num_sequential_cycles=30,
            max_cluster_size=None,  # The maximum number of residues
            # to include in each cluster or None
            # if no such maximum should be used
        logging_level=DEFAULT_LOG_LEVEL,
            quiet_flag=False,
            debug_flag=False,
            add_labels=True,
            label_pkas=False,
            pH=pH_neutral,
            use_propka=True,
            propka_pH=7.0,
            user_states=[],  # noqa: M511
            minimize=False):

        global log_level
        log_level = logging_level

        # Alternate (backwards compatible) way of specifying output level.
        if quiet_flag:
            log_level = LOG_NONE
        elif debug_flag:
            log_level = LOG_DEBUG

        self.species = [
            self.amide_changeable, self.amine_changeable,
            self.histidine_changeable, self.rotatable_changeable
        ]
        self.interactive = interactive
        self.do_flips = do_flips
        self.noprot_asl = noprot_asl
        self.use_xtal = use_xtal
        self.freeze_existing = freeze_existing
        self.include_initial = include_initial
        self.max_comb = max_comb
        self.num_sequential_cycles = num_sequential_cycles
        self.max_cluster_size = max_cluster_size
        self.add_labels = add_labels
        self.label_pkas = label_pkas
        self.changeables = []
        self.clusters = []
        self.interact = []
        self.donors = []
        self.acceptors = []
        self.clashers = []
        self.xtal_ct = None
        self.pH = pH
        self.use_propka = use_propka
        self.propka_pH = propka_pH
        self.propka_failed = False
        self.pka_predictors = {}
        if mmutil.feature_flag_is_enabled(
                mmutil.PROTASSIGN_PKA_PREDICTOR) and use_propka:
            self.pka_predictors = {
                'his': HistidinepKaPredictor(),
                'asp': AsparticAcidpKaPredictor(),
                'glu': GlutamicAcidpKaPredictor(),
            }
        self.unsatisfied_donors_by_cluster = defaultdict(list)

        self.user_states = user_states
        self.minimize = minimize

        if sample_acids:
            self.species.append(self.carboxyl_changeable)
        if sample_waters:
            self.species.append(self.water_changeable)

        # If using PropKa, don't want to use the coarse pH rules
        if self.use_propka:
            self.pH = None
            # Record it though
            self.pH_backup = pH

        # Fix some element types, in case they're wrong.
        self.fix_elements(ct)

        # Set up the atoms to process.
        asl_atoms = []
        if asl == '' and len(atoms) == 0:
            asl = 'all'
        if asl:
            asl_atoms = analyze.evaluate_asl(ct, asl)
        self.target_atoms = set(atoms + asl_atoms)
        H_atoms = analyze.evaluate_asl(ct, 'atom.ele H')
        self.target_hydrogens = set(H_atoms).intersection(self.target_atoms)

        if self.freeze_existing:
            self.frozen_hydrogens = self.target_hydrogens.copy()
            self.freeze_existing_hydrogens(ct)
        else:
            self.frozen_hydrogens = set()

        self.setup(ct)

        self.set_user_states(ct)

        if not self.interactive:
            self.optimize(ct)
            self.cleanup(ct)
            self.summarize_pkas()

        return

[docs]    def fix_elements(self, ct):
        for atom in ct.atom:
            if atom.pdbname in [' OD1', ' OE1', ' OD2', ' OE2']:
                atom.atomic_number = 8
            elif atom.pdbname in [' ND1', ' ND2', ' NE2']:
                atom.atomic_number = 7
            elif atom.pdbname in [' CG ', ' CD ', ' CD2', ' CE1']:
                atom.atomic_number = 6
        ct.retype()
        return

[docs]    def freeze_existing_hydrogens(self, ct):
        atoms_to_remove = set()
        labile_H_atoms = set(
            analyze.evaluate_asl(
                ct,
                'atom.ptype \" HD1\" OR atom.ptype \" HE2\" OR atom.ptype \"1HD2\" OR atom.ptype \"2HD2\" OR atom.ptype \"1HE2\" OR atom.ptype \"2HE2\" OR atom.ptype \"HD21\" OR atom.ptype \"HD22\" OR atom.ptype \"HE21\" OR atom.ptype \"HE22\"'
            ))
        for hydrogen in self.target_hydrogens.intersection(labile_H_atoms):
            if hydrogen in atoms_to_remove:
                continue
            residue_atoms = ct.getResidueAtoms(hydrogen)
            for residue_atom in residue_atoms:
                atom = int(residue_atom)
                atoms_to_remove.add(atom)
        for hydrogen in self.target_hydrogens.difference(labile_H_atoms):
            if hydrogen in atoms_to_remove:
                continue
            residue_atoms = ct.getResidueAtoms(hydrogen)
            for residue_atom in residue_atoms:
                atom = int(residue_atom)
                if ct.areBound(hydrogen, atom):
                    atoms_to_remove.add(atom)
        self.target_atoms.difference_update(atoms_to_remove |
                                            self.frozen_hydrogens)
        return

[docs]    def setup(self, ct):
        charge_arginine_sidechains(ct)
        build.add_hydrogens(ct)
        self.extend_targeted_to_hyds(ct)
        self.identify_species(ct)
        if self.use_propka and not mmutil.feature_flag_is_enabled(
                mmutil.PROTASSIGN_PKA_PREDICTOR):
            try:
                pkas = self.run_propka(self.changeables,
                                       ct,
                                       use_xtal=self.use_xtal)
            except PropKaException:
                report(
                    LOG_BASIC,
                    'WARNING: PropKa failed on this structure - disabling use of PropKa.'
                )
                self.use_propka = False
                self.pH = self.pH_backup
                self.propka_failed = True
                ct.property['b_pa_PROPKA_failed'] = 1
        self.remove_zero_order_bonds(ct)
        for item in self.changeables:
            item.pre_treat_1(ct)
        build.add_hydrogens(ct)
        for item in self.changeables:
            item.pre_treat_2(ct)
        self.record_current_indices(ct)
        self.enumerate_changeable_states(ct)
        self.lock_protonation_states(ct)
        self.cluster(ct)
        if self.use_propka:
            # Use new internal predictor if PROPKA is requested
            if mmutil.feature_flag_is_enabled(mmutil.PROTASSIGN_PKA_PREDICTOR):
                self.empirical_pka_predictor(ct)
            else:
                self.apply_pkas(self.changeables, pkas, self.propka_pH)
        if self.interactive:
            # Reset back to initial states.
            atoms_to_delete = []
            for changeable in self.changeables:
                atoms = changeable.assign_state(ct,
                                                changeable.initial_state,
                                                add_labels=False,
                                                label_pkas=self.label_pkas)
                if atoms is not None:
                    atoms_to_delete.extend(atoms)
            self.delete_atoms(ct, atoms_to_delete)
        return

[docs]    def empirical_pka_predictor(self, ct):
        """Predict pKa of histidine and Asp/Glu based on empirical rules"""
        ct = ct.copy()
        self.annotate_structure(ct)
        if self.use_xtal:
            try:
                mates = analyze.generate_crystal_mates(ct, radius=4.0)
                for mate in mates[1:]:
                    ct.extend(mate)
            except Exception:
                pass
        for resname, predictor in self.pka_predictors.items():
            report(LOG_BASIC, f"Calculating pKa for residue {resname.upper()}")
            predictor.predict(ct)

        for changeable in self.changeables:
            residue = get_residue_from_changeable(ct, changeable)
            if residue is None:
                continue
            for atom in residue.atom:
                pka = atom.property.get('r_pka_prediction')
                if pka is None:
                    continue
                changeable.pka = pka
        # For histidine we calculate two pKa for each of the flip states
        if self.do_flips:
            # Remove previous annotation for pKa prediction
            for atom in ct.atom:
                for key in atom.property:
                    if key.startswith(('b_pka_', 'r_pka_')):
                        del atom.property[key]
            self.pka_predictors['his'].predict(ct, flip=True)
            for changeable in self.changeables:
                if changeable.type != 'HISTIDINE':
                    continue
                residue = get_residue_from_changeable(ct, changeable)
                if residue is None:
                    continue
                for atom in residue.atom:
                    pka = atom.property.get('r_pka_prediction')
                    if pka is None:
                        continue
                    changeable.pka_flip = pka
        # Adjust penalties
        for changeable in self.changeables:
            changeable.change_empirical_pka(self.propka_pH)

[docs]    def remove_zero_order_bonds(self, ct):
        num_zobs = 0
        for item in self.changeables:
            atoms = item.get_adjustable_atoms()
            for iatom in atoms:
                if iatom is None:
                    continue
                deleting = True
                while deleting:
                    deleting = False
                    for bond in ct.atom[iatom].bond:
                        if bond.order == 0:
                            bond.atom1.deleteBond(bond.atom2)
                            bond.atom1.property['i_pa_zob%d' % num_zobs] = 1
                            bond.atom2.property['i_pa_zob%d' % num_zobs] = 1
                            num_zobs += 1
                            deleting = True
                            break
        ct.property['i_pa_num_zobs'] = num_zobs
        return

[docs]    def extend_targeted_to_hyds(self, ct):
        # If a heavy atom is targets, so are its attached hyds.
        H_atoms = set(analyze.evaluate_asl(
            ct,
            'atom.ele H')).difference(self.frozen_hydrogens & self.target_atoms)
        for H_atom in H_atoms:
            for atom in ct.atom[H_atom].bonded_atoms:
                if int(atom) in self.target_atoms:
                    self.target_atoms.add(H_atom)
        return

[docs]    def delete_atoms(self, ct, atoms):
        ct.deleteAtoms(atoms)
        new_indices = {}
        for atom in ct.atom:
            new_indices[atom.property['i_pa_atomindex']] = int(atom)
        for changeable in self.changeables:
            changeable.update_atom_indices(ct, new_indices)
        new_acceptors = []
        for acceptor in self.acceptors:
            new_acceptors.append(new_indices[acceptor])
        self.acceptors = new_acceptors
        new_donors = []
        for donor in self.donors:
            new_donors.append((new_indices[donor[0]], new_indices[donor[1]]))
        self.donors = new_donors
        new_clashers = []
        for clasher in self.clashers:
            new_clashers.append(new_indices[clasher])
        self.clashers = new_clashers
        self.record_current_indices(ct)
        return

[docs]    def run_propka(self, changeables, ct, use_xtal=False):
        # See if we even need to bother.
        # If there's just a single carboxylate with an
        # already assigned pKa, it can't have changed.
        if ((len(changeables) == 1) and (changeables[0].type == 'CARBOXYL') and
            (changeables[0].pka is not None)):
            return {}
        # If there's nothing that depends on pH.
        proceed = False
        for changeable in changeables:
            if (changeable.type in ['HISTIDINE', 'CARBOXYL', 'AMINE'] or
                (changeable.type == 'ROTATABLE' and changeable.ionizable)):
                proceed = True
                break
        if not proceed:
            return {}
        report(LOG_BASIC, '  Running PropKa to get pKas')
        if use_xtal:
            propka_ct = self.generate_mates(ct)
        else:
            propka_ct = ct
        pkas = {}
        try:
            calculations = ['pka']
            calculator = protein.PropertyCalculator(propka_ct,
                                                    'hbondopt_propkajob')
            results = {}
            for res, data in calculator.calculateOverResidues(*calculations):
                if data['pka'] is not None:
                    results[str(res)] = float(data['pka'])
        except:
            raise PropKaException('PropKa failed')
        for changeable in changeables:
            # Need to convert to same format as PropKa uses.
            name = changeable.name.split(':')[0] + ':' + changeable.name.split(
                ':')[1][4:]
            name = name.rstrip()
            if name in results:
                pkas[name] = (changeable.pka, results[name])
        return pkas

[docs]    def generate_mates(self, ct):
        chains = [
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'
        ]
        for chain in ct.chain:
            if chain.name in chains:
                chains.remove(chain.name)
        try:
            mates = analyze.generate_crystal_mates(ct)
        except:
            report(
                LOG_NONE,
                'Failure in generating crystal mates.  Please check crystal properties.'
            )
            sys.exit()
        new_ct = copy.copy(ct)
        for atom in new_ct.atom:
            atom.property['i_pa_propka_maincell'] = 1
        # Need to renumber, rechain
        resnum = 1
        chain = chains.pop()
        for i, mate in enumerate(mates):
            if i == 0:
                continue
            for res in mate.residue:
                res.chain = chain
                res.resnum = resnum
                resnum += 1
                if resnum > 9999:
                    chain = chains.pop()
                    resnum = 1
            new_ct.extend(mate)
        important_atoms = analyze.evaluate_asl(
            new_ct, 'fillres within 20 (atom.i_pa_propka_maincell 1)')
        truncated_new_ct = new_ct.extract(important_atoms)
        build.add_hydrogens(truncated_new_ct)
        return truncated_new_ct

[docs]    def apply_pkas(self, changeables, changes, propka_pH):
        something_changed = False
        for changeable in changeables:
            # Need to convert to same format as PropKa uses.
            name = changeable.name.split(':')[0] + ':' + changeable.name.split(
                ':')[1][4:]
            name = name.rstrip()
            if name in changes:
                old_pka = changeable.pka
                changed = changeable.change_pka(changes[name][1], propka_pH)
                if changed:
                    something_changed = True
                    if old_pka is not None:
                        report(
                            LOG_BASIC,
                            '    Updating protonation penalties for %s: pKa %.2f'
                            % (changeable.name, changeable.pka))
            else:
                changed = changeable.change_pka(None, propka_pH)
        return something_changed

[docs]    def find_protonation_state_changes(self, ct, clusters='all'):
        clusters_to_redo = []
        if mmutil.feature_flag_is_enabled(mmutil.PROTASSIGN_PKA_PREDICTOR):
            pass
        else:
            pkas = self.run_propka(self.changeables, ct, use_xtal=self.use_xtal)
            for i, cluster in enumerate(self.clusters):
                if clusters != 'all' and i not in clusters:
                    continue
                something_changed = self.apply_pkas(cluster.changeables, pkas,
                                                    self.propka_pH)
                if something_changed:
                    clusters_to_redo.append(i)
        return clusters_to_redo

[docs]    def identify_species(self, ct):
        report(LOG_EXTRA)
        for species in self.species:
            atoms = analyze.evaluate_asl(ct, species.asl)
            for atom in atoms:
                if atom not in self.target_atoms:
                    continue
                new_changeable = species(ct, atom)
                if new_changeable.valid:
                    self.changeables.append(new_changeable)
                    self.changeables[-1].index = len(self.changeables) - 1
        return

[docs]    def identify_all_hbonders(self, ct):

        self.donors = []
        donor_Hs = analyze.evaluate_asl(ct, "(atom.ele H AND not /C0-H0/)")
        donor_Hs += analyze.evaluate_asl(
            ct,
            "(res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \") AND (atom.ptype \" HD2\" or atom.ptype \" HE1\")"
        )
        for H in donor_Hs:
            H_atom = ct.atom[H]
            if H_atom.bond_total == 1:
                for atom in H_atom.bonded_atoms:
                    self.donors.append((int(atom), H))

        self.acceptors = analyze.evaluate_asl(
            ct, "not atom.ele H AND not atom.ele C")
        # Move metal ions from acceptor list to donor list (ion is both heavy atom and "hydrogen").
        atoms_to_remove = []
        for acceptor in self.acceptors:
            if ct.atom[acceptor].formal_charge > 0 and ct.atom[
                    acceptor].atomic_number not in [7, 8]:
                self.donors.append((acceptor, acceptor))
                atoms_to_remove.append(acceptor)
        for atom in atoms_to_remove:
            self.acceptors.remove(atom)
        # Remove any atoms accounted for in donors from list of acceptors.
        for donor in self.donors:
            if donor[0] in self.acceptors:
                self.acceptors.remove(donor[0])
        # All of the non-polar hydrogens
        self.clashers = analyze.evaluate_asl(ct, "atom.ele H AND /C0-H0/")
        return

[docs]    def annotate_structure(self, ct):
        """
        Annotate atoms in structure by their interaction class and whether or
        not they are static

        Internally updates the acceptors, donors and clashers attributes
        """
        self.identify_all_hbonders(ct)
        annotate_structure_interactors(ct, self.acceptors, self.donors,
                                       self.clashers)
        # Indicate if interactor is static. This is important to know during
        # generation of states.
        self.remove_changeables_from_hbonders()
        atom_iterator = itertools.chain(self.acceptors, self.clashers,
                                        *self.donors)
        for iatom in atom_iterator:
            ct.atom[iatom].property[STATIC_PROPERTY] = True
        return ct

[docs]    def enumerate_changeable_states(self, ct):
        """
        Enumerate all states for each changeable. Crystal symmetry mates are
        taken into account if requested.
        """
        working_ct = ct.copy()
        self.annotate_structure(working_ct)
        if self.use_xtal:
            try:
                mates = analyze.generate_crystal_mates(working_ct, radius=4.0)
            except:
                report(
                    LOG_NONE,
                    'Failure in generating crystal mates.  Please check crystal properties.'
                )
                sys.exit()
            for i, mate in enumerate(mates):
                for atom in mate.atom:
                    atom.property['i_pa_xtalatom'] = i
                if i > 0:
                    mates[0].extend(mate)
            self.xtal_ct = mates[0]
            # Atoms in primary image which are close to a crystal image (assume atom indices in ct and xtal_ct are the same).
            surface_atoms = set(
                analyze.evaluate_asl(
                    self.xtal_ct,
                    "(within 5.0 (atom.i_pa_xtalatom > 0)) and not (atom.i_pa_xtal_atom > 0)"
                ))
            for atom in ct.atom:
                if int(atom) in surface_atoms:
                    atom.property['i_pa_surfaceatom'] = 1
                else:
                    atom.property['i_pa_surfaceatom'] = 0
            # Get rid of extra atoms.
            atoms_to_delete = analyze.evaluate_asl(
                self.xtal_ct, "not (fillres within 4 (atom.i_pa_xtalatom 0 ) )")
            self.xtal_ct.deleteAtoms(atoms_to_delete)
            working_ct = self.xtal_ct

        dcell = create_distance_cell(working_ct,
                                     MAX_ORIENT_HYDROGEN_DISTANCE,
                                     honor_pbc=False)
        for changeable in self.changeables:
            acceptors, donors, _ = changeable.get_close_interactors(
                working_ct, dcell)
            changeable.enumerate_states(working_ct,
                                        acceptors,
                                        donors,
                                        self.pH,
                                        do_flips=self.do_flips,
                                        include_initial=self.include_initial)

[docs]    def lock_protonation_states(self, ct):
        if self.noprot_asl == '':
            return
        locked_atoms = analyze.evaluate_asl(ct, self.noprot_asl)
        for changeable in self.changeables:
            atoms = changeable.get_heavies()
            for atom in atoms:
                if atom in locked_atoms:
                    changeable.lock_protonation()
        return

[docs]    def remove_changeables_from_hbonders(self):
        for changeable in self.changeables:
            heavy_atoms = changeable.get_heavies()
            for atom in heavy_atoms:
                while atom in self.acceptors:
                    self.acceptors.remove(atom)
                for donor in reversed(self.donors):
                    if donor[0] == atom:
                        self.donors.remove(donor)
            adjustable_atoms = changeable.get_adjustable_atoms()
            for atom in adjustable_atoms:
                if atom in self.clashers:
                    self.clashers.remove(atom)

    @staticmethod
    def _get_index_clusters(interaction_matrix, max_cluster_size):
        """
        Cluster interactors based on an interaction matrix

        :param interaction_matrix: Interaction lookup table
        :type interaction_matrix: list of list of bool
        :param max_cluster_size: Maximum cluster size. None is no maximum.
        :type max_cluster_size: int or NoneType

        :return: Clustered interactors
        :rtype: list of lists of ints
        """
        ninteractors = len(interaction_matrix)
        # Initially each residue is in its own cluster.
        index_clusters = [[i] for i in range(ninteractors)]
        if max_cluster_size is None:
            max_cluster_size = ninteractors
        # Climb up backwards, merging things up to what they interact with.
        for i in reversed(range(ninteractors)):
            # For each item currently in this cluster.
            icluster = index_clusters[i]
            cluster_size = len(icluster)
            for j in icluster:
                # Find out if it interacts with anything higher up.
                j_interacts_with = interaction_matrix[j]
                merged_cluster = False
                for k in range(min(j, i)):
                    if j_interacts_with[k]:
                        merged_cluster_size = len(
                            index_clusters[k]) + cluster_size
                        if merged_cluster_size > max_cluster_size:
                            continue
                        index_clusters[k] += icluster
                        index_clusters.pop(i)
                        merged_cluster = True
                        break
                if merged_cluster:
                    break
        return index_clusters

[docs]    def cluster(self, ct):
        """Cluster changeables based on their heavies."""

        # Extract all heavies from the changeables as these determine whether
        # they interact with other changeables. Annotate to which changeable a
        # heavy belongs.
        heavies = []
        for changeable in self.changeables:
            for heavy in changeable.get_heavies():
                ct.atom[heavy].property[
                    'i_pa_changeable_index'] = changeable.index
                heavies.append(heavy)
        self.interact = calculate_interaction_matrix(ct, heavies,
                                                     CLUSTERING_DISTANCE,
                                                     self.use_xtal)
        # Clean up ct
        for heavy in heavies:
            del ct.atom[heavy].property['i_pa_changeable_index']

        local_interact = copy.deepcopy(self.interact)
        # Flesh out the interactions if i interacts with j and k, then j and k interact too.
        for i in range(len(self.changeables)):
            for j in range(len(self.changeables)):
                if local_interact[i][j]:
                    for k in range(len(self.changeables)):
                        if local_interact[i][k]:
                            if j != k:
                                local_interact[j][k] = True
                                local_interact[k][j] = True

        # Create clusters based on interaction
        index_clusters = self._get_index_clusters(local_interact,
                                                  self.max_cluster_size)
        self.clusters = []
        # Create the hbond_cluster instances from the index clusters
        for icluster in index_clusters:
            new_cluster = self.hbond_cluster()
            new_cluster.use_xtal = self.use_xtal
            for index in icluster:
                new_cluster.changeables.append(self.changeables[index])
            # Setup record of which species need xtal symmetry to interact.
            new_cluster.setup_xtal(ct, self.interact, CLUSTERING_DISTANCE)
            self.clusters.append(new_cluster)
        # Report.
        report(LOG_BASIC)
        report(LOG_BASIC, 'Clusters to optimize:')
        for i, cluster in enumerate(self.clusters):
            report(LOG_BASIC, ("    Cluster %d:" % (i + 1)))
            for item in cluster.changeables:
                report(LOG_BASIC, ("        " + item.name))
        report(LOG_BASIC)

[docs]    def set_user_states(self, ct):
        for choice in self.user_states:
            (user_name, user_state) = choice
            found_name = False
            for ichangeable, changeable in enumerate(self.changeables):
                # Remove the pdbres
                name = changeable.name[0] + ':' + changeable.name[6:].rstrip()
                if name == user_name:
                    found_name = True
                    found_state = False
                    for istate, state in enumerate(changeable.state_names):
                        if state == user_state:
                            self.assign_state_of_changeable(
                                ct, ichangeable, istate)
                            self.changeables[ichangeable].locked = True
                            found_state = True
            if not found_name:
                print('ERROR: did not find species %s' % user_name)
                sys.exit()
            elif not found_state:
                print('ERROR: did not find state %s for species %s' %
                      (user_state, user_name))
                sys.exit()
        return

[docs]    def assign_state_of_changeable(self, ct, ichangeable, istate):
        atoms_to_delete = self.changeables[ichangeable].assign_state(
            ct, istate, self.add_labels, self.label_pkas)
        if atoms_to_delete is not None:
            self.delete_atoms(ct, atoms_to_delete)
        return

[docs]    def increment_state_of_changeable(self, ct, ichangeable):
        if (self.changeables[ichangeable].current_state +
                1) == self.changeables[ichangeable].nstates:
            self.assign_state_of_changeable(ct, ichangeable, 0)
        else:
            self.assign_state_of_changeable(
                ct, ichangeable,
                self.changeables[ichangeable].current_state + 1)
        return

[docs]    def decrement_state_of_changeable(self, ct, ichangeable):
        if self.changeables[ichangeable].current_state == 0:
            self.assign_state_of_changeable(
                ct, ichangeable, self.changeables[ichangeable].nstates - 1)
        else:
            self.assign_state_of_changeable(
                ct, ichangeable,
                self.changeables[ichangeable].current_state - 1)
        return

[docs]    def record_current_indices(self, ct):
        for index in range(len(ct.atom)):
            ct.atom[index + 1].property['i_pa_atomindex'] = index + 1
        return

[docs]    def assign_best_combinations(self, ct, last_time=False):
        """
        Assign the best combinations to the ct and report output

        :param ct: The structure to operate on
        :type ct: schrodinger.Structure

        :param last_time: Whether or not this is the last time through when we
            should be extra verbose
        :type last_time: bool
        """
        atoms_to_delete = []
        for cluster in self.clusters:
            atoms = cluster.assign_combination(ct,
                                               0,
                                               self.add_labels,
                                               self.label_pkas,
                                               verbose=last_time)
            atoms_to_delete.extend(atoms)
        self.delete_atoms(ct, atoms_to_delete)
        return

[docs]    def assign_cluster_combination(self, ct, icluster, icombination):
        atoms = self.clusters[icluster].assign_combination(
            ct, icombination, self.add_labels, self.label_pkas)
        self.delete_atoms(ct, atoms)
        return

[docs]    def single_point_cluster(self, ct, icluster):
        atoms = self.clusters[icluster].single_point(ct, self.interact,
                                                     self.donors,
                                                     self.acceptors,
                                                     self.clashers)
        self.delete_atoms(ct, atoms)
        atoms = self.clusters[icluster].assign_combination(
            ct, 1000000000, self.add_labels, self.label_pkas)
        self.delete_atoms(ct, atoms)
        return self.clusters[icluster].single_point_score

[docs]    def optimize_cluster(self, ct, icluster, assign=True):
        self.clusters[icluster].optimize(ct,
                                         self.interact,
                                         self.donors,
                                         self.acceptors,
                                         self.clashers,
                                         self.max_comb,
                                         self.num_sequential_cycles,
                                         self.use_propka,
                                         propka_pH=self.propka_pH)
        if assign:
            self.assign_cluster_combination(ct, icluster, 0)
        return

[docs]    def optimize(self, ct):
        clusters_to_do = [i for i in range(len(self.clusters))]
        niter = 0
        niter_max = 2
        if mmutil.feature_flag_is_enabled(
                mmutil.PROTASSIGN_PKA_PREDICTOR) and self.use_propka:
            niter_max = 20
        total_score = 0.0
        while (len(clusters_to_do) > 0) and (niter < niter_max):
            niter += 1
            total_score = 0.0
            # Generate annotated structure for faster optimization, especially
            # for Xtal structures.
            for icluster, cluster in enumerate(self.clusters):
                if icluster in clusters_to_do:
                    for changeable in cluster.changeables:
                        if not changeable.treated:
                            changeable.pre_treat(ct)
            annotated_ct = generate_annotated_ct(ct,
                                                 self.donors,
                                                 self.acceptors,
                                                 self.clashers,
                                                 use_xtal=self.use_xtal)
            for i, cluster in enumerate(self.clusters):
                if i not in clusters_to_do:
                    total_score += cluster.combinations[0][-1]
                    continue
                report(LOG_BASIC, 'Working on cluster #%d...' % (i + 1))
                for changeable in cluster.changeables:
                    report(
                        LOG_BASIC, '    ' + changeable.name + ' : ' +
                        str(changeable.nstates) + ' states')
                cluster.optimize(ct,
                                 self.interact,
                                 self.donors,
                                 self.acceptors,
                                 self.clashers,
                                 self.max_comb,
                                 self.num_sequential_cycles,
                                 self.use_propka,
                                 propka_pH=self.propka_pH,
                                 annotated_ct=annotated_ct)
                # Just in case something went wrong.
                try:
                    total_score += cluster.combinations[0][-1]
                except:
                    pass
            self.assign_best_combinations(ct)
            # Now see if any protonation states need to be redone.
            if self.use_propka and niter < niter_max:
                if mmutil.feature_flag_is_enabled(
                        mmutil.PROTASSIGN_PKA_PREDICTOR):
                    clusters_to_do = self.recalculate_empirical_pkas(ct, niter)
                else:
                    clusters_to_do = self.recalculate_propka_pkas(ct)
            else:
                clusters_to_do = []
        self.assign_best_combinations(ct, last_time=True)
        if self.minimize:
            report(LOG_BASIC, 'Minimizing hydrogens...\n')
            self.minimize_hydrogens(ct)
        report(LOG_BASIC, 'Total score for structure: ' + str(total_score))

[docs]    def recalculate_empirical_pkas(self, ct, iteration):
        """Only recalculate pKa's of histidines for forced acceptor interactions"""
        assert iteration > 0
        working_ct = ct.copy()
        self.annotate_structure(working_ct)
        # Check unsatisfied donors before creating crystal mates, as
        # determining bulk solvent is not yet working with crystal mates
        # present.
        unsatisfied_donors = get_unsatisfied_donors(working_ct,
                                                    include_bsa=False)
        unsatisfied_donors_indices = {a.index for a in unsatisfied_donors}
        if self.use_xtal:
            try:
                mates = analyze.generate_crystal_mates(working_ct, radius=4.0)
                for mate in mates[1:]:
                    working_ct.extend(mate)
            except Exception:
                pass
        clusters_to_do = []
        report(LOG_BASIC,
               "Checking for unsatisfied donors in hydrogen bond networks")
        for icluster, cluster in enumerate(self.clusters):
            prev_unsatisfied_donors = None
            if iteration > 1:
                prev_unsatisfied_donors = self.unsatisfied_donors_by_cluster[
                    icluster]
            # Extract unsatisfied donors that are not bulk solvent accessible
            # and histidines
            unsatisfied_donors_in_cluster = []
            hips = []
            for changeable in cluster.changeables:
                for iheavy in changeable.get_heavies():
                    if iheavy in unsatisfied_donors_indices:
                        unsatisfied_donors_in_cluster.append(iheavy)
                if changeable.type == 'HISTIDINE':
                    hips.append(changeable)
            self.unsatisfied_donors_by_cluster[
                icluster] = unsatisfied_donors_in_cluster
            if unsatisfied_donors_in_cluster:
                report(LOG_BASIC,
                       f"Unsatisfied donors found in cluster {icluster}:")
                for idonor in unsatisfied_donors_in_cluster:
                    atom = working_ct.atom[idonor]
                    report(
                        LOG_BASIC,
                        f"    {atom.pdbres} {atom.chain}{atom.resnum}{atom.inscode} {atom.pdbname}"
                    )
            # There is nothing to change, since there are no HIP residues or
            # forced neutral states.
            if not hips:
                continue
            # Penalize the charged states for a histidine. check the current
            # penalties to determine which histidines have already been
            # penalized.
            current_unsatisfied_donors = self.unsatisfied_donors_by_cluster[
                icluster]
            if iteration == 1:
                hip_to_penalize_index = 0
            else:
                hip_to_penalize_index = None
            redo_cluster = False
            for n, hip in enumerate(hips):
                if hip.force_neutral:
                    redo_cluster = True
                    hip.force_neutral = False
                    hip_to_penalize_index = n + 1
                    if len(current_unsatisfied_donors) < len(
                            prev_unsatisfied_donors):
                        # Forced accepter interaction found for histidine. pKa
                        # is acutally lowered cause the shift is a negative number.
                        if hip.current_state in (0, 1):
                            hip.pka += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
                        elif hip.current_state in (3, 4):
                            hip.pka_flip += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
                        report(
                            LOG_BASIC,
                            f"Found forced acceptor state for {hip.name}. Lowering pKa to {hip.pka:.2f}"
                        )
                    # This will reset the pH penalties etc.
                    hip.change_empirical_pka(self.propka_pH)
                    break
                if hip.force_charged:
                    redo_cluster = True
                    hip.force_charged = False
                    hip_to_penalize_index = n + 1
                    # Don't apply forced acceptor rule if pKa is already very
                    # low or if we still get the neutral form.
                    if hip.current_state not in [
                            2, 5
                    ] or min(hip.pka, hip.pka_flip) < 4.0:
                        hip.change_empirical_pka(self.propka_pH)
                        break
                    if len(current_unsatisfied_donors) > len(
                            prev_unsatisfied_donors):
                        if hip.current_state == 2:
                            hip.pka += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
                        elif hip.current_state == 5:
                            hip.pka_flip += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT
                        report(
                            LOG_BASIC,
                            f"Found forced acceptor state for {hip.name}. Lowering pKa to {hip.pka:.2f}"
                        )
                        # Reset the number of unsatisfied donors to the lower
                        # number as we had to force this histidine to be
                        # charged and it would be neutral in the non-forced
                        # state.
                        self.unsatisfied_donors_by_cluster[
                            icluster] = prev_unsatisfied_donors
                    # This will reset the pH penalties etc.
                    hip.change_empirical_pka(self.propka_pH)
                    break
            if redo_cluster:
                clusters_to_do.append(icluster)
            if hip_to_penalize_index is None or hip_to_penalize_index >= len(
                    hips):
                continue
            # Don't force neutral or charged state of histidine if the pKa is
            # already very low.
            for i in range(hip_to_penalize_index, len(hips)):
                hip = hips[i]
                if min(hip.pka, hip.pka_flip) > 4.0:
                    break
                report(
                    LOG_BASIC,
                    f"Skipping forced acceptor state search for {hip.name} due to already low pKa."
                )
            else:
                # If we reach the end we are done, and can check the next cluster.
                continue
            if hip.current_state in [2, 5]:
                hip.force_neutral = True
                report(
                    LOG_BASIC,
                    f"Recalculating hydrogen bond network using forced neutral state for {hip.name}"
                )
            elif hip.pka:
                hip.force_charged = True
                report(
                    LOG_BASIC,
                    f"Recalculating hydrogen bond network using forced charged state for {hip.name}"
                )
            # This will update the penalties
            hip.change_empirical_pka(self.propka_pH)
            clusters_to_do.append(icluster)
        return sorted(set(clusters_to_do))

[docs]    def recalculate_propka_pkas(self, ct):
        report(LOG_BASIC, '')
        report(
            LOG_BASIC,
            'Recalculating pKas to identify changes in protonation penalties...'
        )
        self.assign_best_combinations(ct)
        try:
            clusters_to_do = self.find_protonation_state_changes(ct)
        except PropKaException:
            report(
                LOG_NONE,
                'WARNING: PropKa failed when looking for changes in protonation state penalties.'
            )
            clusters_to_do = []
        if len(clusters_to_do) > 0:
            report(
                LOG_BASIC,
                '    The following clusters have new protonation state penalties, and will be re-run:'
            )
            report(LOG_BASIC,
                   '        ' + ','.join([str(i + 1) for i in clusters_to_do]))
        else:
            report(LOG_BASIC, '    No changes in protonation penalties found.')
        report(LOG_BASIC, '')
        return clusters_to_do

[docs]    def minimize_hydrogens(self, ct):
        hydrogens = []
        for cluster in self.clusters:
            for changeable in cluster.changeables:
                if changeable.type in ['ROTATABLE', 'WATER']:
                    changeable_sites = changeable.get_state_sites(
                        ct, changeable.current_state)
                    hydrogens += [i[1] for i in changeable_sites[1]]
        minimizer = minimize.Minimizer(min_method=minimize.MinCG)
        minimizer.setStructure(ct)
        for atom in ct.atom:
            if int(atom) not in hydrogens:
                minimizer.addPosFrozen(int(atom))
        minimizer.minimize()
        return

[docs]    def restore_zobs(self, ct):
        num_zobs = ct.property.get('i_pa_num_zobs')
        if num_zobs is None:
            return
        for i in range(num_zobs):
            atoms = analyze.evaluate_asl(ct, '(atom.i_pa_zob%d = 1)' % i)
            if len(atoms) == 2:
                ct.addBond(atoms[0], atoms[1], 0)
            for iatom in atoms:
                del ct.atom[iatom].property['i_pa_zob%d' % i]
        del ct.property['i_pa_num_zobs']
        return

[docs]    def cleanup(self, ct):
        self.restore_zobs(ct)
        atoms_to_delete = analyze.evaluate_asl(
            ct, "withinbonds 1 (atom.i_pa_xtalatom 1)")
        if len(atoms_to_delete) == 0:
            ct.retype()
            return
        ct.deleteAtoms(atoms_to_delete)
        for atom in ct.atom:
            if 'i_pa_atomindex' in atom.property:
                del atom.property['i_pa_atomindex']
            if 'i_pa_xtalatom' in atom.property:
                del atom.property['i_pa_xtalatom']
            if 'i_pa_surfaceatom' in atom.property:
                del atom.property['i_pa_surfaceatom']
        ct.retype()
        return

[docs]    def summarize_pkas(self):
        first = True
        for changeable in self.changeables:
            if changeable.pka is None:
                continue
            if first:
                report(LOG_BASIC, '\nFinal pKas:')
                first = False
            pka = changeable.pka
            # Histidine has two pKa's, one for each flip state
            if changeable.type == 'HISTIDINE' and changeable.current_state > 2:
                if changeable.pka_flip is not None:
                    pka = changeable.pka_flip

            report(LOG_BASIC, f'  {changeable.name} : {pka:.2f}')


if __name__ == '__main__':

    if len(sys.argv) != 3:
        print("Please specify 2 arguments: input and output file names.")
        sys.stdout.flush()
        sys.exit(1)

    inputfile_name = sys.argv[1]
    outputfile_name = sys.argv[2]

    try:
        structure_ct = structure.Structure.read(inputfile_name)
    except:
        print("ERROR: unable to open file " + inputfile_name)
        sys.stdout.flush()
        sys.exit(1)

    print("Structure extracted from file: " + inputfile_name)
    print("Output structure will be written to: " + outputfile_name)
    sys.stdout.flush()

    instance = ProtAssign(structure_ct,
                          interactive=False,
                          asl='',
                          noprot_asl='',
                          use_xtal=False,
                          do_flips=True,
                          freeze_existing=False,
                          include_initial=False,
                          max_comb=500000,
                          num_sequential_cycles=30,
                          max_cluster_size=None,
                          add_labels=True,
                          label_pkas=True,
                          pH=pH_neutral,
                          use_propka=True,
                          propka_pH=7.0,
                          user_states=[],
                          minimize=False)
    structure_ct.write(outputfile_name)

    print("\nComplete. Output file: %s" % outputfile_name)
    sys.stdout.flush()