"""
Module to generate and evaluate Smiles/SMARTS pattern for both CG and all
atomic structure using RDKIT
Copyright Schrodinger, LLC. All rights reserved.
"""
import re
import functools
from collections import OrderedDict
from rdkit import Chem
from schrodinger import adapter
from schrodinger.application.matsci import msutils
from schrodinger.rdkit import coarsegrain
from schrodinger.test import ioredirect
from schrodinger.thirdparty import rdkit_adapter
PERIODIC_TABLE = Chem.rdchem.GetPeriodicTable()
MAX_MATCHES = 1000000000  # See DESMOND-11242 and RB 65689
[docs]def to_smarts(struct,
              sanitize=True,
              include_stereo=True,
              atom_subset=None,
              check_connectivity=False):
    """
    Get SMARTS for a given structure
    :param schrodinger.structure.Structure struct: Structure for which patterns
        need to be selected
    :param bool sanitize: Whether RDKit sanitization should be performed. This
        option is not applicable for coarsegrained structures.
    :type include_stereo: bool
    :param include_stereo: Whether the stereochemistry of the structure
        should be translated into the RDKit mol. It also enables inclusion of
        information about stereochemistry in the SMARTS. Setting to `False` can
        speed this up substantially.
    :param bool check_connectivity: Whether to check that all the atoms from the
        atom_subset (or entire structure if it is None) are from one molecule.
        Raise ValueError if it is not the case.
    :param list atom_subset: List of atom indices. If None then SMARTS for full
        structure is computed
    :rtype: str
    :return: SMARTS pattern for the atom ids provided.
    """
    if not struct.atom_total:
        # Empty structure
        return ''
    if atom_subset is not None:
        atom_subset = sorted(set(atom_subset))
    if check_connectivity:
        nmols = (struct.mol_total if atom_subset is None else
                 struct.extract(atom_subset).mol_total)
        if nmols > 1:
            raise ValueError('SMARTS generation only works on connected sets '
                             'of atoms.')
    # See RB 67298 discussion on speed issues when `include_stereo=True`
    pattern = Pattern(struct, sanitize=sanitize, include_stereo=include_stereo)
    return pattern.toSmarts(atom_ids=atom_subset, isomeric=include_stereo) 
[docs]def to_smiles(struct,
              sanitize=True,
              include_stereo=True,
              atom_ids=None,
              fall_back=False):
    """
    Get SMILES for a given structure
    :type struct: `schrodinger.structure.Structure`
    :param struct: Structure for which patterns need to be selected
    :type sanitize: bool
    :param sanitize: Whether RDKit sanitization should be performed. This
        option is not applicable for coarsegrained structures.
    :type include_stereo: bool
    :param include_stereo: Whether the stereochemistry of the structure
         should be translated into the RDKit mol. It also enable to include
         information about stereochemistry in the SMILES.
    :type atom_ids: list
    :param atom_ids: list of atom indices. If None then SMARTS for full
        structure is computed
    :type fall_back: bool
    :param fall_back: Ignored if sanitize=False. If sanitize=True, will
            fall back to using a non-sanitized structure if sanitization fails.
    :rtype: str
    :return: SMILES pattern for the atom ids provided
    """
    pattern = Pattern(struct,
                      sanitize=sanitize,
                      include_stereo=include_stereo,
                      fall_back=fall_back)
    return pattern.toSmiles(atom_ids=atom_ids, isomeric=include_stereo) 
[docs]def has_query_stereo(query):
    """
    Checks if rdkit molecule has stereo centers.
    :param rdkit.Chem.rdchem.Mol query: Input molecule
    :rtype: bool
    :return: Whether molecule has stereo centers
    """
    for at in query.GetAtoms():
        if at.GetChiralTag() != Chem.CHI_UNSPECIFIED:
            return True
    for bnd in query.GetBonds():
        if bnd.GetStereo() not in (Chem.BondStereo.STEREONONE,
                                   Chem.BondStereo.STEREOANY):
            return True
    return False 
[docs]def evaluate_smarts(struct,
                    smarts,
                    is_cg=None,
                    sanitize=True,
                    uniquify=True,
                    max_matches=MAX_MATCHES):
    """
    Get the list of matches for the passed SMARTS pattern in the reference
    structure
    :type struct: `schrodinger.structure.Structure`
    :param struct: Structure for which patterns need to be selected
    :type smarts: str
    :param smarts: SMARTS pattern to find
    :type is_cg: bool or None
    :param is_cg: Whether structure is CG. If None, perform a check
    :type sanitize: bool
    :param sanitize: Whether RDKit sanitization should be performed. This
        option is not applicable for coarsegrained structures.
    :type uniquify: bool
    :param uniquify: if True, return only unique sets of matching atoms
    :type max_matches: int
    :param max_matches: the maximum number of matches to return
    :rtype: list or None
    :return: list of list atom/particle indices with matching SMARTS.
    """
    is_cg = (msutils.is_coarse_grain(struct, by_atom=True)
             if is_cg is None else is_cg)
    if is_cg:
        _, mapper = coarsegrain.prepare_cg_for_rdkit(struct)
        pattern = Pattern.patternTranslate(smarts, mapper)
    else:
        pattern = smarts
    smarts_mol = Chem.MolFromSmarts(pattern)
    if not smarts_mol:
        raise RuntimeError('Invalid SMARTS pattern: %s.' % smarts)
    has_stereo = has_query_stereo(smarts_mol)
    pattern = Pattern(struct, sanitize=sanitize, include_stereo=has_stereo)
    return pattern._getPatternMatches(smarts_mol,
                                      uniquify=uniquify,
                                      use_chirality=has_stereo,
                                      max_matches=max_matches) 
[docs]def evaluate_smarts_by_molecule(struct,
                                smarts,
                                uniquify=True,
                                matches_by_mol=False,
                                molecule_numbers=None):
    """
    Takes a structure and a SMARTS pattern and returns a list of all
    matching atom indices, where each element in the list is a group
    of atoms that match the the SMARTS pattern.
    :param structure.Structure struct: the structure to search
    :param str smarts: the SMARTS pattern to match
    :param bool uniquify: if True, return only unique sets of matching atoms
    :param bool matches_by_mol: if True then rather than returning a list of
        matches return a dictionary of matches key-ed by molecule number
    :param set molecule_numbers: set of molecule numbers in the structure to be
        used instead of the entire structure
    :rtype: list or dict
    :return: For the list (if matches_by_mol is False) each value is a list of
        atom indices matching the SMARTS pattern, for the dict (if matches_by_mol
        is True) keys are molecule indices and values are lists of matches for that
        molecule
    """
    is_cg = msutils.is_coarse_grain(struct, by_atom=True)
    matches = {}
    # SMARTs patterns are intra-molecular
    # so we iterate over molecules in struct
    if molecule_numbers is None:
        molecules = struct.molecule
    else:
        # Using enumerate as structure.molecule[x] is very slow #SHARED-6204
        molecules = [
            mol for mol in struct.molecule if mol.number in molecule_numbers
        ]
    for mol in molecules:
        new_to_old = dict(zip(range(1,
                                    len(mol.atom) + 1), mol.getAtomIndices()))
        match = evaluate_smarts(mol.extractStructure(),
                                smarts,
                                is_cg=is_cg,
                                uniquify=uniquify,
                                max_matches=MAX_MATCHES)
        # Remove empty matches and use structure atom indexes
        # rather than by-molecule atom indexes
        for a_match in match:
            if a_match:
                b_match = [new_to_old[x] for x in a_match]
                if mol.number not in matches:
                    matches[mol.number] = []
                matches[mol.number].append(b_match)
    if not matches_by_mol:
        matches = [
            match for mol_match in matches.values() for match in mol_match
        ]
    return matches 
[docs]def validate_smarts(smarts, struct=None, is_cg=None):
    """
    Validate smarts. Works both with AA and CG.
    :param str smarts: SMARTS to validate
    :type struct: structure.Structure or None
    :param struct: If None, validate as AA SMARTS. If present, validate either
        as AA or CG
    :type is_cg: bool or None
    :param is_cg: Whether structure is CG. If None, perform a check
    :rtype: str or None
    :return: Error message on error, None if SMARTS is valid
    """
    mapper = None
    if struct:
        is_cg = (msutils.is_coarse_grain(struct, by_atom=True)
                 if is_cg is None else is_cg)
        if is_cg:
            try:
                _, mapper = coarsegrain.prepare_cg_for_rdkit(struct)
            except RuntimeError as err:
                return str(err)
    if isinstance(smarts, str):
        smarts = [smarts]
    for pattern in smarts:
        if mapper:
            pattern = Pattern.patternTranslate(pattern, mapper)
        try:
            with ioredirect.IOSilence():
                adapter.validate_smarts(pattern)
        except ValueError as err:
            return str(err) 
[docs]def has_stereo_smarts(smarts, struct=None, is_cg=None):
    """
    Check if SMARTS requires stereo. Works both with AA and CG.
    :type: str or list[str]
    :param smarts: SMARTS to validate
    :type struct: structure.Structure or None
    :param struct: If None, validate as AA SMARTS. If present, validate either
        as AA or CG
    :type is_cg: bool or None
    :param is_cg: Whether structure is CG. If None, perform a check
    :rtype: bool or list[bool]
    :return: Whether SMARTS require stereo or not
    """
    mapper = None
    if struct:
        is_cg = (msutils.is_coarse_grain(struct, by_atom=True)
                 if is_cg is None else is_cg)
        if is_cg:
            # This can raise RuntimeError
            _, mapper = coarsegrain.prepare_cg_for_rdkit(struct)
    if isinstance(smarts, str):
        smarts = [smarts]
    ret = []
    for pattern in smarts:
        if mapper:
            pattern = Pattern.patternTranslate(pattern, mapper)
        smarts_mol = Chem.MolFromSmarts(pattern)
        if smarts_mol is None:
            # Bad SMARTS
            ret.append(None)
            continue
        has_stereo = has_query_stereo(smarts_mol)
        ret.append(has_stereo)
    if isinstance(smarts, str):
        return ret[0]
    else:
        return ret 
[docs]def symbol_to_number(symbol):
    """Transforms chemical symbols into the corresponding atomic numbers.
    :type symbol: str
    :param symbol: Chemical symbol
    :rtype: int/None
    :return: Atomic numbers corresponding to entered symbol if valid atomic
        symbol is entered else return None
    """
    with rdkit_adapter.suppress_rdkit_log():
        try:
            atomic_num = PERIODIC_TABLE.GetAtomicNumber(symbol)
        except RuntimeError:
            return
        return atomic_num 
[docs]class DictCache(OrderedDict):
    """
    A first in first out dictionary cache which caches the key and associated
    value.
    """
[docs]    def __init__(self, maxcount=10000):
        """
        Constructs a new lru cache.
        :param maxcount: The maximum number of data that the cache can hold
        :type maxcount: int
        """
        super().__init__()
        self.maxcount = maxcount 
    def __setitem__(self, key, value):
        """
        Set the value for the passed key
        :param key: The key
        :type key: string or tuple
        :param value: The value than any dictionary can hold
        :type value: string
        """
        super().__setitem__(key, value)
        # Maintain size limit for the cache
        if len(self) > self.maxcount:
            self.popitem(last=False) 
[docs]class Pattern:
    """
    A class to calculate calculate SMARTS and SMILES pattern for a structure
    multiple times. The class can be memory intensive to allow for increased
    speed.
    """
    PROTECTED_PATTERN_BIT = coarsegrain.PROTECTED_PATTERN_BIT
[docs]    def __init__(self,
                 struct,
                 is_cg=None,
                 sanitize=True,
                 include_stereo=True,
                 fall_back=False):
        """
        Initiate Pattern class
        :type struct: `schrodinger.structure.Structure`
        :param struct: Structure for which patterns need to be selected
        :type is_cg: bool or None
        :param is_cg: Whether structure is CG. If None, perform a check
        :type sanitize: bool
        :param sanitize: Whether RDKit sanitization should be performed. This
            option is not applicable for coarsegrained structures.
        :type include_stereo: bool
        :param include_stereo: Whether the stereochemistry of the structure
            should be translated into the RDKit mol. Setting to `False` can
            speed this up substantially.
        :type fall_back: bool
        :param fall_back: Ignored if sanitize=False. If sanitize=True, will
            fall back to using a non-sanitized structure if sanitization fails.
        """
        self.struct = struct
        self._sanitized = sanitize
        # Fallback is required only when sanitization if True. For
        # sanitization = False, it will be perform same conversion procedure
        # twice if fallback is not updated.
        # This will ensure that conversion is tried only once.
        fall_back = fall_back if sanitize else False
        self.is_coarse_grain = (msutils.is_coarse_grain(struct, by_atom=True)
                                if is_cg is None else is_cg)
        if self.is_coarse_grain:
            self._mol, self._particle_proxy_mapper = \
                
coarsegrain._coarsegrain_st_to_rdkit(struct)
            self._generateMapper()
        else:
            # include_properties and include_coordinates are set to false in
            # adapter.evaluate_smarts, match here. If changing please check the
            # performance (MATSCI-11446). See RB 67298 discussion on speed
            # issues when `include_stereo=True`
            with rdkit_adapter.suppress_rdkit_log():
                try:
                    self._mol = rdkit_adapter.to_rdkit(
                        struct,
                        include_properties=False,
                        include_coordinates=False,
                        sanitize=sanitize,
                        include_stereo=include_stereo)
                except ValueError:
                    if fall_back:
                        self._mol = rdkit_adapter.to_rdkit(
                            struct,
                            include_properties=False,
                            include_coordinates=False,
                            sanitize=False,
                            include_stereo=include_stereo)
                        self._sanitized = False
                    else:
                        raise
        # Cached dicts
        self._smiles = DictCache()
        self._smarts = DictCache()
        # RDKIT mol atomic index and schrodinger structure index mappers
        self.stidx_to_molidx = rdkit_adapter.get_map_sdgr_to_rdk(self._mol)
        self.molidx_to_stidx = rdkit_adapter.get_map_rdk_to_sdgr(self._mol) 
    def _generateMapper(self):
        """
        Generate mapper for proxy element to particle name
        """
        self._proxy_particle_mapper = {}
        for pname, ename in self._particle_proxy_mapper.items():
            self._proxy_particle_mapper[ename] = pname
            self._proxy_particle_mapper[str(symbol_to_number(pname))] = ename
    @property
    def sanitized(self):
        """
        Get whether the structure was sanitized or not
        :return: sanitization status of the structure
        :rtype: bool
        """
        return self._sanitized
    @property
    def smiles(self):
        """
        Get the SMILES for the passed structure
        :returns: SMILES pattern for the passed structure
        :rtype: str
        """
        return self.toSmiles()
    @smiles.setter
    def smiles(self, value):
        """
        Overwrite SMILES setter to not allow user to set it.
        :param value: SMILES
        :type value: str
        :raises AttributeError: Always raise error since setting SMILES is
            not allowed.
        """
        raise AttributeError('SMILES cannot be set for a structure')
[docs]    def getPattern(self,
                   atom_ids=None,
                   is_smiles_requested=False,
                   isomeric=True):
        """
        Get SMILES/SMARTS for full structure or for substructure of given atom
        ids.
        :type atom_ids: list
        :param atom_ids: list of atom indices
        :type is_smiles_requested: bool
        :param is_smiles_requested: return Smiles pattern if True else Smarts
        :type isomeric: bool
        :param isomeric: include information about stereochemistry in the
            SMILES/SMARTS
        :rtype: str
        :return: SMILES pattern for the atom ids provided
        """
        atom_ids = tuple(atom_ids) if atom_ids else None
        dict_cache = self._smiles if is_smiles_requested else self._smarts
        if atom_ids in dict_cache:
            return dict_cache[atom_ids]
        if is_smiles_requested:
            if atom_ids:
                pattern = Chem.MolFragmentToSmiles(self._mol,
                                                   self.toRdIndices(atom_ids),
                                                   isomericSmiles=isomeric)
            else:
                pattern = Chem.MolToSmiles(self._mol, isomericSmiles=isomeric)
        else:
            if atom_ids:
                pattern = Chem.MolFragmentToSmarts(self._mol,
                                                   self.toRdIndices(atom_ids),
                                                   isomericSmarts=isomeric)
            else:
                pattern = Chem.MolToSmarts(self._mol, isomericSmiles=isomeric)
            pattern = self.proxyToParticleName(pattern)
        dict_cache[atom_ids] = pattern
        return pattern 
[docs]    def toSmiles(self, atom_ids=None, isomeric=True):
        """
        Get SMILES for subset of atom ids
        :type atom_ids: iterable
        :param atom_ids: atom indices
        :type isomeric: bool
        :param isomeric: include information about stereochemistry in the
            SMILES
        :rtype: str
        :return: SMILES pattern for the atom ids provided
        """
        return self.getPattern(atom_ids=atom_ids,
                               is_smiles_requested=True,
                               isomeric=isomeric) 
    @property
    def smarts(self):
        """
        Get the SMARTS for the passed structure
        :returns: SMARTS pattern for the passed structure
        :rtype: str
        """
        return self.toSmarts()
    @smarts.setter
    def smarts(self, value):
        """
        Overwrite SMARTS setter to not allow user to set it
        :param value: SMARTS
        :type value: str
        :raises AttributeError: Always raise error since setting SMARTS is
            not allowed
        """
        raise AttributeError('SMARTS cannot be set for a structure')
[docs]    def toSmarts(self, atom_ids=None, isomeric=True):
        """
        Get SMARTS for subset of atom ids
        :type atom_ids: iterable
        :param atom_ids: atom indices
        :type isomeric: bool
        :param isomeric: include information about stereochemistry in the
            SMARTS
        :rtype: str
        :return: SMARTS pattern for the atom ids provided
        """
        return self.getPattern(atom_ids=atom_ids,
                               is_smiles_requested=False,
                               isomeric=isomeric) 
[docs]    @functools.lru_cache
    def evaluateSmiles(self,
                       smiles,
                       uniquify=True,
                       use_chirality=True,
                       max_matches=MAX_MATCHES):
        """
        Get the list of matches for the passed SMILES pattern in the reference
        structure
        :type smiles: str
        :param smiles: SMILES pattern to find
        :type uniquify: bool
        :param uniquify: if True, return only unique sets of matching atoms
        :type use_chirality: bool
        :param use_chirality: enables the use of stereochemistry in the matching
        :type max_matches: int
        :param max_matches: the maximum number of matches to return
        :rtype: list or None
        :return: list of list atom indices with matching SMILES.
        """
        smiles = self.particleNameToProxy(smiles)
        patt = Chem.MolFromSmiles(smiles, sanitize=False)
        return self._getPatternMatches(patt,
                                       uniquify=uniquify,
                                       use_chirality=use_chirality,
                                       max_matches=max_matches) 
[docs]    @functools.lru_cache
    def evaluateSmarts(self,
                       smarts,
                       uniquify=True,
                       use_chirality=True,
                       max_matches=MAX_MATCHES):
        """
        Get the list of matches for the passed SMARTS pattern in the reference
        structure
        :type smarts: str
        :param smarts: SMARTS pattern to find
        :type uniquify: bool
        :param uniquify: if True, return only unique sets of matching atoms
        :type use_chirality: bool
        :param use_chirality: enables the use of stereochemistry in the matching
        :type max_matches: int
        :param max_matches: the maximum number of matches to return
        :rtype: list or None
        :return: list of list atom/particle indices with matching SMARTS.
        """
        smarts = self.particleNameToProxy(smarts)
        mol = Chem.MolFromSmarts(smarts)
        return self._getPatternMatches(mol,
                                       uniquify=uniquify,
                                       use_chirality=use_chirality,
                                       max_matches=max_matches) 
[docs]    @staticmethod
    def patternTranslate(s_pattern, mapper):
        """
        Replace passed SMARTS/SMILES such that the mapper key values are
        replaced by
        :param s_pattern: The SMARTS/SMILES pattern to change
        :type s_pattern: str
        :param mapper: The mapper used to convert the SMARTS/SMILES pattern
        :type mapper: dictionary where the key is the element name to find in
            the pattern and value is the name to replace it with
        :returns: the converted SMARTS/SMILES pattern
        :rtype: str
        """
        split_pattern = [x for x in re.split(r'\[|\]', s_pattern) if x]
        for sidx, smarts_val in enumerate(split_pattern):
            # Check if name is part of the split pattern, if not continue
            if smarts_val[0] not in Pattern.PROTECTED_PATTERN_BIT:
                for mapper_name in mapper.keys():
                    if mapper_name in smarts_val:
                        break
                else:
                    continue
            # Check if pattern matches mapper atom name
            if smarts_val in mapper.keys():
                split_pattern[sidx] = f'[{mapper[smarts_val]}]'
                continue
            # Remove # for representing atomic number
            if smarts_val.startswith('#'):
                smarts_val = smarts_val[1:]
            # Get position where the element/atom name string ends
            search_text = re.search(r'\W+', smarts_val)
            pos = search_text.start() if search_text else len(smarts_val)
            alpha_num_str = smarts_val[:pos]
            non_alpha_str = smarts_val[pos:]
            # Do not change anything for protected SMARTS pattern
            alpha_str = ''.join(x for x in alpha_num_str if x.isalpha())
            if (alpha_str not in Pattern.PROTECTED_PATTERN_BIT and
                    alpha_str in mapper.keys()):
                # Replace the element/atom name with the mapper value
                smarts_val = mapper[alpha_num_str] + non_alpha_str
            split_pattern[sidx] = f'[{smarts_val}]'
        converted_smarts = ''.join(split_pattern)
        return converted_smarts 
[docs]    def proxyToParticleName(self, smarts):
        """
        If the structure is a coarse-grained structure convert the SMARTS
        pattern of proxy elements to coarse grain particle name. Does nothing for
        atomistic structures
        :param smarts: The SMARTS pattern
        :type smarts: str
        :returns: The translated SMARTS pattern
        :rtype: str
        """
        if not self.is_coarse_grain:
            return smarts
        return self.patternTranslate(smarts, self._proxy_particle_mapper) 
[docs]    def particleNameToProxy(self, smarts):
        """
        If the structure is a coarse grain structure convert the SMARTS
        pattern of coarse grain particle name to proxy element name. Does
        nothing for atomistic structures
        :param smarts: The SMARTS pattern
        :type smarts: str
        :returns: The translated SMARTS pattern
        :rtype: str
        """
        if not self.is_coarse_grain:
            return smarts
        return self.patternTranslate(smarts, self._particle_proxy_mapper) 
[docs]    def toRdIndices(self, particle_indices):
        """
        Convert list of Schrodinger structure particle indices to RDMol atom
        indices
        :type particle_indices: tuple
        :param particle_indices: tuple of Schrodinger structure particle indices
        :rtype: list
        :return: list of RDMol atom indices
        """
        return [self.stidx_to_molidx[x] for x in particle_indices] 
[docs]    def toStIndices(self, particle_indices):
        """
        Convert list of RDMol atom indices to Schrodinger structure particle
        indices
        :type particle_indices: tuple
        :param particle_indices: tuple of RDMol atom indices
        :rtype: list
        :return: list of Schrodinger structure particle indices
        """
        return [self.molidx_to_stidx[x] for x in particle_indices] 
    def _getPatternMatches(self,
                           mol,
                           uniquify=True,
                           use_chirality=True,
                           max_matches=MAX_MATCHES):
        """
        Search for the passed pattern in the current structure
        :type mol: `rdkit.Chem.rdchem.Mol`
        :param mol: rdkit molecule
        :type uniquify: bool
        :param uniquify: if True, return only unique sets of matching atoms
        :type use_chirality: bool
        :param use_chirality: enables the use of stereochemistry in the matching
        :type max_matches: int
        :param max_matches: the maximum number of matches to return
        :rtype: list
        :returns: The list of atom/particle ids matches which matches the
            pattern
        """
        # Changing maxMatches to reflect MATSCI-6657
        rd_matches = self._mol.GetSubstructMatches(mol,
                                                   uniquify=uniquify,
                                                   useChirality=use_chirality,
                                                   useQueryQueryMatches=False,
                                                   maxMatches=max_matches)
        matches = []
        for rd_indices in rd_matches:
            matches.append(self.toStIndices(rd_indices))
        return matches
    def _getMolecularPattern(self, cache_dict, search_func):
        """
        Gets the SMILES/SMARTS pattern for all molecules
        :param cache_dict: The cache SMILES or SMARTS dictionary to check if
            value has already been calculated
        :type cache_dict: DictCache
        :param search_func: The rdkit function to calculate pattern
        :type search_func: function
        :returns: The dictionary where the key is the molecule number and
            pattern is the associated SMILES or SMARTS pattern for the molecule
        :rtype: dict
        """
        mol_patt = {}
        for mol_frag in Chem.rdmolops.GetMolFrags(self._mol,
                                                  asMols=True,
                                                  sanitizeFrags=False):
            # Get the corresponding atom/particle schrodinger structure indices
            st_aid = tuple(
                sorted(
                    x.GetIntProp(rdkit_adapter.SDGR_INDEX)
                    for x in mol_frag.GetAtoms()))
            # Set pattern if it is not already set in the cache
            if st_aid in cache_dict:
                patt_val = cache_dict[st_aid]
            else:
                patt_val = search_func(mol_frag)
                cache_dict[st_aid] = patt_val
            # Get the schrodinger structure molecule number and set the pattern
            # value
            st_mol_num = self.struct.atom[st_aid[0]].molecule_number
            mol_patt[st_mol_num] = patt_val
        return mol_patt
[docs]    def getMoleculeSmiles(self):
        """
        Get SMILES for each molecule in the structure
        :returns: The dictionary where the key is the molecule number and the
            value is the corresponding SMILES pattern
        :rtype: dict
        """
        return self._getMolecularPattern(self._smiles, Chem.MolToSmiles) 
[docs]    def getMoleculeSmarts(self):
        """
        Get SMARTS for each molecule in the structure
        :returns: The dictionary where the key is the molecule number and the
            value is the corresponding SMARTS pattern
        :rtype: dict
        """
        return self._getMolecularPattern(self._smiles, Chem.MolToSmarts) 
[docs]    def getUniqueMolNums(self, use_smarts=False):
        """
        Get the unique representative molecules in the structure
        :param bool use_smarts: If true the unique molecules will share the
            same SMARTS pattern. If false the unique molecules will share the
            same SMILES pattern.
        :rtype: list
        :return: list of molecule numbers that are unique
        """
        name_func = (self.getMoleculeSmarts
                     if use_smarts else self.getMoleculeSmiles)
        name_mol = {y: x for x, y in name_func().items()}
        return list(name_mol.values())