import os
import re
from schrodinger import structure
from schrodinger.infra import canvas
from schrodinger.infra import phase
from schrodinger.job import jobcontrol
from schrodinger.utils import fileutils
from schrodinger.utils import log
from schrodinger.utils import multifpfile
from schrodinger.utils.multifpfile import FP_NAME_TO_TYPE
from schrodinger.utils.multifpfile import FP_TYPE_TO_NAME
from schrodinger.utils.multifpfile import make_fp_generator
# Name handed to `log.get_output_logger()` when a caller supplies no logger.
LOGGER_NAME = 'hitexpander'
# NOTE(review): the next line is the tail of a parenthesized expression whose
# opening line is not visible here; presumably a tuple of supported structure
# file formats -- confirm against the complete file.
fileutils.MAESTRO, fileutils.SMILESCSV)
def fp_types():
    """
    Lists the names of the available fingerprint types.

    :return: Values of `FP_TYPE_TO_NAME` in ascending order.
    :rtype: list
    """
    return sorted(FP_TYPE_TO_NAME.values())
def make_fp_generator_from_canvas_fpfile(filename):
    """
    Instantiates fingerprint generator from Canvas FP file.

    :param filename: Canvas FP file name.
    :type filename: str

    :return: Fingerprint generator.
    :rtype: `canvas.ChmFPOut32`

    :raises RuntimeError: If the fingerprint traits cannot be determined
        from the file, or if the fingerprints are not 32-bit.
    """
    fpinfo = canvas.getFPInfo(filename)
    if not fpinfo.isValid():
        raise RuntimeError(
            "could not determine fingerprint traits in '%s'" % filename)

    # Only 32-bit fingerprints can be turned into a generator here.
    if fpinfo.is32Bit():
        return fpinfo.toFPOut()

    raise RuntimeError(
        "unexpected fingerprints precision in '%s'" % filename)
def is_phdb_path(path):
    """
    Tells whether `path` is treated as a path to a Phase DB, that is,
    whether it is absolute and ends with '.phdb' (case-insensitive).
    The file system is not consulted.

    :param path: Path to examine.
    :type path: str or `os.PathLike`

    :return: `True` if `path` is absolute and has the '.phdb' suffix.
    :rtype: bool
    """
    # Accept pathlib.Path and friends; plain strings pass through unchanged.
    path = os.fspath(path)
    return os.path.isabs(path) and path.lower().endswith('.phdb')
def _raise_unsupported_format(fn):
    """
    Reports that the file `fn` is in a structure format that is not
    supported. Never returns.

    :param fn: File name to mention in the error message.
    :type fn: str

    :raises UnsupportedFormat: Always.
    """
    message = "'%s': unsupported structure file format." % fn
    raise UnsupportedFormat(message)
def _apply_isub(db, molids=None, isub=''):
Filters out unavailable/undesired/disallowed molecule IDs.
:param db: Phase DB.
:type db: `schrodinger.infra.phase.PhpDatabaseFp`
:param molids: Iterable over desired molecule IDs.
:type molids: iterable over int
:param isub: Phase DB subset file name.
:type isub: str
:return: Subset of `molids` allowed by `isub` available from `db`.
:rtype: set(int) or list(int)
available_ids = db.getAllIDs()
if isub or molids:
available_ids = set(available_ids)
if isub:
isub_ids = phase.read_phase_subset(isub)
available_ids &= set(isub_ids)
return available_ids if molids is None else available_ids & set(molids)
def count_ligands(path, isub=''):
    """
    Counts the ligands available from a Phase DB or a structure file.

    :param path: Path to a Phase DB (see `is_phdb_path`) or name of a
        structure file.
    :type path: str

    :param isub: Phase DB subset file name; not used for structure files.
    :type isub: str

    :return: Number of molecule IDs left after applying `isub` (Phase DB),
        or the result of `structure.count_structures(path)` otherwise.
    :rtype: int

    :raises UnsupportedFormat: If the Phase DB cannot be opened/queried or
        the structures cannot be counted.
    """
    if is_phdb_path(path):
        try:
            db = phase.PhpDatabaseFp(path)
            return len(_apply_isub(db, molids=None, isub=isub))
        except phase.PhpException as e:
            raise UnsupportedFormat(e) from e

    try:
        return structure.count_structures(path)
    except Exception:
        # NOTE(review): the body of this handler was not visible in the
        # text this block was rebuilt from; delegating to the module's
        # "unsupported structure file format" helper is an assumption --
        # confirm against the pristine file.
        _raise_unsupported_format(path)
# Yields `(index, structure)` pairs from a structure file, choosing the
# `schrodinger.structure` reader by the format that
# `fileutils.get_structure_file_format()` reports. Indices count from 1 in
# file order (`enumerate(reader, 1)`).
# NOTE(review): as it stands here this block is not valid Python -- the
# indentation, the docstring delimiters and several statement lines (flagged
# below) are not visible. Confirm every flagged spot against the complete
# file before relying on it.
# NOTE(review): the docstring entry `:type path: str` presumably documents
# the `filename` parameter.
[docs]def structure_file_reader(filename, logger=None, molids=None, keepsmiles=False):
Generator that yields `(mol_id, st)` tuples for the
structures in the file identified by `filename`. The file
can be in Maestro/SD/SMILES format.
:param filename: File name.
:type path: str
:param logger: Logger for warnings.
:type logger: `logging.Logger`
:param molids: IDs (1-based indices) of the desired structures.
`None` means "all".
:type molids: iterable over integers
:param keepsmiles: Should SMILES text be converted into
`schrodinger.structure.Structure` instances?
:type keepsmiles: bool
# Pick the reader class that matches the detected file format.
format = fileutils.get_structure_file_format(filename)
if format == fileutils.MAESTRO:
reader = structure.MaestroReader(filename)
elif format == fileutils.SD:
reader = structure.SDReader(filename)
elif format == fileutils.SMILES:
reader = structure.SmilesReader(filename)
elif format == fileutils.SMILESCSV:
reader = structure.SmilesCsvReader(filename)
# NOTE(review): no fallback branch is visible for any other format, which
# would leave `reader` unbound; presumably an `else:` rejects such files
# (cf. `_raise_unsupported_format`) -- confirm.
is_smiles = format in (fileutils.SMILES, fileutils.SMILESCSV)
# The adaptor is only used below to turn SMILES-derived molecules into
# structures, so it is created for SMILES input only.
if is_smiles:
adaptor = canvas.ChmMmctAdaptor()
# Fall back to the module's output logger when none was supplied.
if not logger:
logger = log.get_output_logger(LOGGER_NAME)
# Built once, before the loop; `None` keeps the "all structures" meaning.
molids_set = set(molids) if molids is not None else None
for (i, st) in enumerate(reader, 1):
if molids_set is not None and i not in molids_set:
# NOTE(review): the statement guarded by the test above is not visible;
# presumably `continue` (skip structures that were not requested).
if is_smiles:
if keepsmiles:
# SMILES text is handed out as-is instead of a Structure.
yield (i, st.smiles)
# NOTE(review): an `else:` and/or `try:` presumably opens here -- the
# `except` below has no visible `try:`.
mol = canvas.ChmMol.fromSMILES(st.smiles)
# canvas2d depends on OpenGL, therefore
# 1 == canvas2d.ChmAtomOption.H_ExplicitOnly
canvas.CHM_FORCE2D(mol, True, 1)
# we need to have 2D coordinates or else going
# to disappoint mmstereo and/or rdkit_adapter
except RuntimeError as e:
# NOTE(review): the handler body is not visible; presumably it reports `e`
# through `logger` and skips this structure -- confirm.
# 5 == canvas2d.optionMDL.H_Visible
yield (i, structure.Structure(adaptor.create(mol, True, 5)))
# NOTE(review): presumably the `else:` branch of `if is_smiles:` --
# structures from non-SMILES files are yielded exactly as read.
yield (i, st)
def phdb_mol_reader(path, molids=None, isub=''):
    """
    Generator that yields `(mol_id, st)` tuples for the
    ligands in the Phase DB, in order of ascending `mol_id`.

    :param path: Path to the Phase DB.
    :type path: str

    :param molids: IDs of the desired structures. `None` means "all".
    :type molids: iterable over integers

    :param isub: Subset file name (ENUM-285).
    :type isub: str

    :raises UnsupportedFormat: If opening the database or selecting the
        molecule IDs raises `phase.PhpException`. Because this is a
        generator, the error surfaces when iteration starts.
    """
    try:
        db = phase.PhpDatabaseFp(path)
        available_ids = _apply_isub(db, molids, isub)
    except phase.PhpException as e:
        raise UnsupportedFormat(e) from e

    for mol_id in sorted(available_ids):
        yield (mol_id, structure.Structure(db.getCt(mol_id)))
class UnavailableFingerprintType(KeyError):
    """
    Raised when fingerprints of the requested type are not available
    from the given source.
    """
def _unavailable_fptype(path, kind):
    """
    Reports that fingerprints of type `kind` cannot be obtained from
    `path`. Never returns.

    :param path: Name of the fingerprint source.
    :type path: str

    :param kind: Name of the fingerprint type.
    :type kind: str

    :raises UnavailableFingerprintType: Always.
    """
    message = "fingerprints of type '%s' are not available from '%s'" % (
        kind, path)
    raise UnavailableFingerprintType(message)
def phdb_fpreader(path, kind, molids=None, isub=''):
    """
    Generator yielding `(mol_id, fp)` tuples for fingerprints
    of the desired `kind` for the molecules selected via `molids`
    (in order of sorted `molids`) from Phase DB pointed to by `path`.

    :param path: Path to Phase DB.
    :type path: str

    :param kind: Name of the desired fingerprints type.
    :type kind: str

    :param molids: Identificators of the molecules for which
                   fingerprints are to be loaded. `None` means "all".
    :type molids: iterable over int

    :param isub: Subset file name (ENUM-285).
    :type isub: str

    :raises ValueError: If opening the database or selecting the molecule
        IDs raises `phase.PhpException`.
    :raises UnavailableFingerprintType: If `kind` is not a known
        fingerprint type name or such fingerprints are not stored in the
        database.

    Because this is a generator, errors surface when iteration starts.
    """
    try:
        db = phase.PhpDatabaseFp(path)
        available_ids = _apply_isub(db, molids, isub)
    except phase.PhpException as e:
        raise ValueError(str(e)) from e

    # An unknown type name is reported the same way as a type that is not
    # stored (UnavailableFingerprintType is a KeyError, so callers catching
    # the KeyError of a plain dictionary lookup keep working).
    fptype = FP_NAME_TO_TYPE.get(kind)
    if fptype is None or not db.isStored(fptype):
        _unavailable_fptype(path, kind)

    for mol_id in sorted(available_ids):
        yield (mol_id, db.getFp(fptype, mol_id, True))
def multifpfile_fpreader(path, kind, molids=None):
    """
    Generator over `(mol_id, fp)` tuples read from the multi-fingerprint
    file `path`, restricted to fingerprints of type `kind` and to the
    molecules selected via `molids`.

    :param path: Path to the multi-fingerprint file.
    :type path: str

    :param kind: Name of the desired fingerprints type.
    :type kind: str

    :param molids: Identificators of the molecules for which
                   fingerprints are to be loaded.
    :type molids: int containment checkable or `None`

    :raises UnavailableFingerprintType: If the file does not list `kind`
        among its fingerprint type names.
    """
    with multifpfile.MultiFPFile(path) as fpfile:
        known_kinds = fpfile.get_typenames()
        if kind not in known_kinds:
            _unavailable_fptype(path, kind)

        selected = fpfile.iter_fingerprints(typenames=[kind], molids=molids)
        for (mol_id, fingerprint) in selected:
            yield (mol_id, fingerprint)
def structure_fpreader(path, kind, molids=None, logger=None):
    """
    Generator yielding `(mol_id, fp)` tuples for fingerprints
    of the desired `kind`, computed on-the-fly for the structures
    selected via `molids` (in file order) from the structure file
    pointed to by `path`.

    :param path: Name of the structure file (read by means of
                 `structure_file_reader`).
    :type path: str

    :param kind: Name of the desired fingerprints type.
    :type kind: str

    :param molids: Identificators (1-based indices) of the structures for
                   which fingerprints are to be generated. `None` or an
                   empty container means "all".
    :type molids: int containment checkable or `None`

    :param logger: Logger handed over to `structure_file_reader`.
    :type logger: `logging.Logger`
    """
    # generate on-the-fly
    adaptor = canvas.ChmMmctAdaptor()
    fprinter = make_fp_generator(kind)
    for (i, st) in structure_file_reader(path, logger):
        # Skip structures that were not asked for.
        if molids and i not in molids:
            continue
        chmol = adaptor.create(st)
        yield (i, multifpfile.bitset_to_list(fprinter.generate(chmol)))
# Yields `(position, fingerprint)` pairs from a Canvas FP file. The file is
# opened with the 32- or 64-bit reader according to `canvas.getFPInfo()`,
# and each fingerprint is passed through `multifpfile.bitset_to_list()`.
# NOTE(review): as it stands here this block is not valid Python -- the
# indentation, the docstring delimiters and some statement lines (flagged
# below) are not visible. Confirm against the complete file.
# NOTE(review): the `logger` parameter is not referenced anywhere in the
# visible body.
[docs]def canvasfp_fpreader(path, molids=None, logger=None):
Generator yielding `(mol_id, fp)` tuples for fingerprints
of the molecules selected via `molids` (in order of sorted
`molids`) from Canvas FP file pointed to by `path`.
:param path: Path to the Canvas FP file.
:type path: str
:param molids: Identificators of the molecules for which
fingerprints are to be loaded.
:type molids: iterable over int
# Choose the reader that matches the fingerprint precision.
fpinfo = canvas.getFPInfo(path)
if fpinfo.is32Bit():
freader = canvas.ChmFPIn32(path)
elif fpinfo.is64Bit():
freader = canvas.ChmFPIn64(path)
# NOTE(review): presumably an `else:` precedes this `raise`; without it the
# error would be raised for every file.
raise ValueError("unexpected fingerprints precision in '%s'" % path)
if molids:
for db_pos in sorted(molids):
# NOTE(review): no statement that moves `freader` to `db_pos` is visible
# before the read below; as written the fingerprints would be consumed
# sequentially from the start of the file. Presumably a positioning call
# belongs here -- confirm.
db_fp = next(freader) # fingerprint at db_pos
yield (db_pos, multifpfile.bitset_to_list(db_fp))
# NOTE(review): presumably the `else:` branch of `if molids:` -- with no
# selection every fingerprint is read in file order.
while freader.hasNext():
db_pos = freader.getPos() # 1-based
db_fp = next(freader) # fingerprint at db_pos
yield (db_pos, multifpfile.bitset_to_list(db_fp))
# Verifies that every fingerprint kind in `kinds` can be obtained from
# `path`. Three kinds of source are told apart: a Phase DB (`is_phdb_path`),
# a structure file (`fileutils.get_structure_file_format()` returns a
# format), and otherwise a file queried through
# `multifpfile.get_fingerprint_types()`.
# NOTE(review): as it stands here this block is not valid Python -- the
# indentation, the docstring delimiters and several statement lines (flagged
# below) are not visible. Confirm against the complete file.
# NOTE(review): the docstring entry `:type kind:` presumably documents the
# `kinds` parameter.
[docs]def check_fp_availability(path, kinds):
Check whether fingerprints of the desired type are available, raise
exception if they are not.
:param path: Name of the structure/multi-fingerprint file or Phase DB.
:type path: str
:param kinds: Iterable over desired fingerprint kinds.
:type kind: iterable over str
# Local helper; builds the same message as `_unavailable_fptype(path, what)`.
def raise_unavailable(what):
raise UnavailableFingerprintType(
"fingerprints of type '%s' are not available from '%s'" %
(what, path))
# Phase DB: a kind must be a known type name and be stored in the DB.
if is_phdb_path(path):
# NOTE(review): a `try:` presumably precedes the next line -- the `except`
# below has no visible `try:`.
db = phase.PhpDatabaseFp(path)
except phase.PhpException as e:
raise UnsupportedFormat(e)
for name in kinds:
fp_type = FP_NAME_TO_TYPE.get(name, None)
if fp_type is None or not db.isStored(fp_type):
# NOTE(review): the statement guarded by the test above is not visible;
# presumably `raise_unavailable(name)`. How this branch ends (`return` or
# `else:`) is not visible either.
# Structure file: only the type name itself is checked.
format = fileutils.get_structure_file_format(path)
if format is not None:
for name in kinds:
if name not in FP_NAME_TO_TYPE:
# NOTE(review): guarded statement not visible; presumably
# `raise_unavailable(name)`, followed by the end of this branch.
# Otherwise: ask the file which fingerprint types it holds.
types = multifpfile.get_fingerprint_types(path)
if types is None and kinds:
# NOTE(review): the statement guarded by the test above is not visible and
# cannot be inferred from here -- confirm what is raised in this case.
for name in kinds:
if name not in types:
# NOTE(review): guarded statement not visible; presumably
# `raise_unavailable(name)`.
def prog_for_argparse(scriptfile):
    """
    Builds strings like "$SCHRODINGER/run <script>.py": the value of
    `fileutils.SCHRODINGER_RUN_STR`, a space, and the base name of
    `scriptfile`. A single character that follows a trailing '.py'
    (as in '.pyc') is dropped from the name.

    :param scriptfile: Name of a script file.
    :type scriptfile: str

    :return: String to be used to launch the script.
    :rtype: str
    """
    basename = os.path.basename(scriptfile)
    source_name = re.sub(r'\.py.$', r'.py', basename, flags=re.IGNORECASE)
    return ' '.join((fileutils.SCHRODINGER_RUN_STR, source_name))
# Registers the `-isub` command line option on `parser`.
# NOTE(review): as it stands here this block is not valid Python -- the
# indentation and docstring delimiters are missing, and the opening of the
# call that the `help=` keyword below belongs to (presumably
# `parser.add_argument('-isub', ...)`) is not visible. Confirm the option's
# remaining arguments against the complete file.
[docs]def add_isub_option(parser):
Adds -isub option to the `parser` (ENUM-285).
:param parser: Command line argument parser.
:type parser: `argparse.ArgumentParser`
help='Process only Phase DB subset defined in '
'the <subset>_phase.inp file.')
def massage_isub_option(parser, args):
    """
    Processes -isub option (ENUM-285).

    When a subset is given, `args.isub` is replaced with the runtime path
    (`jobcontrol.get_runtime_path`) of the subset file, with
    `phase.PHASE_SUBSET_EXT` appended unless the name already ends with
    it. Nothing is changed when no subset is given.

    Problems are reported through `parser.error()`: a subset given without
    a Phase DB input (neither `args.database` nor `args.phase_db` set),
    or a subset file that does not exist.

    :param parser: Command line argument parser.
    :type parser: `argparse.ArgumentParser`

    :param args: Namespace holding command line arguments.
    :type args: `argparse.Namespace`
    """
    using_phdb = (getattr(args, 'database', False) or
                  getattr(args, 'phase_db', False))
    subset = getattr(args, 'isub', None)

    if subset and not using_phdb:
        parser.error('-isub is not applicable for input other than Phase DB.')

    # Nothing to resolve without a subset; returning here also keeps the
    # string operations below away from `None`.
    if not subset:
        return

    ext = phase.PHASE_SUBSET_EXT
    suffix = '' if subset.endswith(ext) else ext
    args.isub = jobcontrol.get_runtime_path(subset + suffix)

    if not os.path.isfile(args.isub):
        parser.error(f'missing subset file: "{args.isub}"')