# Source code for schrodinger.pipeline.pipeio
"""
Core Pipeline I/O classes (`Structures`, `Grid`, `Text`, and `PhaseDB`).
Copyright Schrodinger, LLC. All rights reserved.
"""
import os
import sys
from schrodinger import structure
# Constants for built-in IO types. Each value is stored in the `type`
# attribute of the corresponding PipeIO subclass by its `__init__` method.
STRUCTURES = "structures"  # type tag set by Structures
GRID = "grid"  # type tag set by Grid
TEXT = "text"  # type tag set by Text
PHASEDB = "phasedb"  # type tag set by PhaseDB
class PipeIO:
    """
    Parent class for all Pipeline I/O classes. Subclasses hold data that is
    passed between stages during execution.

    `getFiles`, `check` and `isFilled` are placeholders in this class: they
    do nothing and return None. Subclasses are expected to override them.
    """

    def getFiles(self):
        """
        Return a list of files representing this object.

        This method must be implemented in subclasses. The base
        implementation does nothing and returns None.
        """

    def check(self):
        """
        Make sure that the object is valid. If it's not valid (e.g. file
        does not exist), raise a RuntimeError.

        This method must be implemented in subclasses. The base
        implementation performs no validation and returns None.
        """

    def isFilled(self):
        """
        Check whether the object is used or empty.

        This method must be implemented in subclasses. The base
        implementation returns None.
        """

    def getCount(self):
        """
        Return the number of items in this object.

        This method may be overridden in subclasses. The base implementation
        always returns None.
        """
        return None

    def getOutputPaths(self):
        """
        Return the paths to use when copying user output.

        Subclasses may override for special behavior. By default this simply
        delegates to `getFiles`.
        """
        return self.getFiles()
class Structures(PipeIO):
    """
    A class to hold the names of structure files.
    """

    def __init__(self, ligs=[], count=None):  # noqa: M511
        """
        Initialize the object with a list of ligand files.
        The list can be replaced later with `setData`.

        :type ligs: list
        :param ligs: A list of ligand structure file names. The default list
            is never mutated because `setData` stores a copy of its input.

        :param count: Optional number of structures held in the files, if it
            is already known. When None, it can be computed later with
            `count`.
        """
        self.setData(ligs)
        self.type = STRUCTURES
        # setData() resets the cached count, so it is assigned afterwards.
        self._count = count

    def check(self):
        """
        Make sure all files in the list exist. Raise a RuntimeError if "."
        is in the list, and exit (sys.exit(1)) if any file can't be found.
        """
        if not self._structures:
            return
        if "." in self._structures:
            raise RuntimeError(
                "Error: pipeio.Structures: input needs to be a LIST of files!"
            )
        for f in self._structures:
            if not os.path.exists(f):
                print(
                    "ERROR: stages.pipeio.Structures: File does not exist:",
                    f)
                # Only give the hint for paths that really are absolute.
                # os.path.abspath() returns a non-empty string for any input,
                # so it can't be used as this test.
                if os.path.isabs(f):
                    print(
                        "File is specified as absolute path; should be local path."
                    )
                sys.exit(1)

    def __str__(self):
        """
        Return a string representation of the object.
        """
        # A count of None (unknown) or 0 is left out of the header.
        if self._count:
            header = "[structures(%i)]:" % self._count
        else:
            header = "[structures]:"
        if not self._structures:
            return header + "\n * Empty *"
        return header + "".join("\n " + lig for lig in self._structures)

    def setData(self, ligfiles, count=None):
        """
        Replace the list of ligand files with `ligfiles`.

        :type ligfiles: list
        :param ligfiles: The replacement list of structure file names. A copy
            is stored.

        :param count: Optional number of structures in the files. A false
            value (None or 0) clears the cached count.

        :raise RuntimeError:
            Raised if "." is in the list, or if a single string is passed
            instead of a list.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(ligfiles, str) or '.' in ligfiles:
            raise RuntimeError(
                "Structures.setData(): input must be a list of files.")
        self._structures = list(ligfiles)
        self._count = count if count else None

    def getFiles(self):
        """
        Return the list of ligand file names after checking that all of them
        exist via the `check` method.
        """
        self.check()
        return self._structures

    def isFilled(self):
        """
        Return True if at least one structure file is stored.
        """
        return len(self._structures) > 0

    def getCount(self, count_if_needed=False):
        """
        Return the cached number of structures.

        :type count_if_needed: bool
        :param count_if_needed: If True and no count is cached, compute it
            with `count` first. Otherwise an unknown count is returned as
            None.
        """
        if self._count is None and count_if_needed:
            self.count()
        return self._count

    def count(self):
        """
        Return the number of structures in the set.

        The cached value is returned when available. Otherwise the structures
        in every stored file are counted with `structure.count_structures`
        and the total is cached.
        """
        if self._count is None:
            self._count = sum(
                structure.count_structures(filename)
                for filename in self._structures)
        return self._count
class Grid(PipeIO):
    """
    A class to hold a set of grid files (compressed or uncompressed).
    """

    # File name suffixes appended to the grid base name to find the
    # components of an uncompressed grid.
    _UNCOMPRESSED_SUFFIXES = (
        "_coul2.fld", ".csc", ".grd", "_greedy.save", ".gsc",
        ".save", ".site", "_vdw.fld"
    )

    def __init__(self, gridfile=None):
        """
        :type gridfile: str
        :param gridfile: The name of the grid file (for example,
                `<gridjobname>.grd` or `<gridjobname>.zip`). The value
                can be changed later with `setData`.

        :raise RuntimeError:
            Raised by `setData` if the extension is not `.grd` or `.zip`.
        """
        self.setData(gridfile)
        self.type = GRID

    def check(self):
        """
        Check that the grid file exists. Nothing is checked when no grid
        file is set.

        :raise RuntimeError:
            Raised if the file is missing.
        """
        if not self._gridfile:
            return
        if not os.path.isfile(self._gridfile):
            msg = "The following file is missing: %s" % self._gridfile
            raise RuntimeError(msg)

    def getPath(self):
        """
        Return the grid file name.
        """
        return self._gridfile

    def __str__(self):
        """
        Return a string representation of the object.
        """
        return "[grid]:" + "\n " + str(self._gridfile)

    def setData(self, gridfile):
        """
        Replace the grid file name.

        :type gridfile: str
        :param gridfile:
            The replacement grid file name, or None to clear it.

        :raise RuntimeError:
            Raised if the extension is not `.grd` or `.zip`.
        """
        if gridfile is not None:
            ext = os.path.splitext(gridfile)[1]
            if ext not in ('.grd', '.zip'):
                msg = "VSW.Grid: Invalid grid path: %s" % gridfile
                raise RuntimeError(msg)
        self._gridfile = gridfile

    def getFiles(self):
        """
        Return a list of grid file names, expanded from the representative
        file name, after checking for their existence.

        For compressed grids, the `.zip` file is the only item returned,
        but for uncompressed grids, all the standard grid component file
        names that exist on disk are returned. An empty list is returned when
        no grid file is set.
        """
        if not self._gridfile:
            return []
        self.check()
        gridbase, ext = os.path.splitext(self._gridfile)
        if ext == '.zip':
            return [self._gridfile]
        # Uncompressed grid: keep only the component files that are present.
        candidates = (
            gridbase + suffix for suffix in self._UNCOMPRESSED_SUFFIXES)
        return [
            filename for filename in candidates if os.path.isfile(filename)
        ]

    def isFilled(self):
        """
        Return True if a grid file name has been set.
        """
        return (self._gridfile is not None)
class Text(PipeIO):
    """
    A class to hold the names of one or more text (or log) files.
    """

    def __init__(self, files=[]):  # noqa: M511
        """
        :type files: list
        :param files:
            A list of text or log file names. The file paths can be later
            replaced with setData(). The default list is never mutated
            because `setData` stores a copy of its input.
        """
        self.setData(files)
        self.type = TEXT

    def check(self):
        """
        Make sure all files in the list exist.

        :raise RuntimeError:
            Raised if "." is in the list or if any file can't be found.
        """
        if not self._files:
            return
        if '.' in self._files:  # Check if a single file name is passed
            raise RuntimeError(
                "Error: pipeio.Text: input needs to be a LIST of files!")
        for f in self._files:
            if not os.path.exists(f):
                raise RuntimeError(
                    "Error: pipeio.Text: file does not exist: " + f)

    def __str__(self):
        """
        Return a string representation of the object.
        """
        return "[text files]:" + "".join(
            "\n " + filename for filename in self._files)

    def setData(self, textfiles):
        """
        Replace the list of file names with `textfiles`.

        :type textfiles: list
        :param textfiles: The replacement list of file names. A copy is
            stored.

        :raise RuntimeError:
            Raised if "." is in the list, or if a single string is passed
            instead of a list. The previously stored list is left untouched
            in that case.
        """
        # Validate before touching the stored list, so that a rejected call
        # doesn't wipe it. A bare string would otherwise be iterated
        # character by character.
        if isinstance(textfiles, str) or '.' in textfiles:
            raise RuntimeError("Text.setData(): input must be a list of files.")
        self._files = list(textfiles)

    def getFiles(self):
        """
        Return the list of text file names after checking that all of them
        exist.
        """
        self.check()
        return self._files

    def isFilled(self):
        """
        Return True if at least one file name is stored.
        """
        return len(self._files) > 0
class PhaseDB(PipeIO):
    """
    A class to hold the absolute pathname of a Phase database.
    """

    def __init__(self, path=None, remote=False):
        """
        :type path: str
        :param path: The path to the Phase database. A new-format database
            path ends with ".phdb"; any other path is treated as the base
            name of an old-format database (without "_phasedb").

        :type remote: bool
        :param remote:
            A value to control whether the database should be checked for
            existence only on the remote host, since the localhost may not
            have access to that directory.
        """
        self.setData(path)
        self.type = PHASEDB
        self._remote = remote

    def check(self):
        """
        Make sure the database exists on the local host.

        Nothing is checked when no path is set or when the database was
        marked as remote. A new-format path (ending with ".phdb") must be an
        existing directory; for an old-format path, the file
        `<path>_phasedb` must exist.

        :raise RuntimeError:
            Raised if the database can't be found.
        """
        if not self._path or self._remote:
            return
        if self._path.endswith(".phdb"):
            # New format
            if os.path.isdir(self._path):
                return
        elif os.path.isfile(self._path + '_phasedb'):
            # Old format
            return
        # If got here, then the database does not exist
        raise RuntimeError(
            'Error: pipeio.PhaseDB: Database does not exist: "%s"' %
            self._path)

    def getPath(self):
        """
        Returns the path (absolute base name).

        For the old format, does NOT include "_phasedb", for new format DOES
        include the ".phdb".
        """
        return self._path

    def __str__(self):
        """
        Return a string representation of the object.
        """
        return "[phasedb]:" + '\n %s' % self._path

    def setData(self, path):
        """
        Replace the stored database path.
        """
        self._path = path

    def getFiles(self):
        """
        Return the list of files, which is always empty for a Phase database.
        """
        # New format (".phdb"): the database is a directory, so there are no
        # files to list. Otherwise these files will get attempted to be added
        # to the job record.
        # Old format: Ev:95999 - Avoid copying phasedb to launch directory at
        # the end of job since user has selected a preferred location for it.
        return []

    def isFilled(self):
        """
        Return True if a database path has been set.
        """
        return (self._path is not None)

    def getOutputPaths(self):
        """
        Return the paths to register when copying user output: the database
        path for a new-format (".phdb") database, otherwise an empty list.
        """
        if self._path and self._path.endswith(".phdb"):
            # When copying user output, return the path so it's registered with
            # jobcontrol
            return [self._path]
        return []