import csv
import enum
import os
import typing
import zipfile
from collections import defaultdict
from contextlib import contextmanager
import requests.exceptions
from more_itertools import chunked
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import mmproj
from schrodinger.models import parameters
from schrodinger.project import project
from schrodinger.Qt import QtCore
from schrodinger.structutils.transform import get_centroid
from schrodinger.tasks import tasks
from . import constants
from . import entry_types
from . import export_models
from . import ld_utils
from . import login
from . import upload_utils
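# Number of compounds/3D export items written to each export batch.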
BATCH_SIZE = 50
PROPNAME_COMPOUND_ID = constants.PROPNAME_COMPOUND_ID
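# Fallback property mapping used when an export task defines no property
# columns (see `BaseStructureExportTask.mainFunction`).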
DUMMY_PROP_DICT = export_models.make_prop_dict(name='Dummy prop dict')
class LDError(Exception):
pass
FORMAT_PARAMS = {
'delimiter': '\t',
'lineterminator': '\n',
'quoting': csv.QUOTE_NONE
}
# Mapping file TSV strings
TSV_CORP_ID = 'Corporate ID'
TSV_MODEL_NAME = 'Model Name'
TSV_TARGET_KEY = 'Target Key'
TSV_LIGAND_KEY = 'Ligand Key'
TSV_HEADER = [TSV_CORP_ID, TSV_MODEL_NAME, TSV_TARGET_KEY, TSV_LIGAND_KEY]
TSV_ENTITY_ID = 'entity_id'
TSV_MODEL_NAME87 = 'model_name'
TSV_LIG_PATH = 'ligand_zip_file_path'
TSV_REC_PATH = 'target_zip_file_path'
TSV_LIG_NAME = 'ligand_name'
TSV_REC_NAME = 'target_name'
TSV_HEADER87 = [
PROPNAME_COMPOUND_ID, TSV_ENTITY_ID, TSV_MODEL_NAME87, TSV_LIG_PATH,
TSV_REC_PATH, TSV_LIG_NAME, TSV_REC_NAME
]
DUMMY_CORP_ID = 'dummy_corp_id'
# LiveDesign export JSON properties
RESPONSE = 'import_responses'
LIVE_REPORT_URL = 'live_report_url'
SUCCESS = 'success'
CORPORATE_ID = 'corporate_id'
ADD_VIEW_TSV = 'additional_view_information.txt'
class TaskType(enum.Enum):
two_d = '2D'
three_d = '3D'
class ExportType(enum.Enum):
sdf = 'SDF'
maestro = 'MAESTRO'
maestro_sdf = 'MAESTRO_SDF'
def __str__(self):
return self.value
class LDExportTaskMixin:
@contextmanager
def handleLDExceptions(self):
"""
A context manager for handling exceptions that occur when interacting with LD.
Catches various exceptions and re-raises them as `LDError` with
additional explanatory text.
:raise LDError: if any exceptions occur in the context
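Example (illustrative; any block of `LDClient` calls can be wrapped)::

    with self.handleLDExceptions():
        proj_id = self.input.ld_client.get_project_id_by_name(proj_name)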
"""
msg = None
try:
yield
except requests.exceptions.ConnectionError:
msg = ('Maestro was unable to connect to the LiveDesign server due'
' to a connection error.')
except requests.exceptions.Timeout:
msg = 'Attempt to connect to the LiveDesign server timed out.'
except Exception as exc:
msg = str(exc)
if msg:
raise LDError(f'Task {self.name} failed: {msg}')
def _getProjectID(self):
"""
:raise LDError: if the `LDClient` call fails
:return: the project ID for this export, if found
:rtype: int or NoneType
"""
proj_name = self.input.ld_destination.proj_name
with self.handleLDExceptions():
proj_id = self.input.ld_client.get_project_id_by_name(proj_name)
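# An empty list from the client means no matching project was found;
# normalize it to None.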
return None if proj_id == [] else proj_id
class MasterExportTask(LDExportTaskMixin, tasks.BlockingFunctionTask):
"""
Primary LD export task. Responsible for
1. Preparing data for export
2. Launching subtasks that perform export processes
3. Communicating with the LD Export panel
:ivar exportFailed: a signal containing an error message describing export
failures
:vartype exportFailed: QtCore.pyqtSignal
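Illustrative wiring from a panel (`task_input` and `show_error_dialog`
are hypothetical stand-ins for the panel's input model and error slot)::

    task = MasterExportTask()
    task.input.setValue(task_input)
    task.exportFailed.connect(show_error_dialog)
    task.start()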
"""
input: export_models.TaskInput
exportFailed = QtCore.pyqtSignal(str)
num_subtasks = parameters.NonParamAttribute()
class Output(parameters.CompoundParam):
lr_url: str = None
result_urls: typing.Set[str]
num_success: int
num_failure: int
unexported_items: typing.List[export_models.ThreeDExportItem]
corp_ids: typing.List[str]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.num_subtasks = 0
@tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
def _createLiveReport(self):
"""
If necessary, create a new LiveReport.
"""
ld_dest = self.input.ld_destination
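# A non-empty LiveReport ID means the destination LiveReport already
# exists, so there is nothing to create.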
if ld_dest.lr_id != '':
return
with self.handleLDExceptions():
live_report = create_live_report(self.input.ld_client,
self.input.ld_models,
ld_dest.proj_name, ld_dest.lr_name)
ld_dest.lr_id = live_report.id
@tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
def _updateStructureProperties(self):
"""
Modify structure properties as necessary for export.
"""
for idx, st in enumerate(self.input.structures_for_2d_export):
# Structures cannot be exported to LiveDesign without titles
st.title = st.title or ' '
# In order to display titles in LiveDesign, they must be stored on
# a particular property
st.property[constants.PROPNAME_SD_TITLE] = st.title
if use_new_export(self.input.ld_client):
# For the new export API, we must also define a unique compound
# ID for each structure
compound_id = str(idx)
st.property[PROPNAME_COMPOUND_ID] = compound_id
# If the structure that is being written to the SDF file (the
# "2D" structure) is not identical to the structure being
# written to the .mae file (the "3D" structure), then the
# same compound ID must be applied to that structure separately
for item in self.input.three_d_export_items:
if item.key == st:
item.ligand.property[PROPNAME_COMPOUND_ID] = compound_id
@tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
def _removeRedundantCompounds(self):
"""
For exports using the old API, structures meant for 3D export should
not also be exported as 2D compounds.
"""
if use_new_export(self.input.ld_client):
return
inp = self.input
sts_for_2d_export = set(inp.structures_for_2d_export)
sts_for_2d_export -= {item.ligand for item in inp.three_d_export_items}
inp.structures_for_2d_export = list(sts_for_2d_export)
@tasks.preprocessor(order=constants.ORDER_POST_SUMMARY)
def _createDummyReceptor(self):
"""
If necessary, add a dummy receptor structure to 3D export items.
The dummy structure contains a single, distant atom.
"""
if use_new_export(self.input.ld_client):
return
items = self.input.three_d_export_items
if not any(item.receptor is None for item in items):
return
# Create a dummy receptor structure
dummy_rec = structure.create_new_structure(num_atoms=1)
dummy_rec.title = "Dummy receptor"
# We don't want the dummy receptor to be seen by the user in the LD 3D
# viewer, so keep it far away from the ligands
atom = next(iter(dummy_rec.atom))
atom.xyz = list(get_centroid(items[0].ligand) + 1000)[:3]
for item in items:
item.receptor = item.receptor or dummy_rec
def mainFunction(self):
"""
Create and launch export subtasks.
"""
if use_new_export(self.input.ld_client):
# Run a single export task for all data
self._runStructureExportTask(TaskType.three_d)
else:
# If necessary, export 2D data
if self.input.structures_for_2d_export:
self._runStructureExportTask(TaskType.two_d)
# If necessary, separately export 3D data
_3d_items = self.input.three_d_export_items
if _3d_items:
task = self._runStructureExportTask(TaskType.three_d)
_3d_items = task.output.three_d_export_items
# If necessary, export the remaining 3D data; these items could only
# be keyed by corporate ID once the original 3D export had completed
if _3d_items:
task = self._runStructureExportTask(
TaskType.three_d, three_d_export_items=_3d_items)
_3d_items = task.output.three_d_export_items
if _3d_items:
num_items = len(_3d_items)
msg = (f'Unable to identify export key for {num_items}'
' structures.')
self.exportFailed.emit(msg)
raise RuntimeError(msg)
if self.input.ffc_export_specs:
# If necessary, export FFC data
task = AttachmentExportTask()
task.input.setValue(self.input)
task.input.corp_ids = self.output.corp_ids
self._runExportTask(task)
def _runExportTask(self, task):
"""
Run the supplied export task, then process the result.
:param task: an export task
:type task: tasks.ThreadFunctionTask
"""
task_dir = self.getTaskDir()
task.specifyTaskDir(task_dir)
task.name += str(self.num_subtasks)
self.num_subtasks += 1
task.start()
task.wait() # TODO PANEL-18317
self.output.num_success += task.output.num_success
self.output.num_failure += task.output.num_failure
if isinstance(task, BaseStructureExportTask):
self.output.unexported_items.extend(task.output.unexported_items)
error_msg = None
if task.failure_info:
# If an exception was raised during the task
error_msg = str(task.failure_info.exception)
elif task.output.num_failure > 0:
# If the export failed without raising an exception
error_msg = (f'Task "{task.name}" failed to export all structures'
' to LD.')
if error_msg:
self.exportFailed.emit(error_msg)
raise RuntimeError(error_msg)
def _runStructureExportTask(self, task_type, three_d_export_items=None):
"""
Create the appropriate export task, run it, and collect the results.
:param task_type: the type of export task to return
:type task_type: TaskType
:param three_d_export_items: optionally, a list of 3D export items to
assign to the task input
:type three_d_export_items: list[export_models.ThreeDExportItem] or
NoneType
:return: a structure export task object
:rtype: BaseStructureExportTask
"""
if use_new_export(self.input.ld_client):
task_class = ExportTask87
else:
task_class = EXPORT_TASK_MAP[task_type]
task = task_class()
task.input.setValue(self.input)
task.input.export_3d = task_type == TaskType.three_d
if three_d_export_items:
task.input.three_d_export_items = three_d_export_items
self._runExportTask(task)
output = self.output
output.lr_url = task.output.lr_url
output.result_urls |= task.output.result_urls
output.corp_ids.extend(task.output.corp_ids)
return task
class BaseStructureExportTask(LDExportTaskMixin, tasks.ThreadFunctionTask):
"""
Abstract task for exporting structure data to LiveDesign.
"""
file_batches: typing.List[export_models.FileBatch]
input: export_models.TaskInput
_map_file_header: typing.List[str]
_map_file_base_name: str
prop_dicts = parameters.NonParamAttribute()
class Output(parameters.CompoundParam):
lr_url: str = None
result_urls: typing.Set[str]
num_success: int
num_failure: int
three_d_export_items: typing.List[export_models.ThreeDExportItem]
unexported_items: typing.List[export_models.ThreeDExportItem]
corp_ids: typing.List[str]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._results = []
self.prop_dicts = []
@tasks.preprocessor(order=constants.ORDER_PROP_DICT)
def _addPropDicts(self):
"""
Update task property dictionaries used to specify exported structure
property data.
"""
for prop_spec in self.input.property_export_specs:
prop_spec.addDataToExportTask(self)
def mainFunction(self):
"""
Perform the LiveDesign export operation for each batch of structures.
"""
for batch_idx, file_batch in enumerate(self.file_batches):
ld_client = self.input.ld_client
ld_dest = self.input.ld_destination
compound_source = self._getCompoundSource()
prop_dicts = self.prop_dicts or [DUMMY_PROP_DICT]
if file_batch.three_d_file_path:
if use_new_export(ld_client):
export_type = ExportType.maestro_sdf
else:
export_type = ExportType.maestro
else:
export_type = ExportType.sdf
with self.handleLDExceptions():
task_id = export_to_ld(
ld_client=ld_client,
project_name=ld_dest.proj_name,
lr_name=ld_dest.lr_name,
lr_id=ld_dest.lr_id,
prop_dicts=prop_dicts,
publish_data=self.input.publish_data,
compound_source=compound_source,
export_type=export_type,
map_file_path=file_batch.map_file_path,
sdf_file_path=file_batch.sdf_file_path,
three_d_file_path=file_batch.three_d_file_path)
result_url = ld_client.wait_and_get_result_url(task_id,
timeout=3600)
self.output.result_urls.add(result_url)
result = ld_client.get_task_result(result_url)
self._results.append(result)
@tasks.postprocessor(order=constants.ORDER_COLLECT_RESULTS)
def _collectExportResults(self):
"""
Collect results from completed export processes.
"""
lr_url = None
output = self.output
for result in self._results:
if not result:
# If something went wrong, `result` may be an empty string
output.num_failure += 1
continue
lr_url = lr_url or result[LIVE_REPORT_URL]
for compound_data in result[RESPONSE]:
if compound_data[SUCCESS]:
output.num_success += 1
else:
output.num_failure += 1
output.lr_url = output.lr_url or lr_url
def get3DExportItems(self):
"""
:return: a list of 3D export items associated with this task
:rtype: list[export_models.ThreeDExportItem]
"""
return list(self.input.three_d_export_items)
def _getCompoundSource(self):
"""
Return the appropriate compound source attribute given the type of
maestro data we are exporting to LD.
For LD versions 8.6+, the `compound_source` argument must be passed to
`LDClient.start_export_assay_and_pose_data()` as:
- For DRUG_DISCOVERY mode:
- None for exporting all compounds
- For MATERIALS_SCIENCE mode:
- 'pri' for exporting regular compounds
- 'non_pri' for organometallic compounds.
:return: the appropriate compound source argument.
:rtype: str or None
"""
entry_type_name = self.input.entry_type_name
compound_source = None
ld_mode = login.get_LD_mode(self.input.ld_client)
if ld_mode == login.LDMode.MATERIALS_SCIENCE:
compound_source = constants.CompoundSource.pri
if entry_type_name == entry_types.OrganometallicCompounds.name:
compound_source = constants.CompoundSource.non_pri
return compound_source
def _createMapFile(self, three_d_items, batch_idx, *map_row_args):
"""
Create a LiveDesign export mapping file for the specified data.
:param three_d_items: a list of 3D data for export
:type three_d_items: list[export_models.ThreeDExportItem]
:param batch_idx: the index of the export batch that this map file is
for
:type batch_idx: int
:param `*map_row_args`: additional arguments to pass to `_getMapRows()`,
if necessary
:return: the path of the map file
:rtype: str
"""
map_file_name = self._getFilePath(batch_idx,
base_name=self._map_file_base_name,
ext='tsv')
header = self._map_file_header
with open(map_file_name, 'w', newline='', encoding='utf-8') as fh:
writer = csv.DictWriter(fh, **FORMAT_PARAMS, fieldnames=header)
writer.writeheader()
for item_3d in three_d_items:
for row in self._getMapRows(item_3d, *map_row_args):
writer.writerow(row)
return map_file_name
def _getMapRows(self, item_3d, *args):
"""
Return mapping file row dictionaries for the specified 3D export item.
Should be overridden in subclasses that wish to add data rows to
mapping files.
:param item_3d: a 3D export item
:type item_3d: export_models.ThreeDExportItem
:return: a list of mapping file row dictionaries for this item
:rtype: list[dict[str, str]]
"""
raise NotImplementedError
def _getFilePath(self, batch_idx, base_name=None, ext=None):
"""
Construct a standardized file path within the task directory.
:param batch_idx: the batch index for this file
:type batch_idx: int
:param base_name: optionally, a base name to include in the file name
:type base_name: str or NoneType
:param ext: optionally, the extension for this file
:type ext: str or NoneType
:return: a standardized file path
:rtype: str
"""
path = f'maestro_export_{batch_idx:02d}_{self.name}'
if base_name is not None:
path += f'_{base_name}'
if ext is not None:
path += f'.{ext}'
task_dir = self.getTaskDir()
return os.path.join(task_dir, path)
@tasks.postprocessor(order=constants.ORDER_ASSIGN_CORP_IDS)
def _collectCorpIDs(self):
"""
Add the corporate ID of each exported item to the output model.
"""
corp_ids = self.output.corp_ids
for result in self._results:
for compound_data in result[RESPONSE]:
if compound_data:
corp_ids.append(compound_data[CORPORATE_ID])
class BaseStructureExportTask86(BaseStructureExportTask):
"""
Abstract structure export task for older (v8.6-) versions of LiveDesign.
"""
structure_index_map = parameters.NonParamAttribute()
compound_batches = parameters.NonParamAttribute()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.structure_index_map = {}
self.compound_batches = []
@tasks.postprocessor(order=constants.ORDER_ASSIGN_CORP_IDS)
def _assignCorporateIDs(self):
"""
Assign corporate IDs from the recently exported 2D compounds to
associated 3D compounds.
Take corporate IDs assigned to 2D compounds after being uploaded to
LiveDesign and apply them to corresponding 3D structures. This is only
done if:
1. There are 3D structures to upload, and
2. The user did not manually assign corporate IDs to them already
"""
compound_3d_item_map = defaultdict(list)
for item in self.get3DExportItems():
if isinstance(item.key, structure.Structure):
# Keep track of 3D export items keyed to structures
compound_3d_item_map[item.key].append(item)
if not compound_3d_item_map:
# Return early if none of the 3D structures were keyed by compound
return
for batch_idx, result in enumerate(self._results):
# Responses are returned in the order that the compounds were
# exported, so key the corporate IDs by the result index
idx_corp_id_map = {}
for compound_idx, compound_data in enumerate(result[RESPONSE]):
if compound_data:
idx_corp_id_map[compound_idx] = compound_data[CORPORATE_ID]
for st in self.compound_batches[batch_idx]:
compound_idx = self.structure_index_map.get(st)
if compound_idx is None:
continue
corp_id = idx_corp_id_map.get(compound_idx)
if corp_id is not None:
for item in compound_3d_item_map[st]:
item.setItemKey(corp_id)
class Export2DTask(BaseStructureExportTask86):
"""
Structure export task for 2D data for older (v8.6-) versions of LiveDesign.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._map_file_header = TSV_HEADER
self._map_file_base_name = '2Dmap'
@tasks.preprocessor(order=constants.ORDER_GEN_FILE)
def generateExportFiles(self):
"""
Create files necessary for the LiveDesign export and add them to the
`file_batches` parameter. This includes a 2D structure (.sdf) file and
an empty (but necessary) map file.
"""
self.compound_batches = list(
chunked(self.input.structures_for_2d_export, BATCH_SIZE))
for batch_idx, compound_batch in enumerate(self.compound_batches):
map_file_path = self._createMapFile([], batch_idx)
sdf_file_path = self._createSDFile(compound_batch, batch_idx)
file_batch = export_models.FileBatch()
file_batch.map_file_path = map_file_path
file_batch.sdf_file_path = sdf_file_path
self.file_batches.append(file_batch)
def _createSDFile(self, compounds, batch_idx):
"""
Create the SDF file for export to LiveDesign.
:param compounds: a list of structures to be exported to LiveDesign
:type compounds: list[structure.Structure]
:param batch_idx: the index of the export batch that this SDF file is
for
:type batch_idx: int
:return: the name of the SDF file
:rtype: str
"""
for idx, st in enumerate(compounds):
self.structure_index_map[st] = idx
sdf_file_name = self._getFilePath(batch_idx, ext='sdf')
with structure.StructureWriter(sdf_file_name) as writer:
writer.extend(compounds)
return sdf_file_name
class Export3DTask(BaseStructureExportTask86):
"""
Structure export task for 3D data for older (v8.6-) versions of LiveDesign.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._map_file_header = TSV_HEADER
self._map_file_base_name = '3Dmap'
@tasks.preprocessor(order=constants.ORDER_GEN_FILE)
def generateExportFiles(self):
"""
Create files necessary for the LiveDesign export and add them to the
`file_batches` parameter. This includes a 3D structure file and a
corresponding map file.
"""
# Do not export 3D items for which the key/corporate ID is another
# structure; they will have to wait until those other structures have
# been exported and been associated with a corporate ID value
items_to_export, other_items = [], []
for item in self.get3DExportItems():
if isinstance(item.key, structure.Structure):
other_items.append(item)
else:
items_to_export.append(item)
self.output.three_d_export_items = other_items
item_batches = chunked(items_to_export, BATCH_SIZE)
for batch_idx, item_batch in enumerate(item_batches):
three_d_file_path, st_eid_map = self._createPrjZipFile(
item_batch, batch_idx)
map_file_path = self._createMapFile(item_batch, batch_idx,
st_eid_map)
file_batch = export_models.FileBatch()
file_batch.map_file_path = map_file_path
file_batch.three_d_file_path = three_d_file_path
self.file_batches.append(file_batch)
def _createPrjZipFile(self, export_items, batch_idx):
"""
Write supplied 3D structures to a .prjzip file.
:param export_items: a list of 3D data for export
:type export_items: list[export_models.ThreeDExportItem]
:param batch_idx: the index of the export batch to which these
structures belong
:type batch_idx: int
:return: a tuple containing the .prjzip file path and a dictionary
mapping structures to their entry IDs in the .prjzip
:rtype: tuple[str, dict[structure.Structure, str]]
"""
structures = []
# Add all receptors first
for item in export_items:
rec = item.receptor
if rec and rec not in structures:
structures.append(rec)
# Add all 3D ligands
lig_idx = 0
ligands = []
for item in export_items:
lig = item.ligand
if lig and lig not in structures:
ligands.append(lig)
self.structure_index_map[lig] = lig_idx
lig_idx += 1
structures += ligands
self.compound_batches.append(ligands)
st_eid_map = {}
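# Make sure the mmtable library is initialized before creating the
# project; skip if something else already initialized it (nonzero
# refcount).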
if mm.mmtable_refcount() == 0:
mm.mmtable_initialize(mm.MMERR_DEFAULT_HANDLER)
prj_path = self._getFilePath(batch_idx, ext='prj')
ph = mmproj.mmproj_project_new(prj_path)
proj = project.Project(project_handle=ph)
for st in structures:
st_eid_map[st] = proj.importStructure(st).entry_id
proj.close()
# The LiveDesign conversion script expects this file in the project
# directory
add_view_path = os.path.join(prj_path, ADD_VIEW_TSV)
with open(add_view_path, 'w'):
pass
task_dir = self.getTaskDir()
zip_file_name = project.zip_project(prj_path, task_dir)
return zip_file_name, st_eid_map
def _getMapRows(self, item_3d, st_eid_map):
"""
Return mapping file row dictionaries for the specified 3D export item.
:param item_3d: a 3D export item
:type item_3d: export_models.ThreeDExportItem
:param st_eid_map: a dictionary mapping structures being exported to
the entry IDs for those structures in the .prjzip file being sent to
LiveDesign
:type st_eid_map: dict[structure.Structure, int]
:return: a list of mapping file row dictionaries for this item
:rtype: list[dict[str, str]]
"""
rows = []
corp_id = item_3d.getLigandCorpID()
rec_eid = st_eid_map.get(item_3d.receptor)
lig_eid = st_eid_map.get(item_3d.ligand)
for spec in item_3d.three_d_specs:
row = {
TSV_CORP_ID: corp_id,
TSV_MODEL_NAME: spec.ld_model,
TSV_TARGET_KEY: rec_eid,
TSV_LIGAND_KEY: lig_eid
}
rows.append(row)
return rows
class ExportTask87(BaseStructureExportTask):
"""
Export structure task for newer (v8.7+) versions of LiveDesign.
:cvar EMPTY_PATH_TUPLE: a tuple of `None` values to use as the default
return value from `st_path_map` when a key is not defined
:vartype EMPTY_PATH_TUPLE: tuple[NoneType, NoneType]
:ivar st_path_map: a dictionary mapping structures to a tuple of
(absolute path, relative path) where they have been stored as .mae
files
:vartype st_path_map: dict[structure.Structure, tuple[str, str] or
tuple[None, None]]
"""
EMPTY_PATH_TUPLE = (None, None)
st_path_map = parameters.NonParamAttribute()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.st_path_map = {}
self._map_file_header = TSV_HEADER87
self._map_file_base_name = '3Dmap'
@tasks.preprocessor(order=constants.ORDER_GEN_FILE)
def generateExportFiles(self):
"""
Create files necessary for the LiveDesign export and add them to the
`file_batches` parameter. This includes a SDF file (2D), a zipped
directory of MAE files (3D), and a TSV file (mapping) that describes
the relationship between the two.
"""
combined_batches = self._getCombinedBatches()
for batch_idx, combined_batch in enumerate(combined_batches):
compound_batch, item_batch = combined_batch
sdf_file_path = self._createSDFile(compound_batch, batch_idx)
zip_file_path = self._createMaeZip(item_batch, batch_idx)
map_file_path = self._createMapFile(item_batch, batch_idx)
file_batch = export_models.FileBatch()
file_batch.map_file_path = map_file_path
file_batch.sdf_file_path = sdf_file_path
file_batch.three_d_file_path = zip_file_path
self.file_batches.append(file_batch)
def _getCombinedBatches(self):
"""
Return a list of batches for simultaneous 2D and 3D export.
:return: a list of tuples, where each tuple contains
1. A list of compounds for 2D export, and
2. A corresponding list of 3D export items for 3D export
:rtype: List[Tuple[List[structure.Structure],
List[export_models.ThreeDExportItem]]]
"""
combined_batches = []
compound_batch, item_batch = [], []
items = self.get3DExportItems()
for st in self.input.structures_for_2d_export:
compound_batch.append(st)
compound_items = []
for item in items:
if st in [item.ligand, item.key]:
# The 3D batch should contain corresponding structures:
# structures meant for 3D export that are either identical
# to one of the 2D structures being exported, or which are
# keyed by those structures (for when the 3D structure does
# not match the 2D compound, such as for covalent docking
# systems)
compound_items.append(item)
item_batch += compound_items
for item in compound_items:
items.remove(item)
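# Close out the batch once either the 2D or the 3D list outgrows the
# batch size; 3D items always stay in the same batch as their
# corresponding 2D compounds.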
if max(len(compound_batch), len(item_batch)) > BATCH_SIZE:
combined_batches += [(compound_batch, item_batch)]
compound_batch, item_batch = [], []
if compound_batch or item_batch:
combined_batches += [(compound_batch, item_batch)]
# If any 3D items were not included in any batch, something went wrong.
# Make sure it is recorded so it does not become a silent failure.
self.output.unexported_items.extend(items)
return combined_batches
def _createSDFile(self, compounds, batch_idx):
"""
Create the SDF file for export to LiveDesign.
:param compounds: a list of structures to be exported to LiveDesign
:type compounds: list[structure.Structure]
:param batch_idx: the index of the export batch that this SDF file is
for
:type batch_idx: int
:return: the name of the SDF file
:rtype: str
"""
sdf_file_name = self._getFilePath(batch_idx, ext='sdf')
with structure.StructureWriter(sdf_file_name) as writer:
writer.extend(compounds)
return sdf_file_name
def _createMaeZip(self, export_items, batch_idx):
"""
Write supplied 3D structures to a zip archive of .mae files.
:param export_items: a list of 3D data for export
:type export_items: list[export_models.ThreeDExportItem]
:param batch_idx: the index of the export batch to which these
structures belong
:type batch_idx: int
:return: the .zip file path
:rtype: str
"""
structures = []
for item in export_items:
rec = item.receptor
if rec and rec not in structures:
structures.append(rec)
for item in export_items:
lig = item.ligand
if lig and lig not in structures:
structures.append(lig)
zip_file_path = self._getFilePath(batch_idx, ext='zip')
with zipfile.ZipFile(zip_file_path, 'w') as zip_handle:
for st_idx, st in enumerate(structures):
path_tuple = self.st_path_map.get(st)
if path_tuple:
# This structure has already been written to file, so use
# the existing files
mae_file_path, rel_path = path_tuple
else:
# Write this structure to a new file
mae_file_path = self._getFilePath(batch_idx,
base_name=f'st{st_idx}',
ext='mae')
st.write(mae_file_path)
com_path = os.path.commonpath(
[zip_file_path, mae_file_path])
rel_path = os.path.relpath(mae_file_path, com_path)
self.st_path_map[st] = (mae_file_path, rel_path)
zip_handle.write(mae_file_path, arcname=rel_path)
return zip_file_path
def _getMapRows(self, item_3d):
"""
Return mapping file row dictionaries for the specified 3D export item.
:param item_3d: a 3D export item
:type item_3d: export_models.ThreeDExportItem
:return: a list of mapping file row dictionaries for this item
:rtype: list[dict[str, str]]
"""
rows = []
compound_id = item_3d.getLigandCompoundID()
corp_id = item_3d.getLigandCorpID()
_, rec_path = self.st_path_map.get(item_3d.receptor,
self.EMPTY_PATH_TUPLE)
_, lig_path = self.st_path_map.get(item_3d.ligand,
self.EMPTY_PATH_TUPLE)
custom_text = self.input.pose_name_custom_text
propname = self.input.pose_name_propname
lig_name = None
if custom_text or propname:
lig_name = custom_text
if propname:
data_name = propname.dataName()
lig_name += str(item_3d.ligand.property.get(data_name, ''))
rec_name = item_3d.receptor.title if item_3d.receptor else None
for spec in item_3d.three_d_specs:
row = {
PROPNAME_COMPOUND_ID: compound_id,
TSV_ENTITY_ID: corp_id,
TSV_MODEL_NAME87: spec.ld_model,
TSV_REC_PATH: rec_path,
TSV_LIG_PATH: lig_path,
TSV_REC_NAME: rec_name,
TSV_LIG_NAME: lig_name
}
rows.append(row)
return rows
class AttachmentExportTask(LDExportTaskMixin, tasks.ThreadFunctionTask):
"""
Export task for FFC attachment data.
:ivar attachment_data_map: a dictionary mapping the column name for an
attachment to a data class storing other information about that
attachment
:vartype attachment_data_map: dict[str, export_models.AttachmentData]
"""
input: export_models.AttachmentTaskInput
output: export_models.AttachmentTaskOutput
attachment_data_map = parameters.NonParamAttribute()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.attachment_data_map = {}
@tasks.preprocessor(order=constants.ORDER_PROP_DICT)
def _addFFCData(self):
"""
Update task property dictionaries used to specify exported structure
property data.
"""
for ffc_spec in self.input.ffc_export_specs:
ffc_spec.addDataToExportTask(self)
def mainFunction(self):
"""
Upload the attachment files and associate them with the appropriate
LiveReport cells.
"""
attachment_id_map = self._getAttachmentIDMap()
for col_name, attachment_ids in attachment_id_map.items():
att_data = self.attachment_data_map[col_name]
column_id = self._getFreeformColumn(col_name, att_data.description)
success = self._addValuesToFreeformColumn(attachment_ids, column_id)
if success:
self.output.num_success += 1
else:
self.output.num_failure += 1
def _getAttachmentIDMap(self):
"""
Upload the attachments to LiveDesign and build a map relating column
names to the resulting attachment IDs, based on the specifications in
`attachment_data_map`.
The list of attachment IDs mirrors the order of the attachment items,
with the same ID repeated once per row structure of each item: all
structures of the first item, then the second, and so on.
:return: Map relating column names to attachment IDs
:rtype: dict(str, list(str))
"""
ld_client = self.input.ld_client
proj_id = self._getProjectID()
attachment_id_map = {}
with self.handleLDExceptions():
for col_name, att_data in self.attachment_data_map.items():
attachment_ids = []
for attachment_item in att_data.attachment_items:
attachment_id = upload_utils.upload_ld_attachment(
attachment_item.file_path,
proj_id,
ld_client,
remote_file_name=attachment_item.remote_file_name,
file_type=attachment_item.file_type)
# Associate the same attachment ID with every structure
for _ in attachment_item.row_structures:
attachment_ids.append(attachment_id)
attachment_id_map[col_name] = attachment_ids
return attachment_id_map
def _getFreeformColumn(self, column_name, description):
"""
Obtain an existing freeform column, or create a new one and add it to
the LiveReport associated with this export process.
:param column_name: the name of the freeform attachment column
:type column_name: str
:param description: the column description
:type description: str
:return: the ID of the freeform column, if available
:rtype: str or None
"""
ld_client = self.input.ld_client
ld_models = self.input.ld_models
published = self.input.publish_data
lr_id = self.input.ld_destination.lr_id
proj_id = self._getProjectID()
with self.handleLDExceptions():
# Keep the deprecated `freeform_columns()` project ID argument
# to support older versions of LDClient.
ffcs = ld_client.freeform_columns(proj_id)
for ffc in ffcs:
if published and ffc.project_id != str(proj_id):
# If the column was published, compare the project ID
continue
elif not published and ffc.live_report_id != lr_id:
# If the column was not published, compare the live
# report ID
continue
elif ffc.published == published and ffc.name == column_name:
# A FFC with our specifications already exists, so use
# that one rather than adding a new one.
column = ffc
break
else:
# No column with our specifications exists, so create a new
# one.
column_model = ld_models.FreeformColumn(
column_name,
description,
published=published,
project_id=proj_id,
live_report_id=lr_id,
type=ld_models.FreeformColumn.COLUMN_ATTACHMENT)
column = ld_client.create_freeform_column(column_model)
ld_client.add_columns(lr_id, [column.id])
return column.id
def _addValuesToFreeformColumn(self, values, column_id):
"""
Add values to the `column_id` free form column of the live report
associated with this export process.
:param values: the values to add
:type values: list(object)
:param column_id: the column ID of the attachment column
:type column_id: str
:return: whether the export is successful
:rtype: bool
"""
lr_id = self.input.ld_destination.lr_id
proj_id = self._getProjectID()
ld_client = self.input.ld_client
ld_models = self.input.ld_models
published = self.input.publish_data
observations = set()
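# Pair each attachment value with the corporate ID of the corresponding
# exported compound; the two lists are expected to be in the same order.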
for value, corporate_id in zip(values, self.input.corp_ids):
obs = ld_models.Observation(proj_id,
corporate_id,
column_id,
value,
live_report_id=lr_id,
published=published)
observations.add(obs)
if not observations:
msg = 'No observations prepared for export.'
self.setErrorStatus(self.name, msg)
return False
with self.handleLDExceptions():
ld_client.add_freeform_column_values(observations)
return True
@tasks.postprocessor(order=constants.ORDER_COLLECT_RESULTS)
def _collectExportResults(self):
"""
Keep track of the status of finished export processes.
"""
if self.status == tasks.Status.FAILED:
self.output.num_failure += 1
@tasks.postprocessor()
def _removeLocalFiles(self):
for ffc_spec in self.input.ffc_export_specs:
ffc_spec.removeLocalFiles()
EXPORT_TASK_MAP = {
TaskType.two_d: Export2DTask,
TaskType.three_d: Export3DTask
} # yapf: disable
def export_to_ld(ld_client,
project_name,
lr_name,
lr_id,
prop_dicts,
publish_data,
compound_source,
export_type,
map_file_path,
sdf_file_path=None,
three_d_file_path=None):
"""
Export the supplied data to LiveDesign.
:param ld_client: LD client session
:type ld_client: ldclient.LDClient
:param project_name: the name of the LiveDesign project
:type project_name: str
:param lr_name: the name of the LiveReport
:type lr_name: str
:param lr_id: the ID of the LiveReport
:type lr_id: int
:param prop_dicts: property arguments that specify which structure
properties should be converted into LiveDesign columns
:type prop_dicts: list[dict(str, str)]
:param publish_data: whether the exported data should be published
globally for all LiveDesign users
:type publish_data: bool
:param compound_source: the compound source argument required for
certain LD versions
:type compound_source: str or None
:param export_type: the type of export to perform.
:type export_type: ExportType
:param map_file_path: the path of the mapping file
:type map_file_path: str
:param sdf_file_path: optionally, the path of an SDF file
:type sdf_file_path: str or NoneType
:param three_d_file_path: optionally, the path of a 3D file
:type three_d_file_path: str or NoneType
:return: task id
:rtype: int
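Example (illustrative sketch; assumes a connected `ldclient.LDClient`
session and export files prepared by one of the tasks above; the
property name and file paths are placeholders)::

    task_id = export_to_ld(
        ld_client,
        project_name='Project A',
        lr_name='My LiveReport',
        lr_id=1234,
        prop_dicts=[export_models.make_prop_dict(name='Docking Score')],
        publish_data=False,
        compound_source=None,
        export_type=ExportType.maestro_sdf,
        map_file_path='batch_00_map.tsv',
        sdf_file_path='batch_00.sdf',
        three_d_file_path='batch_00.zip')
    result_url = ld_client.wait_and_get_result_url(task_id, timeout=3600)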
"""
# The corporate_id_column argument controls which structure property is
# used to match existing compounds by corporate ID. For the SDF export
# type, the property data name must be set to s_m_Corporate_ID. For the
# MAESTRO export type, the property display name must be given and the
# prefix "s_m_" is appended to the argument. Since the corporate ID data
# may live in columns other than Maestro properties (where the appended
# prefix would be incorrect), we use a single property, 'Corporate ID',
# to pass the corporate ID data. See SS-24830 for details.
corporate_id_prop = ('Corporate ID' if export_type == ExportType.maestro
else constants.PROPNAME_CORP_ID)
kwargs = dict(project=project_name,
mapping_file_name=map_file_path,
corporate_id_column=corporate_id_prop,
live_report_name=lr_name,
published=publish_data,
properties=prop_dicts,
export_type=str(export_type),
live_report_id=lr_id)
# Add conditional arguments
if compound_source:
kwargs.update(compound_source=compound_source)
if not use_new_export(ld_client):
data_file_path = sdf_file_path or three_d_file_path
sha1 = ld_utils.get_sha1(data_file_path)
new_kwargs = dict(data_file_name=data_file_path, sha1=sha1)
export_method = ld_client.start_export_assay_and_pose_data
else:
sdf_sha1 = ld_utils.get_sha1(sdf_file_path)
three_d_sha1 = ld_utils.get_sha1(three_d_file_path)
map_file_sha1 = ld_utils.get_sha1(map_file_path)
new_kwargs = dict(sdf_file_name=sdf_file_path,
sdf_file_sha1=sdf_sha1,
mapping_file_sha1=map_file_sha1,
three_d_file_name=three_d_file_path,
three_d_file_sha1=three_d_sha1)
export_method = ld_client.load_assay_and_pose_data
kwargs.update(new_kwargs)
return export_method(**kwargs)
def create_live_report(ld_client, ld_models, proj_name, title):
"""
Create a new LiveReport.
:param ld_client: the LiveDesign client instance
:type ld_client: client.LDClient
:param ld_models: the livedesign models module
:type ld_models: module
:param proj_name: the name of the project to which the LiveReport should be
added
:type proj_name: str
:param title: the desired LiveReport title
:type title: str
:return: the new LiveReport
:rtype: models.LiveReport
"""
project_id = ld_client.get_project_id_by_name(proj_name)
live_report = ld_models.LiveReport(title=title, project_id=project_id)
return ld_client.create_live_report(live_report)
def use_new_export(ld_client):
"""
Whether to use the new (LD v8.7+) export API.
:param ld_client: the LiveDesign client instance
:type ld_client: client.LDClient
:return: whether to use the new (LD v8.7+) export process
:rtype: bool
"""
return login.get_LD_version(ld_client) >= login.LD_VERSION_NEW_EXPORT