import copy
from collections import OrderedDict
from collections import defaultdict
import requests
from schrodinger import structure
from schrodinger.utils import fileutils
from . import constants
from . import ld_utils
from . import login
# Keys and values of the "source" dictionaries that are passed to
# `LDClient.export_xtals()`
TARGET = 'TARGET'
LIGAND = 'LIGAND'
POSE_ID = 'pose_id'
CORPORATE_ID = 'corporate_id'
FILE_NAME = 'file_name'
MODEL_NAME = 'model_name'
STRUCTURE_TYPE = 'structure_type'
# File format requested from `LDClient.export_xtals()`; also used as the
# suffix of the temporary file the downloaded structures are written to
POSE_EXT = 'mol2'
# LiveDesign column properties
COMPOUND_STRUCTURE = 'Compound Structure'
VALUE_TYPE_3D = '3D'
FREEFORM_COLUMN = 'freeform'
# Structure properties
SD_ID = 's_sd_ID'
SD_ALL_IDS = 's_sd_All_IDs'
# Maestro grouping: key under which the 2D tabular data is stored in the
# dictionary returned by `ImportController.import3DColumns()`
GROUPNAME_2D_DATA = '2D Compounds'
# LiveDesign metadata keys
ROW_INFO_KEY = 'row_infos'
DISPLAY_ID_KEY = 'display_id'
ENTITY_ID_KEY = 'entity_id'
STRUCTURE_ATTACHMENTS = 'structure_attachments'
STRUCTURE_TRANSFORMATION = 'structure_transformation'
FILE_UPLOAD = 'file_upload'
# When retrieving structures from `LDClient.export_xtals_result()`, the title
# property of those structures is a period-delimited string containing
# multiple pieces of information. `NUM_TITLE_PROPERTIES` records the number of
# properties we expect the title to store, so that if splitting the title on
# periods produces more than this number of terms, we know that the first
# element (the column title) itself contains periods.
NUM_TITLE_PROPERTIES = 5
[docs]class ImportController(object):
def __init__(self, ld_client):
    """
    Store the LiveDesign client and look up the version of its server.

    :param ld_client: client used for all communication with LiveDesign
    :type ld_client: LDClient
    """
    # Maps imported structures to their entity IDs; filled in by
    # importTabularData()
    self._ld_id_dict = {}
    self.ld_client = ld_client
    self._version = login.get_LD_version(ld_client)
def importTabularData(self, lr_id, lr_col_ids):
    """
    Download the tabular data of a Live Report as a list of structures.

    The data includes identifiers for data in attachment columns, which can
    be imported with further calls to ImportController. The mapping from
    each structure to its entity ID is stored on the controller and cached
    on the structures as a property.

    :param lr_id: the live report id
    :type lr_id: str

    :param lr_col_ids: ids of the desired columns from the live report. If
        `None`, all column data will be downloaded.
    :type lr_col_ids: `None`, or `list` of `int`

    :return: structures containing all the tabular data
    """
    exported = self.ld_client.export_live_report(lr_id, projection=lr_col_ids)
    # The export is decoded as UTF-8 before being parsed as SD text
    sd_text = str(exported, encoding='utf-8')
    with structure.StructureReader.fromString(sd_text,
                                              format=structure.SD) as reader:
        tabular_data_sts = [st for st in reader]

    # Record the corporate ID (AKA the entity ID) of every imported
    # structure, both on the controller and as a structure property.
    entity_ids = get_st_entity_id_map(self.ld_client, tabular_data_sts, lr_id)
    self._ld_id_dict = entity_ids
    cache_entity_IDs(tabular_data_sts, entity_ids)
    return tabular_data_sts
def import3DColumns(self,
                    lr_id,
                    lr_columns,
                    tabular_data_sts,
                    lr_rows,
                    callback=lambda col_name: None):
    """
    Download the 3D data of every column in `lr_columns` whose value type
    is 3D and combine it with the 2D tabular data.

    The result is an ordered dictionary keyed by column name. Its first
    entry (key `GROUPNAME_2D_DATA`) holds copies of the 2D tabular
    structures; the 3D columns follow in the order they appear in
    `lr_columns`. Every value is a pair: an ordered dictionary of
    {pose_id: ligand} mappings and a dictionary of {protein: pose_ids}
    mappings. For the 2D entry the "pose ids" are dummy indices and the
    only protein key is `None`.

    :param lr_id: The live report id
    :type lr_id: str

    :param lr_columns: the desired columns from the live report
    :type lr_columns: [ld_models.Column]

    :param tabular_data_sts: The tabular data imported from LiveDesign
    :type tabular_data_sts: [structure.Structure]

    :param lr_rows: a dictionary containing row information downloaded from
        the LiveDesign server
    :type lr_rows: `dict`

    :param callback: a function called with the name of each 3D column just
        before that column is downloaded, ie callback(str)
    :type callback: callable

    :return: An ordered dictionary by column name of pairs of dictionaries
        containing the pose_id: ligand and protein: pose_id mappings.
    :rtype: {str: ({int: structure.Structure}, {structure.Structure: int})}
    """
    tabular_copies = self._combineTabularData(tabular_data_sts)
    # The 2D data has no proteins, so all of its indices go under `None`
    no_protein_dict = {None: list(tabular_copies)}

    imported_sts = OrderedDict()
    imported_sts[GROUPNAME_2D_DATA] = (tabular_copies, no_protein_dict)

    columns_3d = list(
        filter(lambda col: col.value_type == VALUE_TYPE_3D, lr_columns))
    for column in columns_3d:
        callback(column.name)
        pose_sts, protein_dict = self._import3DCol(lr_id, column,
                                                   tabular_data_sts, lr_rows)
        imported_sts[column.name] = (pose_sts, protein_dict)
    return imported_sts
def _import3DCol(self, lr_id, col_3d, bare_sts, lr_rows):
"""
Import xtal data from a single column for a list of structures. Each
structure can have 0 to many poses in a column. Returns an ordered
dictionary of {pose_id: ligand} mappings and an ordered dictionary of
{protein: pose_id} mappings.
:param lr_id: The live report id
:type lr_id: str
:param col_3d: a livedesign models Column object representing a 3D data
column.
:type col_3d: ld_models.Column
:param bare_sts: a list of structures, each having a property
corresponding to col_3d, whose value is a list of pose ids.
:type bare_sts: [structure.Structure]
:param lr_rows: a dictionary containing row information downloaded from
the LiveDesign server
:type lr_rows: `dict`
:return: the pose_id: ligand mappings and the protein: pose_id mappings
:rtype: OrderedDict({int: structure.Structure}),
OrderedDict({structure.Structure: int})
"""
lig_sources, prot_sources, prot_duplicates, ligand_duplicates = \
self._makeXtalSourceDicts(lr_id, col_3d, bare_sts, lr_rows)
ligand_dict, protein_dict = self._downloadStructures(
lig_sources, prot_sources, prot_duplicates, ligand_duplicates)
combined_sts = self._combineStructures(bare_sts, ligand_dict, col_3d)
return combined_sts, protein_dict
def _combineStructures(self, bare_sts, ligand_dict, col_3d):
    """
    Copy the properties of the bare 2D structures onto their matching 3D
    ligand structures, choosing the matching strategy by server version.

    Servers older than `login.LD_VERSION_REAL_VIRTUAL` are matched by
    entity ID; all other servers are matched by pose ID.

    :param bare_sts: the original structures, each representing a row of a
        live report
    :type bare_sts: [structure.Structure]

    :param ligand_dict: A nested ordered dictionary of the ligand
        structures, keyed by corporate id and pose id.
    :type ligand_dict: {str: OrderedDict({int: structure.Structure})}

    :param col_3d: a livedesign models Column object representing a 3D data
        column.
    :type col_3d: ld_models.Column

    :return: An ordered dictionary of the ligand structures keyed by pose id
    :rtype: OrderedDict({int: structure.Structure})
    """
    # FIXME: This should be separated out into separate classes. Also, this
    # should no longer be done since export_3d.py in LD should be modified
    # to return the 3D data with the properties instead of matching the
    # 2D structures with the 3D to copy the structure properties.
    match_by_entity_id = self._version < login.LD_VERSION_REAL_VIRTUAL
    if match_by_entity_id:
        return self._combineStructuresUsingEntityID(bare_sts, ligand_dict)
    return self._combineStructuresUsingPoseID(bare_sts, ligand_dict, col_3d)
def _combineStructuresUsingEntityID(self, bare_sts, ligand_dict):
"""
Combine all of the downloaded structures with their original structures
using the Entity ID and assemble them into an ordered dictionary keyed
by pose id.
All properties are copied from their original structures. The corporate
id is added to the title of each structure, which is then flattened out
of the resulting dictionary.
:param bare_sts: the original structures, each representing a row of a
live report
:type bare_sts: [structure.Structure]
:param ligand_dict: A nested ordered dictionary of the ligand
structures, keyed by corporate id and pose id.
:type ligand_dict: {str: OrderedDict({int: structure.Structure})}
:return: An ordered dictionary of the ligand structures keyed by pose id
:rtype: OrderedDict({int: structure.Structure})
"""
pose_sts = OrderedDict()
if not ligand_dict:
return pose_sts
for st in bare_sts:
# For LD versions >= 8.1, we must determine the entity / corporate
# ID specifically due to the loss of order of the structures.
corporate_id = self._ld_id_dict.get(st)
pose_lig_odict = ligand_dict[corporate_id]
for pose_id, lig_st in pose_lig_odict.items():
combined_lig_st = self._combineStructure(st, st_3d=lig_st)
combined_lig_st.title = f'{corporate_id} (pose {pose_id})'
pose_sts[pose_id] = combined_lig_st
return pose_sts
def _combineStructuresUsingPoseID(self, bare_sts, ligand_dict, col_3d):
    """
    Combine all of the downloaded structures with their original structures
    using the Pose IDs and assemble them into an ordered dictionary keyed
    by pose id.

    All properties are copied from the original structure onto each of its
    ligand poses, and the title of each pose is set to
    "<corporate id> (pose <pose id>)". Rows that do not carry the column
    property at all (no poses in this column) are skipped.

    Note: This is a temporary fix for 17-4.
    There has been a change for LD 8.2+ where the real compounds are
    separated from virtual compounds, so the entity ID returned by LDClient
    methods will be different according to which type of compound
    (real or virtual) the method is acting on.

    :param bare_sts: the original structures, each representing a row of a
        live report
    :type bare_sts: [structure.Structure]

    :param ligand_dict: A nested ordered dictionary of the ligand
        structures, keyed by corporate id and pose id.
    :type ligand_dict: {str: OrderedDict({int: structure.Structure})}

    :param col_3d: a livedesign models Column object representing a 3D data
        column.
    :type col_3d: ld_models.Column

    :return: An ordered dictionary of the ligand structures keyed by pose id
    :rtype: OrderedDict({int: structure.Structure})

    :raise KeyError: if a row references a pose id that is not present in
        `ligand_dict`, or a row with poses has no `SD_ID` property
    """
    # FIXME: This is a temporary fix for 17-4 so that it will work with the
    # current production version of LD 8.2, which has reinvented the
    # definition of the entity ID so it is no longer identical across the
    # 2D and 3D data to do the matching. Thus, the pose ID is used instead,
    # which from my understanding should be identical in 8.2. See
    # PANEL-11225 for more info.
    pose_sts = OrderedDict()
    if not ligand_dict:
        return pose_sts

    # Arrange the ligands by their pose IDs
    ligand_by_pose_ids = {}
    for pose_lig_odict in ligand_dict.values():
        ligand_by_pose_ids.update(pose_lig_odict)

    # Each 2D structure holds the LR column data as a property with the
    # column name being the key. The value may have been stored as either a
    # string or an integer property.
    col_3d_string_property = structure.PropertyName(
        type=structure.PROP_STRING, family='sd',
        username=col_3d.name).dataName()
    col_3d_int_property = structure.PropertyName(
        type=structure.PROP_INTEGER, family='sd',
        username=col_3d.name).dataName()

    for st in bare_sts:
        # Get the pose IDs related to this 2D structure / row in LR, and
        # use it to match the 3D ligand structures
        prop_value = st.property.get(col_3d_string_property)
        if prop_value is None:
            prop_value = st.property.get(col_3d_int_property)
        if prop_value is None:
            # The row has no value for this column. Converting None to a
            # string would yield 'None', which cannot be parsed as an int.
            continue
        # Multiple pose ids are newline-separated within one property
        pose_ids = [
            int(p_id) for p_id in str(prop_value).split('\n') if p_id
        ]
        corporate_id = st.property[SD_ID]
        for pose_id in pose_ids:
            lig_st = ligand_by_pose_ids[pose_id]
            combined_lig_st = self._combineStructure(st, st_3d=lig_st)
            combined_lig_st.title = f'{corporate_id} (pose {pose_id})'
            pose_sts[pose_id] = combined_lig_st
    return pose_sts
def _makeLiveDesignIDsDict(self, lr_id):
    """
    Build a lookup from each compound's display id in the LR to its entity
    id, using the live report results metadata.

    Both ids are stripped of surrounding whitespace.

    :param lr_id: the live report id
    :type lr_id: str

    :return: dictionary mapping display ids of compounds to their entity id
    :rtype: `dict(str, str)`
    """
    # All the metadata about the LiveReport, in JSON format
    metadata = self.ld_client.live_report_results_metadata(lr_id)
    ld_id_dict = {}
    for row_info in metadata[ROW_INFO_KEY]:
        display_id = row_info[DISPLAY_ID_KEY].strip()
        entity_id = row_info[ENTITY_ID_KEY].strip()
        ld_id_dict[display_id] = entity_id
    return ld_id_dict
def _makeXtalSourceDicts(self, lr_id, col_3d, bare_sts, lr_rows):
    """
    Generate the source dictionaries used as an argument to
    ld_client.export_xtals. Creates a list of dictionaries for both the
    ligands and proteins, along with the duplicate pose ids for the
    proteins and the duplicate ids for the ligands.

    :param lr_id: The live report id
    :type lr_id: str

    :param col_3d: The live report column
    :type col_3d: ld_models.Column

    :param bare_sts: The structures to make dictionaries for
    :type bare_sts: [structure.Structure]

    :param lr_rows: a dictionary containing row information downloaded from
        the LiveDesign server
    :type lr_rows: `dict`

    :return: the ligand sources, protein sources, and the duplicate protein
        pose ids, and the duplicate ligand corporate and pose ids.
    :rtype: [{}], [{}], {int: [int]}, {str: {int: [(str, int)]}
    """
    # FIXME: Once all LD servers are upgraded to 8.1, we can remove this
    # check.
    is_legacy_server = self._version < login.LD_VERSION_MULTIPLE_IDS
    if is_legacy_server:
        protein_duplicates = defaultdict(list)
        ligand_duplicates = defaultdict(list)
        params = self._getXtalSourceParamsDepracated(
            col_3d, bare_sts, lr_rows)
    else:
        params, protein_duplicates, ligand_duplicates = \
            self._getXtalSourceParams(col_3d, lr_id)

    lig_sources = []
    prot_sources = []
    for pose_id, corporate_id in params:
        source_ligand = {
            MODEL_NAME: col_3d.name,
            FILE_NAME: None,
            STRUCTURE_TYPE: LIGAND,
            CORPORATE_ID: corporate_id,
            POSE_ID: pose_id
        }
        lig_sources.append(source_ligand)
        # If LD server is >= 8.1, we know which proteins are duplicates and
        # thus do not set up dicts to download them multiple times. The keys
        # of `protein_duplicates` are the unique pose ids, so membership is
        # tested on the dictionary itself rather than on a list copy of its
        # keys. Legacy servers always get a protein source.
        if is_legacy_server or pose_id in protein_duplicates:
            source_target = source_ligand.copy()
            source_target[STRUCTURE_TYPE] = TARGET
            prot_sources.append(source_target)
    return lig_sources, prot_sources, protein_duplicates, ligand_duplicates
def _getXtalSourceParams(self, col_3d, lr_id):
    """
    Collect the pose ids and corporate ids of all poses of the LR that
    belong to `col_3d`, and work out which proteins and ligands are
    duplicates of one another.

    Two poses are considered duplicates when they share the same
    'file_upload' structure attachment id. The first pose seen for an
    attachment id is the "unique" one; later poses are recorded as its
    duplicates. Poses without a protein are all grouped under the
    attachment id `None`. Poses whose ligand has no structure attachments
    are left out of the ligand duplicate map.

    :param col_3d: The live report column
    :type col_3d: ld_models.Column

    :param lr_id: The live report id
    :type lr_id: str

    :return: the pose ids and corporate ids of all the structures in the LR
        rows, a map of unique protein pose ids to their duplicates, and a
        map of unique ligand corporate ids and pose ids to their duplicate
        (corporate id, pose id) pairs.
    :rtype: [(int, str)], {int: [int]}, {str: {int: [(str, int)]}
    """
    # (pose_id, corporate_id) of every pose in the column
    params = []
    # FIXME: This is done so to find duplicates using the corporate id, and
    # pose ids. This will be refactored in PANEL-11245.
    # attachment id -> pose id of the first pose seen with that protein
    first_pose_by_protein = {}
    # attachment id -> (corporate id, pose id) of the first pose seen with
    # that ligand
    first_ids_by_ligand = {}
    protein_duplicates = defaultdict(list)
    ligand_duplicates = defaultdict(OrderedDict)

    for pose in self.ld_client.pose_search(lr_id):
        if pose['column_id'] != col_3d.id:
            continue
        corporate_id = pose['ligand']['entity_id']
        pose_id = int(pose['id'])
        params.append((pose_id, corporate_id))

        # Proteins: a missing protein is tracked under the `None` id
        protein = pose['protein']
        if protein:
            protein_id = self._getStructureAttachmentID(
                protein[STRUCTURE_ATTACHMENTS])
        else:
            protein_id = None
        if protein_id in first_pose_by_protein:
            first_pose_id = first_pose_by_protein[protein_id]
            protein_duplicates[first_pose_id].append(pose_id)
        else:
            first_pose_by_protein[protein_id] = pose_id
            protein_duplicates[pose_id] = []

        # Ligands: nothing to record without structure attachments
        ligand = pose['ligand']
        if not (ligand and ligand[STRUCTURE_ATTACHMENTS]):
            continue
        ligand_id = self._getStructureAttachmentID(
            ligand[STRUCTURE_ATTACHMENTS])
        if ligand_id in first_ids_by_ligand:
            first_corporate_id, first_pose_id = first_ids_by_ligand[ligand_id]
            ligand_duplicates[first_corporate_id][first_pose_id].append(
                (corporate_id, pose_id))
        else:
            first_ids_by_ligand[ligand_id] = (corporate_id, pose_id)
            ligand_duplicates[corporate_id][pose_id] = []
    return params, protein_duplicates, ligand_duplicates
def _getStructureAttachmentID(self, structure_attachments):
    """
    Return the id of the first attachment whose 'structure_transformation'
    field is 'file_upload'.

    Each protein or ligand pose dictionary object returned by LDClient
    holds multiple structure attachment IDs, of which only the one with
    the 'file_upload' transformation is the relevant ID.

    :param structure_attachments: metadata dictionaries returned by LDClient
    :type structure_attachments: List of Dict

    :return: structure attachment ID, or None if no attachment matches
    :rtype: str or None
    """
    file_upload_ids = (attachment['id']
                       for attachment in structure_attachments
                       if attachment[STRUCTURE_TRANSFORMATION] == FILE_UPLOAD)
    return next(file_upload_ids, None)
def _getXtalSourceParamsDepracated(self, col_3d, bare_sts, lr_rows):
    """
    Build the list of (pose id, corporate id) pairs for the given 2D
    structures by reading the pose ids out of the LR row information.

    Warning: this method will be removed once all LD servers migrate to 8.1
    or above.

    :param col_3d: The live report column
    :type col_3d: ld_models.Column

    :param bare_sts: The structures to make dictionaries for
    :type bare_sts: [structure.Structure]

    :param lr_rows: a dictionary containing row information downloaded from
        the LiveDesign server; it is indexed by structure title
    :type lr_rows: `dict`

    :return: the pose ids and corporate ids of all the structures in the LR
        rows
    :rtype: [(str, str)]
    """
    params = []
    for st in bare_sts:
        cell = lr_rows[st.title]['cells'][col_3d.id]
        pose_ids = [entry['value'] for entry in cell['values']]
        corporate_id = st.property[SD_ID]
        params.extend((pose_id, corporate_id) for pose_id in pose_ids)
    return params
def _downloadStructures(self, lig_sources, prot_sources, prot_duplicates,
                        ligand_duplicates):
    """
    Download all of the xtal data from LiveDesign defined in the protein
    and ligand sources lists.

    One export task is launched per protein source, so that a pose without
    a protein (reported by the server as an HTTP error) only affects that
    one pose. The protein tasks are launched first because they take the
    longest by far; the ligands are then downloaded in a single task, and
    finally the protein results are collected and the duplicate protein
    pose ids added back in.

    :param lig_sources: A list of source dictionaries. Each dictionary
        represents a pose.
    :type lig_sources: [{}]

    :param prot_sources: A list of source dictionaries. Each dictionary
        represents a protein.
    :type prot_sources: [{}]

    :param prot_duplicates: a map of unique pose IDs to a list of pose IDs
        that share the same structure
    :type prot_duplicates: {int: [int]}

    :param ligand_duplicates: a map of unique corporate IDs to a map of
        unique pose IDs to a list of duplicate corporate and pose IDs,
        where each of these duplicate ids share the same structure.
    :type ligand_duplicates: {str: {int: [(str, int)]}

    :return: a nested dictionary of ligand structures keyed by corporate id
        and pose id, and a mapping from protein structures to pose ids.
    :rtype: {str: OrderedDict({int: structure.Structure})},
        OrderedDict({structure.Structure, [int]})
    """
    # (task id, pose id) for every protein source; the task id is None when
    # the export could not be launched
    prot_task_ids = []
    for prot_source in prot_sources:
        # FIXME: Pre-8.1: There currently isn't a way to figure out whether
        # a particular pose contains a protein or not, so we have to catch
        # the HTTP exception in case the pose doesn't hold a protein. This
        # check can be removed once LD servers are updated.
        try:
            task_id = self.ld_client.export_xtals([prot_source], POSE_EXT)
        except requests.HTTPError:
            # No protein was found for this pose
            task_id = None
        prot_task_ids.append((task_id, prot_source[POSE_ID]))

    ligand_dict = self._downloadLigandStructures(lig_sources,
                                                 ligand_duplicates)
    protein_dict = self._getDownloadedProteins(prot_task_ids)
    self._appendDuplicateProteins(protein_dict, prot_duplicates)
    return ligand_dict, protein_dict
def _downloadLigandStructures(self, lig_sources, ligand_duplicates):
"""
Download the ligand structures from LiveDesign using the source dicts.
Any duplicate strucrures stripped out by LD are added back in using the
ligand_duplicates.
:param lig_sources: A list of source dictionaries. Each dictionary
represents a pose.
:type lig_sources: [{}]
:param ligand_duplicates: a map of unique corporate IDs to a map of
unique pose IDs to a list of duplicate corporate and pose IDs,
where each of these duplicate ids share the same structure.
:type ligand_duplicates: {str: {int: [(str, int)]}
:return: a nested dictionary of ligand structures keyed by corporate id
and pose id
:rtype: {str: OrderedDict({int: structure.Structure})}
"""
if not lig_sources:
return defaultdict(OrderedDict)
# launch ligand export task
lig_task_id = self.ld_client.export_xtals(lig_sources, POSE_EXT)
# get ligand task results
lig_res_url = self.ld_client.wait_and_get_result_url(lig_task_id)
lig_xtal_res = self.ld_client.export_xtals_result(lig_res_url)
ligand_sts = self._readXtalResult(lig_xtal_res)
# If I understand ldclient correctly this should always be True
# It is possible the number of returned structures is not equal to the
# number of input source dicts as LD strips out the duplicate
# structures for versions 8.1+.
msg = ('The number of ligand structures is greater than the number of'
' sources.')
assert ligand_sts and len(ligand_sts) <= len(lig_sources), msg
ligand_dict = self._orderDownloadedLigands(lig_sources, ligand_sts,
ligand_duplicates)
return ligand_dict
def _orderDownloadedLigands(self, lig_sources, ligand_sts, duplicates):
    """
    Arrange the downloaded ligand structures into a nested dictionary keyed
    by corporate id and pose id.

    Starting from 8.1 the results from LDClient.export_xtals_result() no
    longer guarantee that the order of the structures returned matches the
    input source dicts. Thus, for LD servers 8.1 and above, the titles of
    the structures are parsed to retrieve the pose and corporate ids, and
    the duplicates of each structure are added back in. For older servers
    the structures are paired with the sources in order.

    :param lig_sources: A list of source dictionaries. Each dictionary
        represents a pose.
    :type lig_sources: [{}]

    :param ligand_sts: a list of the ligand structures
    :type ligand_sts: [structure.Structure]

    :param duplicates: a map of unique ligand corporate ids to a map of
        pose ids to duplicate ids.
    :type duplicates: {str: {int: [(str, int)]}

    :return: a nested dictionary of ligand structures keyed by corporate id
        and pose id
    :rtype: {str: OrderedDict({int: structure.Structure})}
    """
    ligand_dict = defaultdict(OrderedDict)
    ids_are_in_titles = self._version >= login.LD_VERSION_MULTIPLE_IDS

    if not ids_are_in_titles:
        # Older servers return the structures in the order of the sources
        for source, lig_st in zip(lig_sources, ligand_sts):
            corporate_id, pose_id = source[CORPORATE_ID], source[POSE_ID]
            ligand_dict[corporate_id][pose_id] = lig_st
        return ligand_dict

    for lig_st in ligand_sts:
        # For some reason the structure's LD properties are set within
        # the title as: <col title>.<corporate_id>.pose_<pose_id>.ETC;
        # because the column <col title> itself may contain periods, only
        # the trailing elements that follow the column title are kept.
        title_fields = lig_st.title.split('.')
        id_fields = title_fields[-NUM_TITLE_PROPERTIES + 1:]
        corporate_id = id_fields[0]
        pose_id = int(id_fields[1].replace('pose_', ''))
        ligand_dict[corporate_id][pose_id] = lig_st
        # Add in the duplicate entries here so we don't have to traverse
        # the list twice.
        self._appendDuplicateLigands(corporate_id, pose_id, ligand_dict,
                                     duplicates)
    return ligand_dict
def _appendDuplicateLigands(self, org_corporate_id, org_pose_id,
                            ligand_dict, duplicates):
    """
    The duplicate ligand corporate and pose ids are added back into the
    dictionary of ligand structures keyed by corporate id and pose id to
    ensure the dictionary data returned is identical for all versions of
    LD. This measure will be unnecessary once all servers are upgraded to
    8.1.

    `ligand_dict` is modified in place: every duplicate receives its own
    deep copy of the original ligand structure. Nothing is done for servers
    older than `login.LD_VERSION_MULTIPLE_IDS`, or when no duplicates are
    registered for the given ids.

    :param org_corporate_id: the unique corporate id for which we will find
        the duplicates for.
    :type org_corporate_id: str

    :param org_pose_id: the unique pose id for which we will find the
        duplicates for.
    :type org_pose_id: int

    :param ligand_dict: a nested dictionary of ligand structures keyed by
        corporate id and pose id
    :type ligand_dict: {str: OrderedDict({int: structure.Structure})}

    :param duplicates: a map of unique ligand corporate ids to a map of
        pose ids to duplicate ids.
    :type duplicates: {str: {int: [(str, int)]}
    """
    if self._version < login.LD_VERSION_MULTIPLE_IDS:
        return
    # Get the ligand to be duplicated
    lig_st = ligand_dict[org_corporate_id][org_pose_id]
    # Look the duplicates up with get(): the ids may not have been
    # registered as a unique entry, in which case indexing would raise
    # KeyError (or insert an empty entry into a defaultdict).
    dup_entries = duplicates.get(org_corporate_id, {}).get(org_pose_id, ())
    # Add the duplicate entries into the ligand dictionary along with the st
    for dup_corporate_id, dup_pose_id in dup_entries:
        dup_poses = ligand_dict.setdefault(dup_corporate_id, OrderedDict())
        dup_poses[dup_pose_id] = copy.deepcopy(lig_st)
def _appendDuplicateProteins(self, protein_dict, duplicates):
    """
    The duplicate protein pose ids are added back in to ensure the data
    returned is identical for all versions of LD. This measure will be
    unnecessary once all servers are upgraded to 8.1.

    `protein_dict` is modified in place and nothing is returned. Nothing is
    done for servers older than `login.LD_VERSION_MULTIPLE_IDS`.

    :param protein_dict: a mapping from protein structures to pose ids
    :type protein_dict: OrderedDict({structure.Structure, [int]})

    :param duplicates: a map of unique pose ids to the duplicates
    :type duplicates: {int: [int]}
    """
    if self._version < login.LD_VERSION_MULTIPLE_IDS:
        return
    for pose_ids in protein_dict.values():
        # Gather the duplicates first and extend afterwards, so that the
        # list is not modified while it is being iterated over. get() is
        # used so that a defaultdict is not populated by the lookup.
        duplicate_ids = [
            dup_id for pose_id in pose_ids
            for dup_id in duplicates.get(pose_id, ())
        ]
        pose_ids.extend(duplicate_ids)
def _getDownloadedProteins(self, task_ids):
"""
Wait until the protein download tasks in `task_ids` have finished and
return the results as mappings from protein structures to pose ids.
:param task_ids: a list of (task id, pose id) pairs associated with each
protein export
:type task_ids: [(int, int)]
:return: a mapping from proteins to the lists of pose ids containing
those proteins.
:rtype: OrderedDict({structure.Structure, [int]})
"""
# mapping from unique proteins to lists of pose ids
protein_dict = OrderedDict()
protein_dict[None] = []
# cache for finding unique proteins based on string equivalence, returns
# id for protein_dict
protein_cache = {}
for prot_task_id, pose_id in task_ids:
if prot_task_id:
prot_res_url = self.ld_client.wait_and_get_result_url(
prot_task_id)
prot_xtal_res = self.ld_client.export_xtals_result(prot_res_url)
else:
# For LD versions < 8.1 it is possible the task_id is None
# since a protein for this pose_id might not exist. In this
# case, the structure is automatically set to None as well.
prot_xtal_res = None
# sometimes xtals_result returns empty string
if prot_xtal_res:
if prot_xtal_res in protein_cache:
protein_st = protein_cache[prot_xtal_res]
protein_dict[protein_st].append(pose_id)
continue
possible_protein_st = self._readXtalResult(prot_xtal_res)[0]
if possible_protein_st.atom_total > 1:
protein_st = possible_protein_st
# each time we find a new protein:
# store protein with current id
protein_dict[protein_st] = [pose_id]
# make res string point to prot id in cache
protein_cache[prot_xtal_res] = protein_st
continue
# this task_id returns no valid protein
protein_dict[None].append(pose_id)
return protein_dict
def _readXtalResult(self, xtal_res):
"""
Read the result string in mol2 format from export_xtals_result and
return a list of the structures contained within.
:param xtal_res: A string in mol2 format
:type xtal_res: `str` or `bytes`
:return: a list of the structures from xtal_res
:rtype: [structure.Structure]
"""
# Convert input to str if it is provided as bytes
xtal_res = str(xtal_res, encoding='utf-8')
if not xtal_res:
return []
suffix = '.' + POSE_EXT
with fileutils.tempfilename('ld_xtals', suffix) as xtal_filename:
with open(xtal_filename, 'w') as xtal_file:
xtal_file.write(xtal_res)
with structure.StructureReader(xtal_filename) as reader:
xtal_sts = list(reader)
return xtal_sts
def _combineTabularData(self, tabular_data_sts):
"""
Create an ordered dict from structures copied from "tabular_data_sts".
:param tabular_data_sts: The tabular data imported from LiveDesign
:type tabular_data_sts: [structure.Structure]
:return: Copies of the original structures, keyed by dummy
indices to conform to the same data structures as _import3DCol
:rtype OrderedDict({int: structure.Structure})
"""
return OrderedDict(
(idx, st.copy()) for idx, st in enumerate(tabular_data_sts))
def _combineStructure(self, st, st_3d):
"""
Combine the structures' properties by copying properties from one
structure to another.
:param st: the structure whose properties are to be copied.
:type st: `structure.Structure`
:param st_3d: structure with 3d data - the desired properties from `st`
are copied here
:type st_3d: `structure.Structure`
:return: `st_3d`, after the properties of `st` have been added to it
:rtype: `structure.Structure`
"""
for prop_name in list(st.property):
st_3d.property[prop_name] = st.property[prop_name]
return st_3d
def cache_entity_IDs(sts, ld_id_dict):
    """
    Store the corporate ID (AKA the entity ID) as a structure property on
    each imported structure.

    Structures that are missing from `ld_id_dict` are given the value
    `None`.

    :param sts: List of structures to set properties for
    :type sts: list(structure.Structure)

    :param ld_id_dict: Live Design structure to entity ID dictionary
    :type ld_id_dict: Dict[Structure, str]
    """
    for imported_st in sts:
        entity_id = ld_id_dict.get(imported_st)
        ld_utils.safely_set_property(
            imported_st, constants.PROPNAME_IMPORT_ENTITY_ID, entity_id)
def get_st_entity_id_map(ld_client, sts, lr_id):
    """
    Build a dictionary mapping each compound's structure to its entity id.

    For LD versions < 8.1: the primary LD ID (the `SD_ID` property) is used
    as the entity ID.
    For LD versions >= 8.1:
    Every structure holds multiple LD IDs, out of which, one is the entity
    ID. The live report metadata is used to translate each structure's
    display id into the correct entity ID; structures without a matching
    display id are mapped to `None`.

    :param ld_client: LiveReport client
    :type ld_client: LDClient

    :param sts: structures to get map for
    :type sts: list(structure.Structure)

    :param lr_id: the live report id
    :type lr_id: str

    :return: dictionary mapping structure to entity id
    :rtype: `dict(structure.Structure, str)`
    """
    st_to_entity_id = {}
    if login.get_LD_version(ld_client) < login.LD_VERSION_MULTIPLE_IDS:
        for st in sts:
            st_to_entity_id[st] = st.property[SD_ID]
        return st_to_entity_id

    # Use metadata to get Entity IDs
    metadata = ld_client.live_report_results_metadata(lr_id)
    display_to_entity_id = {}
    for row_info in metadata[ROW_INFO_KEY]:
        display_id = row_info[DISPLAY_ID_KEY].strip()
        entity_id = row_info[ENTITY_ID_KEY].strip()
        display_to_entity_id[display_id] = entity_id

    # Use get to access the property as some structures don't have
    # corporate IDs
    for st in sts:
        st_to_entity_id[st] = display_to_entity_id.get(st.property.get(SD_ID))
    return st_to_entity_id