Source code for schrodinger.application.vss.database
import csv
import glob
import os
from ruamel import yaml
from voluptuous import Required
from voluptuous import Schema
from schrodinger import structure
from schrodinger.utils import fileutils
# Column names looked up in the header of the SMILES CSV files.
# ``Database.validate`` requires CID_COL and SMILES_COL in every file.
CID_COL = 'Title'
# Not referenced in the visible part of this module; presumably the column
# holding known activity labels -- TODO confirm against callers.
TRUTH_COL = 'Truth'
SMILES_COL = 'SMILES'
# Name of the metadata file that ``Database.__init__`` reads from the
# database directory.
INDEX_FILE = 'index.yml'
# Schema of the 'shape' mapping in the index file: 'pharm' is mandatory,
# 'atom_color' may be omitted. ``Database.get_shape_data`` treats each value
# as a glob pattern.
SHAPE_DATA_SCHEMA = Schema({
    Required('pharm'): str,
    'atom_color': str,
})
# Schema of the whole index file. 'local' falls back to False when absent;
# 'smiles' is used as a glob pattern by ``Database.get_smiles_csv``; 'shape'
# is not marked ``Required`` and ``Database.__init__`` tolerates its absence.
INDEX_SCHEMA = Schema({
    Required('name'): str,
    Required('local', default=False): bool,
    Required('smiles'): str,
    'shape': SHAPE_DATA_SCHEMA,
})
def _get_paths(pattern):
    '''
    Expand a glob pattern into an ordered list of absolute paths.

    :param pattern: Glob pattern; a relative pattern is matched against the
        current working directory.
    :type pattern: str

    :return: Absolute paths of all matches, in ascending order.
    :rtype: list(str)
    '''
    matches = [os.path.abspath(hit) for hit in glob.iglob(pattern)]
    matches.sort()
    return matches
class Database:
    '''
    Database metadata.

    Describes a database directory through the index file (``INDEX_FILE``)
    stored in it: the database name, the "local" flag and the glob patterns
    that locate the SMILES CSV files and the optional shape data files.
    '''

    def __init__(self, path):
        '''
        Read and validate the index file found in `path`.

        Raises whatever ``open`` raises when the index file cannot be read
        and whatever ``INDEX_SCHEMA`` raises when its contents do not match
        the schema.

        :param path: Directory path.
        :type path: str
        '''
        self.root = os.path.abspath(path)
        # NOTE(review): recent ruamel.yaml releases deprecate the
        # module-level safe_load() -- confirm against the pinned version.
        with open(os.path.join(path, INDEX_FILE), 'r') as fp:
            data = yaml.safe_load(fp)
        data = INDEX_SCHEMA(data)
        self.name = data['name']
        self.local = data['local']
        self._smiles_csv_glob = data['smiles']
        # Mapping of shape type -> glob pattern; empty when the index file
        # has no 'shape' entry.
        self._shape_data_glob = data.get('shape', {})

    def get_smiles_csv(self):
        '''
        :return: Absolute paths of the files matching the 'smiles' glob
            pattern of the index file, sorted.
        :rtype: list(str)
        '''
        # fileutils.chdir is expected to switch the working directory for
        # the duration of the block, so that a relative pattern is resolved
        # against the database directory.
        with fileutils.chdir(self.root):
            return _get_paths(self._smiles_csv_glob)

    def get_shape_data(self, shape_type):
        '''
        :param shape_type: Key of the 'shape' mapping in the index file.
        :type shape_type: str

        :return: Sorted absolute paths of the files matching the glob
            pattern registered for `shape_type`; empty list if the index
            file has no entry for `shape_type`.
        :rtype: list(str)
        '''
        with fileutils.chdir(self.root):
            # Only the lookup may legitimately raise KeyError; keep the
            # glob expansion outside of the handler.
            try:
                pattern = self._shape_data_glob[shape_type]
            except KeyError:
                return []
            return _get_paths(pattern)

    def count_structures(self):
        '''
        :return: Sum of `structure.count_structures` over all SMILES CSV
            files of the database.
        :rtype: int
        '''
        return sum(
            structure.count_structures(fn) for fn in self.get_smiles_csv())

    def validate(self):
        '''
        Check that the header of every SMILES CSV file has the ``CID_COL``
        and ``SMILES_COL`` columns. Stops at the first problem found.

        :return: Validation success and error message.
        :rtype: (bool, str)
        '''
        required = (CID_COL, SMILES_COL)
        for fn in self.get_smiles_csv():
            # The csv module asks for files opened with newline='' so that
            # it can do its own newline handling.
            with open(fn, 'r', newline='') as fp:
                header = csv.DictReader(fp).fieldnames
            # fieldnames is None for an empty file.
            columns = set(header or [])
            for col in required:
                if col not in columns:
                    return (False, f"'{fn}': lacks '{col}' column")
        return (True, '')