"""
Functions to help grab keywords from xml and jaguar input files
Copyright Schrodinger, LLC. All rights reserved.
"""
import csv
import os
import sys
from schrodinger.application.jaguar.input import JaguarInput as jinp
from schrodinger.application.jaguar.keywordDB import load_keywords
from schrodinger.utils import csv_unicode
from schrodinger.utils import fileutils
# Absolute paths to Jaguar data files under the mmshare data directory.
# levels_of_theory.csv holds one row per method with classification flag
# columns, a description and literature references (read by dftnames_markup
# and all_dftnames below).  solvents.csv presumably holds solvent data; it is
# not read in this module.
LEVELS_OF_THEORY = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                                'levels_of_theory.csv')
SOLVENTS = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                        'solvents.csv')
# Maps boolean flag columns of levels_of_theory.csv to the human-readable
# DFT category labels used as the keys of dftnames_markup()'s return value.
DFT_TYPES = {
    'is_recommended': 'Recommended',
    'is_dispersion_corrected_dft': 'Aposteriori-corrected',
    'is_long_range_corrected_dft': 'Long-range-corrected',
    'is_hybrid_dft': 'Hybrid',
    'is_meta_gga_dft': 'Meta-GGA',
    'is_gga_dft': 'Gradient-corrected (GGA)',
    'is_lda_dft': 'Local',
}
def jaguar_keywords_xml_filename():
    """
    Find the jaguar_keywords.xml file in $SCHRODINGER/mmshare-v*/

    :return: absolute path to jaguar_keywords.xml
    :raises IOError: if the file does not exist on disk
    """
    xml_path = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                            'jaguar_keywords.xml')
    if not os.path.exists(xml_path):
        raise IOError("%s not found!" % xml_path)
    return xml_path
def all_keywords():
    """
    return a list of all keywords
    """
    # load_keywords returns (list, dict); only the dict's keys are needed
    _, keywords_dict = load_keywords(jaguar_keywords_xml_filename())
    return list(keywords_dict)
def all_meaningful_keywords():
    """
    return a partial list keywords
    excludes all iopt,opt,cut,ip
    """
    prefixes = ('ip', 'iopt', 'opt', 'cut')

    def _is_numbered(keyword):
        # True when keyword is one of the excluded prefixes followed by an
        # integer, e.g. 'iopt12' or 'cut3'; a bare prefix ('opt') is kept
        # because int('') raises ValueError.
        for prefix in prefixes:
            if keyword.startswith(prefix):
                try:
                    int(keyword[len(prefix):])
                except ValueError:
                    continue
                return True
        return False

    return [key for key in all_keywords() if not _is_numbered(key)]
def dftnames_markup():
    """
    Return dictionary of DFT names, a short description, and DOI's to
    literature references if available, grouped by type.
    All information is taken from the levels_of_theory.csv file.
    This is intended for use by the documentation team for auto-doc'ing.
    See JAGUAR-9563.
    ::
        {'Local': [
            ('HFS', 'a short description', ['doi:1']),
            ('XALPHA', 'another functional', ['do1:2', 'doi:3']),
            ('SVWN', None, None),
            ...
         'Aposteriori-corrected': [
            ('PBE-ulg', None, None),
            ('B3LYP-MM', None, None),
            ...
         'Recommended': [
            ('B3LYP', None, None),
            ('B3LYP-MM', None, None),
            ...
        }
    """
    markup = {label: [] for label in DFT_TYPES.values()}
    with csv_unicode.reader_open(LEVELS_OF_THEORY) as csv_fh:
        for row in csv.DictReader(csv_fh):
            if row['is_non_dft'] == '1':
                # Exclude non-DFT methods
                continue
            for column, flag in row.items():
                if column not in DFT_TYPES or flag != '1':
                    continue
                name = row['method'].strip()
                desc = row['description']
                if desc is not None:
                    desc = desc.strip()
                refs = row['references']
                if refs is not None:
                    # Convert possible multiple refs into a list
                    refs = refs.strip()
                    refs = refs.split() if ' ' in refs else [refs]
                markup[DFT_TYPES[column]].append((name, desc, refs))
    return markup
def all_dftnames():
    """
    Return a sorted list of all dftnames.
    Also includes HF and MP2.

    :return: sorted list of upper-cased method names from
        levels_of_theory.csv (MP2 is appended if the file lacks it;
        HF is presumably already a row in the file)
    """
    dftnames = []
    with csv_unicode.reader_open(LEVELS_OF_THEORY) as fh:
        reader = csv.DictReader(fh)
        for row in reader:
            # Read the 'method' column directly instead of scanning every
            # column of the row looking for it.
            dftnames.append(row['method'].strip().upper())
    if 'MP2' not in dftnames:
        dftnames.append('MP2')
    return sorted(dftnames)
def all_basisnames():
    """
    Return a list of all basis set names.

    Each entry is the cleaned-up full name (see clean_basisname), i.e. the
    base name decorated with its '+' and '*' suffixes.
    """
    from schrodinger.application.jaguar import basis
    basisnames = []
    # NOTE: do not name the loop variable 'basis' -- that would shadow the
    # module imported above.
    for basis_set in basis.get_bases():
        # decorate the base name with polarization stars and diffuse pluses
        full_name = ''.join(
            [basis_set.name, '*' * basis_set.nstar, '+' * basis_set.nplus])
        basisnames.append(clean_basisname(full_name))
    return basisnames
def clean_basisname(basis):
    """
    return a cleaned up basis name
    i.e. NAME+++****

    The base name is upper-cased and all '+' characters are placed before
    all '*' characters, regardless of their order in the input.
    """
    pluses = '+' * basis.count('+')
    stars = '*' * basis.count('*')
    base = basis.upper().replace('+', '').replace('*', '')
    return base + pluses + stars
def keyword_coverage(input_files, print_report=True):
    """
    given a list of input files
    return in this order (as return 1, 2, 3, 4, 5, 6)
    1 - percent of keywords covered
    2 - percent of basis sets covered
    3 - percent of functionals covered
    4 - list of uncovered keywords
    5 - list of uncovered basis sets
    6 - list of uncovered functionals

    :param input_files: Jaguar input file names to scan
    :param print_report: if True, also print a human-readable report
    """
    input_keywords = []
    input_functionals = []
    input_basis = []
    all_keywords = all_meaningful_keywords()
    all_functionals = all_dftnames()
    all_basis = all_basisnames()
    # collect the distinct keywords/basis sets/functionals used by the inputs
    for input_file in input_files:  # renamed: 'file' shadows the builtin
        keywords = jaguar_input_keywords(input_file)
        basis = keywords['basis']
        functional = keywords['dftname']
        for key in keywords:
            if key not in input_keywords:
                input_keywords.append(key)
        # requires an exact match, i.e. not just base name match
        if basis not in input_basis:
            input_basis.append(basis)
        if functional not in input_functionals:
            input_functionals.append(functional)
    # make comparisons: remove every covered entry so the all_* lists end up
    # holding only the uncovered entries
    n_all_keywords = len(all_keywords)
    n_all_functionals = len(all_functionals)
    n_all_basis = len(all_basis)
    for functional in input_functionals:
        if functional in all_functionals:
            all_functionals.remove(functional)
    for key in input_keywords:
        if key in all_keywords:
            all_keywords.remove(key)
    for basis in input_basis:
        # basis sets are matched by base name only; matches are removed
        basisname_in_list(basis, all_basis, True)
    n_covered_keywords = n_all_keywords - len(all_keywords)
    n_covered_functionals = n_all_functionals - len(all_functionals)
    n_covered_basis = n_all_basis - len(all_basis)
    percent_keywords = 100.0 * n_covered_keywords / n_all_keywords
    percent_functionals = 100.0 * n_covered_functionals / n_all_functionals
    percent_basis = 100.0 * n_covered_basis / n_all_basis
    if print_report:
        print("-------------------------------------")
        print(" Percent Coverage of Jaguar Keywords")
        print("-------------------------------------")
        print(" Keywords %f (%d of %d) \n" %
              (percent_keywords, n_covered_keywords, n_all_keywords))
        # BUGFIX: the basis-set and functional counts were swapped between
        # the next two lines of the report; each now shows its own numbers.
        print(" basis sets %f (%d of %d) \n" %
              (percent_basis, n_covered_basis, n_all_basis))
        print(" functionals %f (%d of %d) \n" %
              (percent_functionals, n_covered_functionals,
               n_all_functionals))
        print("-------------------------------------")
        print("Uncovered Keywords:")
        for key in all_keywords:
            print(key)
        print("Uncovered Basis Sets:")
        for basis in all_basis:
            print(basis)
        print("Uncovered Functionals:")
        for name in all_functionals:
            print(name)
    return (percent_keywords, percent_basis, percent_functionals,
            all_keywords, all_basis, all_functionals)
def supported_basis_sets(functional):
    """
    returns the supported basis sets for a
    particular funcional, only useful for B3LYP-MM/B3LYP-LOC

    Any other functional is considered to support every basis set.
    """
    restricted = {
        'B3LYP-MM': ['LACVP*', 'CC-PVDZ++'],
        'B3LYP-LOC': ['6-31G*', 'CC-PVDZ', 'CC-PVTZ+', '6-311G-3DF-3PD+']
    }
    try:
        return restricted[functional.upper()]
    except KeyError:
        # unrestricted functional: everything is supported
        return all_basisnames()
def basis_is_supported_for_functional(functional, basis):
    """
    is this functional/basis set combination supported
    really just checks B3LYP-MM and B3LYP-LOC

    :return: bool
    """
    # Normalize case and the '+'/'*' ordering so the comparison does not
    # depend on how the caller spelled the basis name.
    # BUGFIX: the cleaned name was computed but the raw 'basis' string was
    # compared, so e.g. 'lacvp*' would wrongly report unsupported.
    bas = clean_basisname(basis)
    supp_func = ['B3LYP-MM', 'B3LYP-LOC']
    if functional.upper() not in supp_func:
        # every basis set is supported for unrestricted functionals
        return True
    return bas in supported_basis_sets(functional)
def basisname_in_list(basis, lst, remove=False):
    """
    Inspect list to see if basisname is in it. Compares only by 'basename', i.e.
    no '`*`'s or '`+`'s optionally removes any matches from the list
    :return: bool
    """

    def _basename(name):
        # strip all decorations, then normalize case via clean_basisname
        return clean_basisname(name.replace('*', '').replace('+', ''))

    target = _basename(basis)
    found = False
    # iterate over a copy so matches can be removed from lst in place
    for entry in list(lst):
        if _basename(entry) != target:
            continue
        found = True
        if remove:
            lst.remove(entry)
    return found