"""
Functions to help grab keywords from xml and jaguar input files
Copyright Schrodinger, LLC. All rights reserved.
"""
import csv
import os
import sys
from schrodinger.application.jaguar.input import JaguarInput as jinp
from schrodinger.application.jaguar.keywordDB import load_keywords
from schrodinger.utils import csv_unicode
from schrodinger.utils import fileutils
# Absolute paths to Jaguar data files under the mmshare data directory.
# levels_of_theory.csv holds one row per method with classification flag
# columns, a description and literature references (read by dftnames_markup
# and all_dftnames below).  solvents.csv presumably holds solvent data; it is
# not read in this module.
LEVELS_OF_THEORY = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                                'levels_of_theory.csv')
SOLVENTS = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                        'solvents.csv')
# Maps boolean flag columns of levels_of_theory.csv to the human-readable
# DFT category labels used as the keys of dftnames_markup()'s return value.
DFT_TYPES = {
    'is_recommended': 'Recommended',
    'is_dispersion_corrected_dft': 'Aposteriori-corrected',
    'is_long_range_corrected_dft': 'Long-range-corrected',
    'is_hybrid_dft': 'Hybrid',
    'is_meta_gga_dft': 'Meta-GGA',
    'is_gga_dft': 'Gradient-corrected (GGA)',
    'is_lda_dft': 'Local',
}
def jaguar_keywords_xml_filename():
    """
    Find the jaguar_keywords.xml file in $SCHRODINGER/mmshare-v*/

    :return: absolute path to jaguar_keywords.xml
    :raises IOError: if the file does not exist on disk
    """
    xml_path = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                            'jaguar_keywords.xml')
    if not os.path.exists(xml_path):
        raise IOError("%s not found!" % xml_path)
    return xml_path
def all_keywords():
    """
    return a list of all keywords
    """
    # load_keywords returns (list, dict); only the dict's keys are needed
    _, keywords_dict = load_keywords(jaguar_keywords_xml_filename())
    return list(keywords_dict)
def all_meaningful_keywords():
    """
    return a partial list keywords
    excludes all iopt,opt,cut,ip
    """
    prefixes = ('ip', 'iopt', 'opt', 'cut')

    def _is_numbered(keyword):
        # True when keyword is one of the excluded prefixes followed by an
        # integer, e.g. 'iopt12' or 'cut3'; a bare prefix ('opt') is kept
        # because int('') raises ValueError.
        for prefix in prefixes:
            if keyword.startswith(prefix):
                try:
                    int(keyword[len(prefix):])
                except ValueError:
                    continue
                return True
        return False

    return [key for key in all_keywords() if not _is_numbered(key)]
def dftnames_markup():
    """
    Return dictionary of DFT names, a short description, and DOI's to
    literature references if available, grouped by type.
    All information is taken from the levels_of_theory.csv file.
    This is intended for use by the documentation team for auto-doc'ing.
    See JAGUAR-9563.
    ::
        {'Local': [
            ('HFS', 'a short description', ['doi:1']),
            ('XALPHA', 'another functional', ['do1:2', 'doi:3']),
            ('SVWN', None, None),
            ...
         'Aposteriori-corrected': [
            ('PBE-ulg', None, None),
            ('B3LYP-MM', None, None),
            ...
         'Recommended': [
            ('B3LYP', None, None),
            ('B3LYP-MM', None, None),
            ...
        }
    """
    markup = {label: [] for label in DFT_TYPES.values()}
    with csv_unicode.reader_open(LEVELS_OF_THEORY) as csv_fh:
        for row in csv.DictReader(csv_fh):
            if row['is_non_dft'] == '1':
                # Exclude non-DFT methods
                continue
            for column, flag in row.items():
                if column not in DFT_TYPES or flag != '1':
                    continue
                name = row['method'].strip()
                desc = row['description']
                if desc is not None:
                    desc = desc.strip()
                refs = row['references']
                if refs is not None:
                    # Convert possible multiple refs into a list
                    refs = refs.strip()
                    refs = refs.split() if ' ' in refs else [refs]
                markup[DFT_TYPES[column]].append((name, desc, refs))
    return markup
def all_dftnames():
    """
    Return a sorted list of all dftnames.
    Also includes HF and MP2.

    :return: sorted list of upper-cased method names from
        levels_of_theory.csv (MP2 is appended if the file lacks it;
        HF is presumably already a row in the file)
    """
    dftnames = []
    with csv_unicode.reader_open(LEVELS_OF_THEORY) as fh:
        reader = csv.DictReader(fh)
        for row in reader:
            # Read the 'method' column directly instead of scanning every
            # column of the row looking for it.
            dftnames.append(row['method'].strip().upper())
    if 'MP2' not in dftnames:
        dftnames.append('MP2')
    return sorted(dftnames)
def all_basisnames():
    """
    Return a list of all basis set names.

    Each entry is the cleaned-up full name (see clean_basisname), i.e. the
    base name decorated with its '+' and '*' suffixes.
    """
    from schrodinger.application.jaguar import basis
    basisnames = []
    # NOTE: do not name the loop variable 'basis' -- that would shadow the
    # module imported above.
    for basis_set in basis.get_bases():
        # decorate the base name with polarization stars and diffuse pluses
        full_name = ''.join(
            [basis_set.name, '*' * basis_set.nstar, '+' * basis_set.nplus])
        basisnames.append(clean_basisname(full_name))
    return basisnames
def clean_basisname(basis):
    """
    return a cleaned up basis name
    i.e. NAME+++****

    The base name is upper-cased and all '+' characters are placed before
    all '*' characters, regardless of their order in the input.
    """
    pluses = '+' * basis.count('+')
    stars = '*' * basis.count('*')
    base = basis.upper().replace('+', '').replace('*', '')
    return base + pluses + stars
def keyword_coverage(input_files, print_report=True):
    """
    given a list of input files
    return in this order (as return 1, 2, 3, 4, 5, 6)
    1 - percent of keywords covered
    2 - percent of basis sets covered
    3 - percent of functionals covered
    4 - list of uncovered keywords
    5 - list of uncovered basis sets
    6 - list of uncovered functionals

    :param input_files: Jaguar input file names to scan
    :param print_report: if True, also print a human-readable report
    """
    input_keywords = []
    input_functionals = []
    input_basis = []
    all_keywords = all_meaningful_keywords()
    all_functionals = all_dftnames()
    all_basis = all_basisnames()
    # collect the distinct keywords/basis sets/functionals used by the inputs
    for input_file in input_files:  # renamed: 'file' shadows the builtin
        keywords = jaguar_input_keywords(input_file)
        basis = keywords['basis']
        functional = keywords['dftname']
        for key in keywords:
            if key not in input_keywords:
                input_keywords.append(key)
        # requires an exact match, i.e. not just base name match
        if basis not in input_basis:
            input_basis.append(basis)
        if functional not in input_functionals:
            input_functionals.append(functional)
    # make comparisons: remove every covered entry so the all_* lists end up
    # holding only the uncovered entries
    n_all_keywords = len(all_keywords)
    n_all_functionals = len(all_functionals)
    n_all_basis = len(all_basis)
    for functional in input_functionals:
        if functional in all_functionals:
            all_functionals.remove(functional)
    for key in input_keywords:
        if key in all_keywords:
            all_keywords.remove(key)
    for basis in input_basis:
        # basis sets are matched by base name only; matches are removed
        basisname_in_list(basis, all_basis, True)
    n_covered_keywords = n_all_keywords - len(all_keywords)
    n_covered_functionals = n_all_functionals - len(all_functionals)
    n_covered_basis = n_all_basis - len(all_basis)
    percent_keywords = 100.0 * n_covered_keywords / n_all_keywords
    percent_functionals = 100.0 * n_covered_functionals / n_all_functionals
    percent_basis = 100.0 * n_covered_basis / n_all_basis
    if print_report:
        print("-------------------------------------")
        print(" Percent Coverage of Jaguar Keywords")
        print("-------------------------------------")
        print(" Keywords %f (%d of %d) \n" %
              (percent_keywords, n_covered_keywords, n_all_keywords))
        # BUGFIX: the basis-set and functional counts were swapped between
        # the next two lines of the report; each now shows its own numbers.
        print(" basis sets %f (%d of %d) \n" %
              (percent_basis, n_covered_basis, n_all_basis))
        print(" functionals %f (%d of %d) \n" %
              (percent_functionals, n_covered_functionals,
               n_all_functionals))
        print("-------------------------------------")
        print("Uncovered Keywords:")
        for key in all_keywords:
            print(key)
        print("Uncovered Basis Sets:")
        for basis in all_basis:
            print(basis)
        print("Uncovered Functionals:")
        for name in all_functionals:
            print(name)
    return (percent_keywords, percent_basis, percent_functionals,
            all_keywords, all_basis, all_functionals)
def supported_basis_sets(functional):
    """
    returns the supported basis sets for a
    particular funcional, only useful for B3LYP-MM/B3LYP-LOC

    Any other functional is considered to support every basis set.
    """
    restricted = {
        'B3LYP-MM': ['LACVP*', 'CC-PVDZ++'],
        'B3LYP-LOC': ['6-31G*', 'CC-PVDZ', 'CC-PVTZ+', '6-311G-3DF-3PD+']
    }
    try:
        return restricted[functional.upper()]
    except KeyError:
        # unrestricted functional: everything is supported
        return all_basisnames()
def basis_is_supported_for_functional(functional, basis):
    """
    is this functional/basis set combination supported
    really just checks B3LYP-MM and B3LYP-LOC

    :return: bool
    """
    # Normalize case and the '+'/'*' ordering so the comparison does not
    # depend on how the caller spelled the basis name.
    # BUGFIX: the cleaned name was computed but the raw 'basis' string was
    # compared, so e.g. 'lacvp*' would wrongly report unsupported.
    bas = clean_basisname(basis)
    supp_func = ['B3LYP-MM', 'B3LYP-LOC']
    if functional.upper() not in supp_func:
        # every basis set is supported for unrestricted functionals
        return True
    return bas in supported_basis_sets(functional)
def basisname_in_list(basis, lst, remove=False):
    """
    Inspect list to see if basisname is in it. Compares only by 'basename', i.e.
    no '`*`'s or '`+`'s optionally removes any matches from the list
    :return: bool
    """

    def _basename(name):
        # strip all decorations, then normalize case via clean_basisname
        return clean_basisname(name.replace('*', '').replace('+', ''))

    target = _basename(basis)
    found = False
    # iterate over a copy so matches can be removed from lst in place
    for entry in list(lst):
        if _basename(entry) != target:
            continue
        found = True
        if remove:
            lst.remove(entry)
    return found