Source code for schrodinger.application.desmond.replica_exchange_review

import copy
from collections import defaultdict

import numpy


[docs]def line_match_final_log(line):
    if line.find("::: finished :::") > -1:
        return True
    else:
        return False


[docs]def accept_reject_parse(file_input, n_replicas):
    '''
    Parse log file for REMDrelated lines
    :param file_input: Iterator of replica exchange log lines
    :type n_replicas: `int`
    :param n_replicas: number of replicas
    :returns: (acc_count, rej_count, reject_dict, total_dict, replica_states):

        - acc_count - Number of accepted exchanges
        - rej_count - Number of rejected exchanges
        - rej_dict - Number of rejected exchanges per replica pair
        - total_dict - Number of exchange attempts per replica pair
        - replica_states - List of (time, replica_indices) for each time where
          replicas differ from previous time
    
    :rtype: `tuple(int, int, dict, dict, list)`
    '''
    #   Accumulate changes of indices from the ACCEPTances
    switches = []
    rej_count, acc_count = 0, 0
    time_now = 0.0
    reject_dict, total_dict = defaultdict(int), defaultdict(int)
    replica_idx = list(range(n_replicas))
    replica_states = [(time_now, replica_idx[:])]
    for line in file_input:
        if not len(line) or line[0] == '#' or line[0:8] == 'Chemical':
            continue
        if line_match_final_log(line):
            break
        fields = line.split()
        if len(fields) < 2:
            continue
        # Expecting a line with the following format
        #       0:time 1:exchange 2:rep_1 3:rep_2  4:T_1  5:T_2   6:U_1(x_1)   7:U_2(x_2)   8:U_1(x_2)   9:U_2(x_1)     10:P_1     11:P_2     12:V_1     13:V_2       14:arg
        # Make sure this is a valid line
        if fields[1] in ('REJECT', 'ACCEPT'):
            # If the time has changed, commit the completed time
            time_read = float(fields[0])
            if time_read != time_now:
                if len(switches) > 0:
                    for s0, s1 in switches:
                        replica_idx[s0], replica_idx[s1] = replica_idx[
                            s1], replica_idx[s0]
                    replica_states.append((time_now, replica_idx[:]))
                    switches.clear()
                time_now = time_read
            if fields[1] == 'REJECT':
                # Index by temperature or Hamiltonian state index
                idx = f"{fields[4]} - {fields[5]}" if fields[4] != fields[
                    5] else f"{fields[2]} - {fields[3]}"
                rej_count += 1
                reject_dict[idx] += 1
                total_dict[idx] += 1
            elif fields[1] == 'ACCEPT':
                # Index by temperature or Hamiltonian state index
                idx = f"{fields[4]} - {fields[5]}" if fields[4] != fields[
                    5] else f"{fields[2]} - {fields[3]}"
                acc_count += 1
                switches.append((int(fields[2][1:]), int(fields[3][1:])))
                total_dict[idx] += 1
    # Finally run through the last time point's accumulated switches (assumes successful completion)
    if len(switches) > 0:
        for s0, s1 in switches:
            replica_idx[s0], replica_idx[s1] = replica_idx[s1], replica_idx[s0]
        replica_states.append((time_now, replica_idx[:]))
    return acc_count, rej_count, reject_dict, total_dict, replica_states


[docs]def read_data_from_log_file(logfile, cfg_map):
    # Contributors: Yujie Wu, John Shelley, Matvey Adzhigirey
    # New parser is based on the actual simulation configuration as recorded in
    # the log file and thus should work for whatever types of REMD. -YW

    input_log = open(logfile, "r")

    # Also determine if CPU or GPU codebase was ran
    isGPU = False
    if 'graph' in list(cfg_map.remd):
        isGPU = True

    # Gets the number of replicas first. Then finds out the temperatures for all
    # replicas from the configuration.

    use_temps = []
    simulation_time = None
    if not isGPU:
        for replica in cfg_map.remd.cfg:
            cfg_map_tmp = copy.deepcopy(cfg_map)
            cfg_map_tmp.update(replica.val)
            try:
                rep = cfg_map_tmp.integrator.temperature.T_ref.val
            except:
                rep = cfg_map_tmp.integrator.temperature[0].T_ref.val
            use_temps.append(rep)
        # now check if you actually got different replica temeratures
        # if it's a REST job, all temperatures will be the same
        if len(set(use_temps)) == 1:
            use_temps = []
            for replica in cfg_map.remd.cfg:
                replica_name = replica.force.term.gibbs.window.val
                use_temps.append(replica_name)

    else:
        for replica_name, replica_setting in cfg_map.remd.graph.key_value():
            cfg_map_tmp = copy.deepcopy(cfg_map)
            cfg_map_tmp.update(replica_setting.val)
            if replica_name == 'edges':
                continue
            rep = cfg_map_tmp.integrator.temperature.T_ref.val
            use_temps.append(rep)
        # now check if you actually got different replica temperatures, if
        # it's a rest job, all temepatures will be the same
        if len(set(use_temps)) == 1:
            replica_name = list(cfg_map.remd.graph)
            replica_name.remove('edges')
            use_temps = [name.strip('T') for name in replica_name]
    use_temps = sorted(use_temps)
    #get the total time of the simulation, so the plot reflects it.
    try:
        simulation_time = cfg_map.remd.last_time.val
    except:
        pass

    num_replicas = len(use_temps)

    acc_count, rej_count, _, _, replica_states = accept_reject_parse(
        input_log, num_replicas)

    # Sort the information so that it is in the form [(Time, [Temperatures])]
    # where Temperatures are the temperatures for each replica (in their
    # original order)
    orig_replica_temps = []
    for sys_state in replica_states:
        temps = []
        for ind in range(0, len(use_temps)):
            a_temp = use_temps[sys_state[1].index(ind)]
            temps.append(a_temp)
        time_temp = (sys_state[0], temps)
        orig_replica_temps.append(time_temp)

    time_list = []
    temp_lists_list = []  # list of lists
    for time_temp in orig_replica_temps:
        #print "%10.3f " % time_temp[0],

        time_list.append(time_temp[0])

        for i, a_temp in enumerate(time_temp[1]):
            if len(temp_lists_list) == i:
                # This is the first time value
                temp_lists_list.append([a_temp])
            else:
                temp_lists_list[i].append(a_temp)

    # Post-process the lists:
    # Add 2 points for each exchange so that the transitions between
    # replicas are drawn as vertical lines instead of diagonal lines:

    doubled_time_list = []
    for i, time in enumerate(time_list):
        if i == 0:
            doubled_time_list.append(time)
        else:
            doubled_time_list.append(time)
            doubled_time_list.append(time)

    doubled_temp_lists_list = []
    for temps in temp_lists_list:
        doubled_temps = []
        for i, temp in enumerate(temps):
            if i == len(temps) - 1:
                doubled_temps.append(temp)
            else:
                doubled_temps.append(temp)
                doubled_temps.append(temp)
        doubled_temp_lists_list.append(doubled_temps)

    line_list = []
    #add last time point
    if simulation_time is not None:
        doubled_time_list.append(simulation_time)
    for temps in doubled_temp_lists_list:
        if simulation_time is not None:
            #add replica temperature for at the very end of the simulation
            temps.append(temps[-1])
        line_list.append([numpy.array(doubled_time_list), numpy.array(temps)])
    return use_temps, line_list