import copy
from collections import defaultdict
import numpy
[docs]def line_match_final_log(line):
if line.find("::: finished :::") > -1:
return True
else:
return False
[docs]def accept_reject_parse(file_input, n_replicas):
'''
Parse log file for REMDrelated lines
:param file_input: Iterator of replica exchange log lines
:type n_replicas: `int`
:param n_replicas: number of replicas
:returns: (acc_count, rej_count, reject_dict, total_dict, replica_states):
- acc_count - Number of accepted exchanges
- rej_count - Number of rejected exchanges
- rej_dict - Number of rejected exchanges per replica pair
- total_dict - Number of exchange attempts per replica pair
- replica_states - List of (time, replica_indices) for each time where
replicas differ from previous time
:rtype: `tuple(int, int, dict, dict, list)`
'''
# Accumulate changes of indices from the ACCEPTances
switches = []
rej_count, acc_count = 0, 0
time_now = 0.0
reject_dict, total_dict = defaultdict(int), defaultdict(int)
replica_idx = list(range(n_replicas))
replica_states = [(time_now, replica_idx[:])]
for line in file_input:
if not len(line) or line[0] == '#' or line[0:8] == 'Chemical':
continue
if line_match_final_log(line):
break
fields = line.split()
if len(fields) < 2:
continue
# Expecting a line with the following format
# 0:time 1:exchange 2:rep_1 3:rep_2 4:T_1 5:T_2 6:U_1(x_1) 7:U_2(x_2) 8:U_1(x_2) 9:U_2(x_1) 10:P_1 11:P_2 12:V_1 13:V_2 14:arg
# Make sure this is a valid line
if fields[1] in ('REJECT', 'ACCEPT'):
# If the time has changed, commit the completed time
time_read = float(fields[0])
if time_read != time_now:
if len(switches) > 0:
for s0, s1 in switches:
replica_idx[s0], replica_idx[s1] = replica_idx[
s1], replica_idx[s0]
replica_states.append((time_now, replica_idx[:]))
switches.clear()
time_now = time_read
if fields[1] == 'REJECT':
# Index by temperature or Hamiltonian state index
idx = f"{fields[4]} - {fields[5]}" if fields[4] != fields[
5] else f"{fields[2]} - {fields[3]}"
rej_count += 1
reject_dict[idx] += 1
total_dict[idx] += 1
elif fields[1] == 'ACCEPT':
# Index by temperature or Hamiltonian state index
idx = f"{fields[4]} - {fields[5]}" if fields[4] != fields[
5] else f"{fields[2]} - {fields[3]}"
acc_count += 1
switches.append((int(fields[2][1:]), int(fields[3][1:])))
total_dict[idx] += 1
# Finally run through the last time point's accumulated switches (assumes successful completion)
if len(switches) > 0:
for s0, s1 in switches:
replica_idx[s0], replica_idx[s1] = replica_idx[s1], replica_idx[s0]
replica_states.append((time_now, replica_idx[:]))
return acc_count, rej_count, reject_dict, total_dict, replica_states
[docs]def read_data_from_log_file(logfile, cfg_map):
# Contributors: Yujie Wu, John Shelley, Matvey Adzhigirey
# New parser is based on the actual simulation configuration as recorded in
# the log file and thus should work for whatever types of REMD. -YW
input_log = open(logfile, "r")
# Also determine if CPU or GPU codebase was ran
isGPU = False
if 'graph' in list(cfg_map.remd):
isGPU = True
# Gets the number of replicas first. Then finds out the temperatures for all
# replicas from the configuration.
use_temps = []
simulation_time = None
if not isGPU:
for replica in cfg_map.remd.cfg:
cfg_map_tmp = copy.deepcopy(cfg_map)
cfg_map_tmp.update(replica.val)
try:
rep = cfg_map_tmp.integrator.temperature.T_ref.val
except:
rep = cfg_map_tmp.integrator.temperature[0].T_ref.val
use_temps.append(rep)
# now check if you actually got different replica temeratures
# if it's a REST job, all temperatures will be the same
if len(set(use_temps)) == 1:
use_temps = []
for replica in cfg_map.remd.cfg:
replica_name = replica.force.term.gibbs.window.val
use_temps.append(replica_name)
else:
for replica_name, replica_setting in cfg_map.remd.graph.key_value():
cfg_map_tmp = copy.deepcopy(cfg_map)
cfg_map_tmp.update(replica_setting.val)
if replica_name == 'edges':
continue
rep = cfg_map_tmp.integrator.temperature.T_ref.val
use_temps.append(rep)
# now check if you actually got different replica temperatures, if
# it's a rest job, all temepatures will be the same
if len(set(use_temps)) == 1:
replica_name = list(cfg_map.remd.graph)
replica_name.remove('edges')
use_temps = [name.strip('T') for name in replica_name]
use_temps = sorted(use_temps)
#get the total time of the simulation, so the plot reflects it.
try:
simulation_time = cfg_map.remd.last_time.val
except:
pass
num_replicas = len(use_temps)
acc_count, rej_count, _, _, replica_states = accept_reject_parse(
input_log, num_replicas)
# Sort the information so that it is in the form [(Time, [Temperatures])]
# where Temperatures are the temperatures for each replica (in their
# original order)
orig_replica_temps = []
for sys_state in replica_states:
temps = []
for ind in range(0, len(use_temps)):
a_temp = use_temps[sys_state[1].index(ind)]
temps.append(a_temp)
time_temp = (sys_state[0], temps)
orig_replica_temps.append(time_temp)
time_list = []
temp_lists_list = [] # list of lists
for time_temp in orig_replica_temps:
#print "%10.3f " % time_temp[0],
time_list.append(time_temp[0])
for i, a_temp in enumerate(time_temp[1]):
if len(temp_lists_list) == i:
# This is the first time value
temp_lists_list.append([a_temp])
else:
temp_lists_list[i].append(a_temp)
# Post-process the lists:
# Add 2 points for each exchange so that the transitions between
# replicas are drawn as vertical lines instead of diagonal lines:
doubled_time_list = []
for i, time in enumerate(time_list):
if i == 0:
doubled_time_list.append(time)
else:
doubled_time_list.append(time)
doubled_time_list.append(time)
doubled_temp_lists_list = []
for temps in temp_lists_list:
doubled_temps = []
for i, temp in enumerate(temps):
if i == len(temps) - 1:
doubled_temps.append(temp)
else:
doubled_temps.append(temp)
doubled_temps.append(temp)
doubled_temp_lists_list.append(doubled_temps)
line_list = []
#add last time point
if simulation_time is not None:
doubled_time_list.append(simulation_time)
for temps in doubled_temp_lists_list:
if simulation_time is not None:
#add replica temperature for at the very end of the simulation
temps.append(temps[-1])
line_list.append([numpy.array(doubled_time_list), numpy.array(temps)])
return use_temps, line_list