'''
Structure and coordinate file manipulation library.
Reading a structure file::
reader = molfile.mae.read('/path/to/foo.mae')
Iterating through the frames in a file::
for frame in molfile.dtr.read('/path/to/foo.dtr').frames():
function( frame.pos, frame.vel, frame.time, frame.box )
Random access to frames (only dtr files support this currently)::
f27 = molfile.dtr.read('/path/to/foo.dtr').frame(27) # 0-based index
Convert an mae file to a pdb file::
input=molfile.mae.read('foo.mae')
output=molfile.pdb.write('foo.pdb', atoms=input.atoms)
output.frame(input.frames().next())
output.close()
Write every 10th frame in a dtr to a trr::
input=molfile.dtr.read('big.dtr')
output=molfile.trr.write('out.trr, natoms=input.natoms)
for i in range(0,input.nframes, 10):
output.frame( input.frame(i) )
output.close()
Write a frame with a specified set of gids::
f = molfile.Frame(natoms, with_gids=True
f.gid[:] = my_gids
f.pos[:] = my_positions
w.frame(f)
Read the raw fields from a frameset (dtr)::
dtr = molfile.DtrReader('input.dtr') # also works for stk
for i in range(dtr.nframes):
f = dtr.frame(i)
keyvals = dict()
frame = dtr.frame(i, keyvals=keyvals)
## use data in keyvals
Write raw fields to a frameset (dtr)::
dtr = molfile.DtrWriter('output.dtr', natoms=natoms)
keyvals = dict( s = "a string",
f = positions.flatten(), # must be 1d arrays
i = numpy.array([1,2,3]),
)
dtr.append( time = my_time, keyvals = keyvals )
'''
import bisect
import os
import numpy
from . import findframe
from ._molfile import * # noqa F403
extensiondict = dict()
[docs]def register_plugin(plugin):
''' put plugin in the global namespace, and add to extensiondict '''
globals()[plugin.name] = plugin
d = extensiondict
for ext in plugin.filename_extensions.split(','):
ext = ext.strip()
if ext:
d.setdefault(ext, []).append(plugin)
_molfile.register_all(register_plugin)
[docs]def load_shared_library(path):
register_shared_library(path, register_plugin)
[docs]def guess_filetype(filename, default=None):
''' return plugin name based on filename, or default if none found. '''
dot = filename.rfind('.')
if dot > 0:
key = filename[dot + 1:].lower()
val = extensiondict.get(key)
if val:
return val[-1]
return default
[docs]class FrameIter(object):
[docs] def __init__(self, reader):
if reader.nframes >= 0:
self.reader = reader
self.curframe = 0
self.nframes = self.reader.nframes
else:
# number of frames is not known, so we have to use the
# Reader.next() method which does not rewind. Reload the
# reader so that we can keep a private copy of our
# current frame.
self.reader = reader.reopen()
self.curframe = -1
def __iter__(self):
return self
def __next__(self):
if self.curframe < 0:
# iterate the reader itself
f = next(self.reader)
if not f:
raise StopIteration
return f
# using our own iterator state
if self.curframe >= self.nframes:
raise StopIteration
f = self.reader.frame(self.curframe)
self.curframe += 1
return f
[docs] def skip(self, count=1):
r = self.reader
if count < 0:
raise ValueError("skip count must be nonnegative")
while count:
r.skip()
count -= 1
def reader_frames(self):
return FrameIter(self)
_molfile.Reader.frames = reader_frames
del reader_frames
[docs]class Grid(object):
[docs] def __init__(self, data, name='', axis=None, origin=None):
''' construct a new Grid object.
data - 3d array of data; a copy is made.
name - name for the grid. default empty string.
axis - 3x3 array with grid axes in the rows. Default diag(1,1,1)
origin - 0,0,0 corner of grid. Defaults to [0,0,0]
'''
self._data = numpy.array(data, 'f')
dims = self._data.shape
if len(dims) != 3:
raise ValueError("bad dims: want 3, got %d" % (len(dims)))
self._name = name
self._axis = numpy.eye(3, dtype='f')
if axis is not None:
axis = numpy.array(axis, 'f')
if axis.shape != (3, 3):
raise ValueError("bad axis: want 3x3, got %s" % (axis.shape,))
self._axis[:] = axis
self._origin = numpy.zeros(3, 'f')
if origin is not None:
origin = numpy.array(origin, 'f')
if origin.shape != (3,):
raise ValueError("bad origin: want 3 elements, got %d" %
(len(origin)))
self._origin[:] = origin
def __repr__(self):
x, y, z = self._data.shape
return "<%s: (%d, %d, %d)>" % (self.name, x, y, z)
@property
def name(self):
return self._name
@property
def data(self):
return self._data
@property
def axis(self):
return self._axis
@property
def origin(self):
return self._origin
def _grid_from_reader(reader, n):
meta = reader.grid_meta(n)
data = numpy.empty(meta["dims"], 'f')
reader.grid_data(n, data)
name = meta["name"]
x = meta["xaxis"]
y = meta["yaxis"]
z = meta["zaxis"]
origin = meta["origin"]
return Grid(data, name=name, axis=[x, y, z], origin=origin)
def _grid_to_writer(writer, grid):
d = {
'name': grid.name,
'origin': grid.origin.tolist(),
'xaxis': grid.axis[0].tolist(),
'yaxis': grid.axis[1].tolist(),
'zaxis': grid.axis[2].tolist(),
'dims': grid.data.shape,
}
writer._grid(d, grid.data)
return writer
_molfile.Reader.grid = _grid_from_reader
_molfile.Writer.grid = _grid_to_writer
[docs]class StkFile(object):
''' Generalized stk file: handles any molfile format that provides times'''
name = 'ls'
filename_extensions = 'ls'
[docs] @classmethod
def read(cls, path, filetype=None):
return cls.Reader(path, filetype)
[docs] class Reader(object):
[docs] def __init__(self, path, filetype):
# path is location of stkfile
# Extract the dirname
# Is path a symlink? If so, figure out where the real file is.
dirname = os.path.dirname(os.path.realpath(path))
with open(path) as fp:
lines = (x.strip() for x in fp)
paths = [x for x in lines if x]
paths = [
os.path.join(dirname, x) if not os.path.isabs(x) else x
for x in paths
]
if filetype is None:
klasses = (guess_filetype(path, SeqFile) for path in paths)
else:
klass = globals()[filetype]
klasses = (klass for path in paths)
readers = [klass.read(r) for klass, r in zip(klasses, paths)]
# remove trailing empty readers
while readers and readers[-1].nframes == 0:
del readers[-1]
# Store the list of sizes to allow get_prop to work.
self._sizes = [r.nframes for r in readers]
if readers:
first = readers[-1].times[0]
i = len(readers) - 1
while i > 0:
i -= 1
cur = readers[i]
n = cur.nframes
while n > 0 and cur.times[n - 1] >= first:
n -= 1
self._sizes[i] = n
if n > 0:
first = min(first, cur.times[0])
seqmap = []
offset = 0
for sz in self._sizes:
offset += sz
seqmap.append(offset)
self.readers = readers
self.seqmap = seqmap
self.times = numpy.concatenate(
[r.times[:sz] for r, sz in zip(self.readers, self._sizes)])
@property
def natoms(self):
return 0 if not self.readers else self.readers[0].natoms
@property
def nframes(self):
return 0 if not self.readers else self.seqmap[-1]
[docs] def frames(self):
return (self.frame(i) for i in range(self.nframes))
[docs] def frame(self, n):
index = bisect.bisect_right(self.seqmap, n)
if index > 0:
n -= self.seqmap[index - 1]
return self.readers[index].frame(n)
[docs] def get_prop(self, prop):
'''
Use the same technique as used for times to generate a properly ordered set of properties across all readers
'''
return numpy.concatenate([
r._get_prop(prop)[:sz]
for r, sz in zip(self.readers, self._sizes)
])
[docs] def at_time_near(self, time):
return self.frame(findframe.at_time_near(self.times, time))
register_plugin(StkFile)
with_pandas = True
[docs]class SeqFile(object):
'''Read csv-like files with column names in the first row
'''
filename_extensions = 'seq'
name = 'seq'
[docs] @classmethod
def read(cls, path):
''' Open an eneseq file for reading '''
return cls.Reader(path)
[docs] class Reader(object):
@staticmethod
def _parse_header(line):
# mapping from eneseq column name to molfile.Frame attribute
ene2frame = dict(
time='time',
E='total_energy',
E_p='potential_energy',
E_k='kinetic_energy',
E_x='extended_energy',
P='pressure',
V='volume',
T='temperature',
)
elems = [x for x in line[1:].split() if ':' in x]
if not elems:
# handle metadynamics output.
attrs = [x for x in line[1:].split()]
else:
attrs = (x.split(':')[1] for x in elems)
return [ene2frame.get(x, x) for x in attrs]
[docs] def __init__(self, path):
'''
Only use a direct read to determine the header for the file
Otherwise, use numpy.loadtxt
'''
props = None
for line in file(path):
cols = line.strip().split()
if len(cols) > 1 and cols[0] == '#' and cols[1].endswith(
'time'):
props = self._parse_header(line)
break
self.rows = None
if with_pandas:
import pandas
try:
self.rows = pandas.read_csv(path,
delim_whitespace=True,
header=None,
comment='#').as_matrix()
except ValueError:
pass
if self.rows is None:
self.rows = numpy.loadtxt(path, ndmin=2)
self.times = self.rows[:, 0] if len(self.rows) else numpy.array([])
self.props = props
@property
def natoms(self):
return 0
@property
def nframes(self):
return len(self.rows)
def _get_prop(self, prop):
'''
Returns a MUTABLE reference to a property.
Used to eliminate a copy when concatenating multiple seq files
'''
try:
return self.rows[:, self.props.index(prop)]
except ValueError:
raise ValueError("Cannot find property {:s}".format(prop))
[docs] def get_prop(self, prop):
# Column accessor
return numpy.copy(self._get_prop(prop))
[docs] def frame(self, n):
f = Frame(natoms=0)
for key, val in zip(self.props, self.rows[n]):
setattr(f, key, val)
return f
[docs] def frames(self):
return (self.frame(i) for i in range(self.nframes))
[docs] def at_time_near(self, time):
return self.frame(findframe.at_time_near(self.times, time))
register_plugin(SeqFile)