Source code for SOAPify.HDF5er.ToHDF5

"""This submodule contains some function to import date to the hdf5 files"""
import warnings
import h5py
import numpy
from MDAnalysis import Universe as mdaUniverse, AtomGroup as mdaAtomGroup
from deprecated import deprecated
from ase.io import iread as aseIRead
from ase.io import read as aseRead
from .HDF5erUtils import exportChunk2HDF5


def universe2HDF5(
    mdaTrajectory: "mdaUniverse | mdaAtomGroup",
    trajFolder: h5py.Group,
    trajChunkSize: int = 100,
    trajslice: slice = slice(None),
    useType="float64",
):
    """Uploads an mda.Universe or an mda.AtomGroup to a h5py.Group in an hdf5 file

    The datasets "Types", "Trajectory" and "Box" are created in `trajFolder`
    only if they are not already there. The frames selected by `trajslice`
    are then buffered and handed to `exportChunk2HDF5` every `trajChunkSize`
    frames, with a final call for the frames left in the buffer.

    Args:
        mdaTrajectory (MDAnalysis.Universe or MDAnalysis.AtomGroup):
            the container with the trajectory data
        trajFolder (h5py.Group):
            the group in which store the trajectory in the hdf5 file
        trajChunkSize (int, optional):
            The desired dimension of the chunks of data that are stored in
            the hdf5 file. It is also the number of frames buffered before
            each export. Defaults to 100.
        trajslice (slice, optional):
            the slice applied to the trajectory of the universe to choose
            the frames to export. Defaults to slice(None).
        useType (str,optional):
            The precision used to store the data. Defaults to "float64".
    """
    selection = mdaTrajectory.atoms
    parentUniverse = mdaTrajectory.universe
    nAtoms = len(selection)
    useType = numpy.dtype(useType)

    # one snapshot of the names is enough: creating a dataset does not
    # change whether one of the other two is already present
    alreadyPresent = set(trajFolder.keys())
    if "Types" not in alreadyPresent:
        trajFolder.create_dataset(
            "Types", nAtoms, compression="gzip", data=selection.types
        )
    if "Trajectory" not in alreadyPresent:
        trajFolder.create_dataset(
            "Trajectory",
            (0, nAtoms, 3),
            compression="gzip",
            chunks=(trajChunkSize, nAtoms, 3),
            maxshape=(None, nAtoms, 3),
            dtype=useType,
        )
    if "Box" not in alreadyPresent:
        trajFolder.create_dataset(
            "Box",
            (0, 6),
            compression="gzip",
            chunks=True,
            maxshape=(None, 6),
            dtype=useType,
        )

    chunkStart = 0
    frameCount = 0
    boxBuffer = []
    positionBuffer = []
    for frameCount, _ in enumerate(parentUniverse.trajectory[trajslice], start=1):
        boxBuffer.append(parentUniverse.dimensions.copy())
        positionBuffer.append(selection.positions)
        if frameCount % trajChunkSize != 0:
            continue
        exportChunk2HDF5(
            trajFolder, chunkStart, frameCount, boxBuffer, positionBuffer
        )
        chunkStart = frameCount
        # fresh lists, so the ones just handed over are never touched again
        boxBuffer = []
        positionBuffer = []

    # in the case that there are some dangling frames
    if boxBuffer:
        exportChunk2HDF5(
            trajFolder, chunkStart, frameCount, boxBuffer, positionBuffer
        )
def MDA2HDF5(
    mdaTrajectory: "mdaUniverse | mdaAtomGroup",
    targetHDF5File: str,
    groupName: str,
    trajChunkSize: int = 100,
    override: bool = False,
    attrs: dict = None,
    trajslice: slice = slice(None),
    useType="float64",
):
    """Creates an HDF5 trajectory group from an mda trajectory

    Opens or creates the given HDF5 file, requests the group
    `Trajectories/<groupName>`, then uploads an mda.Universe or an
    mda.AtomGroup to that group through `universe2HDF5`.

    **WARNING**: if the chosen group is already present in the HDF5 file it
    is reused, and the original author warns that it will be overwritten by
    the new data

    Args:
        mdaTrajectory (MDAnalysis.Universe or MDAnalysis.AtomGroup):
            the container with the trajectory data
        targetHDF5File (str):
            the name of HDF5 file
        groupName (str):
            the name of the group in which save the trajectory data within
            the `targetHDF5File`
        trajChunkSize (int, optional):
            The desired dimension of the chunks of data that are stored in
            the hdf5 file. Defaults to 100.
        override (bool, optional):
            If true the hdf5 file is opened in "w" mode, and will be
            completely overwritten, otherwise it is opened in "a" mode.
            Defaults to False.
        attrs (dict, optional):
            attributes to create on the trajectory group, one per key.
            Nothing is added if it is None or empty. Defaults to None.
        trajslice (slice, optional):
            the slice of the trajectory to export, forwarded to
            `universe2HDF5`. Defaults to slice(None).
        useType (str,optional):
            The precision used to store the data. Defaults to "float64".
    """
    fileMode = "w" if override else "a"
    with h5py.File(targetHDF5File, fileMode) as hdf5File:
        trajGroup = hdf5File.require_group(f"Trajectories/{groupName}")
        universe2HDF5(
            mdaTrajectory,
            trajGroup,
            trajChunkSize=trajChunkSize,
            trajslice=trajslice,
            useType=useType,
        )
        # the attributes must be written while the file is still open
        if attrs:
            for attrName, attrValue in attrs.items():
                trajGroup.attrs.create(attrName, attrValue)
@deprecated('xyz2hdf5Converter is "legacy code" **not covered by unit tests**')
def xyz2hdf5Converter(
    xyzName: str, boxfilename: str, group: h5py.Group
):  # pragma: no cover
    """Generate an HDF5 trajectory from an xyz file and a box file

    This function reads an xyz file with ase and then exports it to a
    trajectory in an hdf5 file, the user should pass the group within the
    hdf5 file to this function.

    Each line of the box file is paired with a frame of the xyz file: the
    first three fields of the line are read as the box sides and the three
    angles are always set to 90 degrees. The data is written to the group
    every 20 frames, with a last write for the remaining frames.

    **NB**: this is "legacy code" **not covered by unit tests**, use with
    caution

    Args:
        xyzName (str):
            the filename of the xyz trajectory
        boxfilename (str):
            the filename of the per frame box dimensions
        group (h5py.Group):
            the group within the hdf5 file where the trajectory will be
            saved
    """
    # TODO: convert use exportChunk2HDF5
    warnings.warn(
        'this is untested "legacy code", use with caution', PendingDeprecationWarning
    )
    # a first read, only to get the number of atoms and their symbols
    referenceFrame = aseRead(xyzName)
    nat = len(referenceFrame.get_positions())

    alreadyPresent = set(group.keys())
    if "Types" not in alreadyPresent:
        group.create_dataset(
            "Types",
            nat,
            compression="gzip",
            data=referenceFrame.get_chemical_symbols(),
        )
    if "Trajectory" not in alreadyPresent:
        group.create_dataset(
            "Trajectory",
            (0, nat, 3),
            compression="gzip",
            chunks=(10, nat, 3),
            maxshape=(None, nat, 3),
            dtype=numpy.float64,
        )
    if "Box" not in alreadyPresent:
        group.create_dataset(
            "Box",
            (0, 6),
            compression="gzip",
            chunks=True,
            maxshape=(None, 6),
            dtype=numpy.float64,
        )

    def storeChunk(start, stop, boxBuffer, positionBuffer):
        # grows the datasets up to `stop` frames, then fills [start:stop]
        group["Box"].resize((stop, 6))
        group["Trajectory"].resize((stop, nat, 3))
        print(
            f"[{start}-{stop}]",
            len(boxBuffer),
            len(group["Box"][start:stop]),
        )
        group["Box"][start:stop] = boxBuffer
        group["Trajectory"][start:stop] = positionBuffer

    xyzFrames = aseIRead(xyzName)
    with open(boxfilename, "r") as boxFile:
        chunkStart = 0
        frameCount = 0
        boxes = []
        atomicframes = []
        # zip stops at the shorter of the two files
        for frameCount, (boxLine, xyzFrame) in enumerate(
            zip(boxFile, xyzFrames), start=1
        ):
            fields = boxLine.split()
            sides = [float(fields[0]), float(fields[1]), float(fields[2])]
            boxes.append(numpy.array(sides + [90.0, 90.0, 90.0]))
            atomicframes.append(xyzFrame.get_positions())
            if frameCount % 20 != 0:
                continue
            storeChunk(chunkStart, frameCount, boxes, atomicframes)
            chunkStart = frameCount
            boxes = []
            atomicframes = []

        # the frames left over after the last full chunk
        if boxes:
            storeChunk(chunkStart, frameCount, boxes, atomicframes)