Source code for SOAPify.utils

"""Utilities submodule, cointains unclassified support functions
Author: Daniele Rapetti"""
from itertools import combinations_with_replacement
import numpy
from ase.data import atomic_numbers
import h5py


def _SOAPpstr(l, Z, n, Zp, np) -> str:
    if atomic_numbers[Z] < atomic_numbers[Zp]:
        Z, Zp = Zp, Z
        n, np = np, n
    return f"{l}_{Z}{n}_{Zp}{np}"


[docs]def getdscribeSOAPMapping( lmax: int, nmax: int, species: "list[str]", crossover: bool = True ) -> numpy.ndarray: """returns how dscribe saves the SOAP results return a list of string with the identities of the data returned from dscribe, see the note in https://singroup.github.io/dscribe/1.2.x/tutorials/descriptors/soap.html Args: lmax (int): the lmax specified in the calculation. nmax (int): the nmax specified in the calculation. species (list[str]): the list of atomic species. crossover (bool): if True, the SOAP descriptors are generated for the mixed species. Defaults to True. Returns: numpy.ndarray: an array of strings with the mapping of the output of the analysis """ species = orderByZ(species) pdscribe = [] for Z in species: for Zp in species: if not crossover and Z != Zp: continue for l in range(lmax + 1): for n in range(nmax): for np in range(nmax): if (np, atomic_numbers[Zp]) >= (n, atomic_numbers[Z]): pdscribe.append(_SOAPpstr(l, Z, n, Zp, np)) return numpy.array(pdscribe)
def _getRSindex(nmax: int, species: "list[str]") -> numpy.ndarray: """Support function for quippy""" rsIndex = numpy.zeros((2, nmax * len(species)), dtype=numpy.int32) i = 0 for iSpecies in range(len(species)): for na in range(nmax): rsIndex[:, i] = na, iSpecies i += 1 return rsIndex
[docs]def getquippySOAPMapping( lmax: int, nmax: int, species: "list[str]", diagonalRadial: bool = False ) -> numpy.ndarray: """returns how quippi saves the SOAP results return a list of string with the identities of the data returned from quippy, see https://github.com/libAtoms/GAP/blob/main/descriptors.f95#L7588 Args: lmax (int): the lmax specified in the calculation. nmax (int): the nmax specified in the calculation. species (list[str]): the list of atomic species. diagonalRadial (bool): if True, Only return the n1=n2 elements of the power spectrum. **NOT IMPLEMENTED**. Defaults to False. Returns: numpy.ndarray: an array of strings with the mapping of the output of the analysis """ species = orderByZ(species) rsIndex = _getRSindex(nmax, species) pquippy = [] for ia in range(len(species) * nmax): np = rsIndex[0, ia] Zp = species[rsIndex[1, ia]] for jb in range(ia + 1): # ia is in the range n = rsIndex[0, jb] Z = species[rsIndex[1, jb]] # if diagonalRadial and np != n: # continue for l in range(lmax + 1): pquippy.append(_SOAPpstr(l, Z, n, Zp, np)) return numpy.array(pquippy)
[docs]def orderByZ(species: "list[str]") -> "list[str]": """Orders the list of species by their atomic number Args: species (list[str]): the list of atomic species to be ordered Returns: list[str]: the ordered list of atomic species """ return sorted(species, key=lambda x: atomic_numbers[x])
[docs]def getAddressesQuippyLikeDscribe( lmax: int, nmax: int, species: "list[str]" ) -> numpy.ndarray: """create a support bdarray to reorder the quippy output in a dscribe fashion Given the lmax and nmax of a SOAP calculation and the species of the atoms returns an array of idexes for reordering the quippy results as the dscribe results Args: lmax (int): the lmax specified in the calculation. nmax (int): the nmax specified in the calculation. species (list[str]): the list of atomic species. Returns: numpy.ndarray: an array of indexes """ species = orderByZ(species) nsp = len(species) addresses = numpy.zeros( (lmax + 1) * ((nmax * nsp) * (nmax * nsp + 1)) // 2, dtype=int ) quippyOrder = getquippySOAPMapping(lmax, nmax, species) dscribeOrder = getdscribeSOAPMapping(lmax, nmax, species) for i, _ in enumerate(addresses): addresses[i] = numpy.where(quippyOrder == dscribeOrder[i])[0][0] return addresses
[docs]def normalizeArray(x: numpy.ndarray) -> numpy.ndarray: """Normalizes the futher axis of the given array (eg. in an array of shape (100,50,3) normalizes all the 5000 3D vectors) Args: x (numpy.ndarray): the array to be normalized Returns: numpy.ndarray: the normalized array """ norm = numpy.linalg.norm(x, axis=-1, keepdims=True) norm[norm == 0] = 1 return x / norm
[docs]def getSlicesFromAttrs(attrs: dict) -> "tuple(list,dict)": """returns the positional slices for the calculated fingerprints Given the attributes of from a SOAP dataset returns the slices of the SOAP vector that contains the pair information Args: attrs (dict): the attributes of the SOAP dataset Returns: tuple(list,dict): the slices of the SOAP vector to be extracted and the atomic types """ species = attrs["species"] slices = {} for symbol1 in species: for symbol2 in species: if f"species_location_{symbol1}-{symbol2}" in attrs: slices[symbol1 + symbol2] = slice( attrs[f"species_location_{symbol1}-{symbol2}"][0], attrs[f"species_location_{symbol1}-{symbol2}"][1], ) return species, slices
def _getIndexesForFillSOAPVectorFromdscribeSameSpecies( lMax: int, nMax: int, ) -> numpy.ndarray: """returns the indexes of the SOAP vector to be reordered given lMax and nMax returns the indexes of the SOAP vector to be reordered to fill a complete vector. useful to calculate the correct distances between the SOAP vectors Args: lMax (int): the lmax specified in the calculation. nMax (int): the nmax specified in the calculation. Returns: numpy.ndarray: the array of the indexes in the correct order """ completeData = numpy.zeros(((lMax + 1), nMax, nMax), dtype=int) limitedID = 0 for l in range(lMax + 1): for n in range(nMax): for nP in range(n, nMax): completeData[l, n, nP] = limitedID completeData[l, nP, n] = limitedID limitedID += 1 return completeData.reshape(-1) def _getIndexesForFillSOAPVectorFromdscribe( lMax: int, nMax: int, atomTypes: list = None, atomicSlices: dict = None, ) -> numpy.ndarray: """Given the data of a SOAP calculation from dscribe returns the SOAP power spectrum with also the symmetric part explicitly stored, see the note in https://singroup.github.io/dscribe/1.2.x/tutorials/descriptors/soap.html No controls are implemented on the shape of the soapFromdscribe vector. Args: soapFromdscribe (numpy.ndarray): the result of the SOAP calculation from the dscribe utility lmax (int): the l_max specified in the calculation. Defaults to 8. nMax (int): the n_max specified in the calculation. Defaults to 8. atomTypes (list[str]): the list of atomic species. Defaults to [None]. atomicSlices (dict): the slices of the SOAP vector relative to che atomic species combinations. Defaults to None. Returns: numpy.ndarray: The full soap spectrum, with the symmetric part sored explicitly """ if atomTypes is None: atomTypes = [None] if atomTypes == [None]: return _getIndexesForFillSOAPVectorFromdscribeSameSpecies(lMax, nMax) nOfFeatures = (lMax + 1) * nMax * nMax nofCombinations = len(list(combinations_with_replacement(atomTypes, 2))) completeData = numpy.zeros(nOfFeatures * nofCombinations, dtype=int) combinationID = 0 for i, symbol1 in enumerate(atomTypes): for j in range(i, len(atomTypes)): symbol2 = atomTypes[j] completeID = combinationID * nOfFeatures completeSlice = slice( completeID, completeID + nOfFeatures, ) if symbol1 == symbol2: completeData[completeSlice] = ( _getIndexesForFillSOAPVectorFromdscribeSameSpecies(lMax, nMax) + atomicSlices[symbol1 + symbol2].start ) else: completeData[completeSlice] = ( numpy.arange(nOfFeatures, dtype=int) + atomicSlices[symbol1 + symbol2].start ) combinationID += 1 return completeData
[docs]def fillSOAPVectorFromdscribe( soapFromdscribe: numpy.ndarray, lMax: int, nMax: int, atomTypes: list = None, atomicSlices: dict = None, ) -> numpy.ndarray: """Given the result of a SOAP calculation from dscribe returns the SOAP power spectrum with also the symmetric part explicitly stored, see the note in https://singroup.github.io/dscribe/1.2.x/tutorials/descriptors/soap.html No controls are implemented on the shape of the soapFromdscribe vector. Args: soapFromdscribe (numpy.ndarray): the result of the SOAP calculation from the dscribe utility lMax (int): the l_max specified in the calculation. nMax (int): the n_max specified in the calculation. Returns: numpy.ndarray: The full soap spectrum, with the symmetric part sored explicitly """ if atomTypes is None: atomTypes = [None] upperDiag = int((lMax + 1) * nMax * (nMax + 1) / 2) fullmat = nMax * nMax * (lMax + 1) limitedSOAPdim = upperDiag * len(atomTypes) + fullmat * int( (len(atomTypes) - 1) * len(atomTypes) / 2 ) # enforcing the order of the atomTypes if len(atomTypes) > 1: atomTypes = list(atomTypes) atomTypes.sort(key=lambda x: atomic_numbers[x]) if soapFromdscribe.shape[-1] != limitedSOAPdim: raise ValueError( "fillSOAPVectorFromdscribe: the given soap vector do not have the expected dimensions" ) indexes = _getIndexesForFillSOAPVectorFromdscribe( lMax, nMax, atomTypes, atomicSlices ) if len(soapFromdscribe.shape) == 1: return soapFromdscribe[indexes] if len(soapFromdscribe.shape) == 2: return soapFromdscribe[:, indexes] if len(soapFromdscribe.shape) == 3: return soapFromdscribe[:, :, indexes] raise ValueError( "fillSOAPVectorFromdscribe: cannot convert array with len(shape) >=3" )
[docs]def getSOAPSettings(fitsetData: h5py.Dataset) -> dict: """Gets the settings of the SOAP calculation you can feed directly this output to :func:`fillSOAPVectorFromdscribe` #TODO: make tests for this Args: fitsetData (h5py.Dataset): A soap dataset with attributes Returns: dict: a dictionary with the following components: - **nMax** - **lMax** - **atomTypes** - **atomicSlices** """ lmax = fitsetData.attrs["l_max"] nmax = fitsetData.attrs["n_max"] symbols, atomicSlices = getSlicesFromAttrs(fitsetData.attrs) return { "nMax": nmax, "lMax": lmax, "atomTypes": symbols, "atomicSlices": atomicSlices, }