# Source code for storx.hash_m

"""Contains functionality to calculate hash values."""
import hashlib
import random
import numpy as np
import xarray as xr

def hash_array(array: xr.DataArray, nsample=3):
    """Return a SHA-512 hex digest built from a few sampled array elements.

    A generator seeded with a fixed constant picks ``nsample`` flat indices
    (independently, so repeats are possible) inside ``[0, array.size)``. The
    elements at those positions are copied into a float64 buffer and the raw
    bytes of that buffer are fed to SHA-512. Only the sampled elements
    influence the digest.

    Parameters
    ----------
    array
        Array to fingerprint. Its sampled elements must be convertible to
        float, because they are stored in a float64 buffer before hashing.
    nsample
        Number of elements to sample. With ``nsample=0`` the digest of an
        empty byte string is returned.

    Returns
    -------
    str
        Hexadecimal SHA-512 digest of the sampled values.

    Raises
    ------
    ValueError
        If ``array`` has no elements while ``nsample`` is positive, or if
        ``nsample`` is negative (raised by ``np.zeros``).
    """
    seed = 5772156649015328606
    # A private generator yields the same sequence as seeding the module-level
    # one with this integer, but leaves the caller's global random state alone.
    rng = random.Random(seed)

    size = array.size
    if nsample > 0 and size == 0:
        raise ValueError("cannot sample elements from an empty array")

    # randint() includes its upper bound, so a draw equal to ``size`` is out
    # of range. Such draws are discarded and redrawn instead of narrowing the
    # bound: every input that hashed successfully before keeps its digest.
    flat_inds = []
    while len(flat_inds) < nsample:
        candidate = rng.randint(0, size)
        if candidate < size:
            flat_inds.append(candidate)

    # float64 buffer holding the value found at each sampled position
    sampled = np.zeros(nsample)
    for slot, flat in enumerate(flat_inds):
        # Unravel the flat index into one position per dimension
        positions = np.unravel_index(flat, array.shape)
        # Mapping of dimension name -> integer position, as accepted by isel
        indexer = {dim: int(pos) for dim, pos in zip(array.dims, positions)}
        sampled[slot] = array.isel(indexer)

    hasher = hashlib.new("sha512")
    hasher.update(sampled.tobytes())
    return hasher.hexdigest()