Source code for storx.hash_m
"""Contains functionality to calculate hash values."""
import hashlib
import random
import numpy as np
import xarray as xr
[docs]
def hash_array(array: xr.DataArray, nsample=3):
    """
    Return a SHA-512 hex digest computed from sampled elements of ``array``.

    ``nsample`` flat indices are drawn from a pseudo-random generator that
    is seeded with a fixed constant, so the same positions are selected on
    every call for a given array size. The values at those positions are
    copied into a float64 NumPy buffer whose raw bytes are fed to SHA-512.

    Parameters
    ----------
    array
        Array to hash. Only ``size``, ``shape``, ``dims`` and ``isel`` are
        used. Sampled values must be convertible to float.
    nsample
        Number of elements to sample (drawn with replacement).

    Returns
    -------
    str
        Hexadecimal SHA-512 digest of the sampled values.

    Raises
    ------
    ValueError
        If ``array`` has no elements while ``nsample`` is positive.
    """
    seed = 5772156649015328606
    # A private generator yields the same sequence as seeding the module
    # level one, but leaves the caller's global random state untouched.
    rng = random.Random(seed)

    size = array.size
    if nsample > 0 and size == 0:
        raise ValueError("cannot sample elements from an empty array")

    # randint() includes its upper bound, so it may return ``size``, which
    # is not a valid flat index. Such draws are discarded and redrawn
    # instead of narrowing the bounds: this keeps every hash that could be
    # computed before unchanged.
    selected_inds = []
    while len(selected_inds) < nsample:
        candidate = rng.randint(0, size)
        if candidate < size:
            selected_inds.append(candidate)

    # The corresponding value in the multidimensional array for each
    # selected flat index
    selected_vals = np.zeros(nsample)
    for j, flat_ind in enumerate(selected_inds):
        # Unravel flat index to one integer position per dimension
        unraveled_inds = np.unravel_index(flat_ind, array.shape)
        # Dictionary that can be used by xarray positional index selection
        indexers = dict(zip(array.dims, unraveled_inds))
        selected_vals[j] = array.isel(indexers)

    hasher = hashlib.sha512()
    hasher.update(selected_vals.tobytes())
    return hasher.hexdigest()