# Source code for das4whales.dask_wrap
"""
dask_wrap.py - Dask wrapper functions for DAS data processing
This module provides functions to wrap up the functions of das4whales in a dask way.
Authors: Léa Bouffaut, Quentin Goestchel
Date: 2024
"""
# Wraps the functions of dsp.py so that they can be used with dask
import h5py
import wget
import os
import numpy as np
import dask.array as da
from datetime import datetime
import das4whales as dw
# [docs]
def load_das_data(filename, selected_channels, metadata):
    """
    Open a DAS HDF5 file and return its raw dataset together with the axes.

    The raw data are neither read, sliced, nor scaled here: the first returned
    object is the on-disk HDF5 dataset, which can be wrapped lazily (e.g. in a
    dask array) and converted afterwards with ``raw2strain``. Only the distance
    axis is restricted to the selected channels.

    Parameters
    ----------
    filename : str
        The full path to the data to load.
    selected_channels : list
        A list containing the start, stop, and step values for selecting channels.
    metadata : dict
        A dictionary filled with metadata (sampling frequency, channel spacing,
        scale factor, etc.). The keys ``"fs"`` and ``"dx"`` are read here.

    Returns
    -------
    h5py.Dataset
        The [channel x sample] on-disk dataset ``Acquisition/Raw[0]/RawData``
        (all channels, raw values). The underlying file is left open on purpose,
        since the dataset is only usable while the file is open.
    np.ndarray
        The corresponding time axis (s), one value per sample.
    np.ndarray
        The corresponding distance along the FO cable axis (m), restricted to
        the selected channels.
    datetime.datetime
        The beginning time of the file, as a naive datetime expressed in UTC.

    Raises
    ------
    ValueError
        If the file is not found.
    """
    # Only ``datetime`` is imported at module level; ``timezone`` is needed to
    # avoid ``datetime.utcfromtimestamp``, deprecated since Python 3.12.
    from datetime import timezone

    if not os.path.exists(filename):
        raise ValueError('File not found')

    f = h5py.File(filename, 'r')  # HDF5 file
    try:
        d = f['Acquisition/Raw[0]/RawData']  # Pointer on the on-disk array

        # The first time stamp is multiplied by 1e-6 to obtain the seconds
        # expected by ``fromtimestamp``. The timezone is dropped afterwards so
        # that callers keep receiving a naive UTC datetime, as before.
        raw_data_time = f['Acquisition/Raw[0]/RawDataTime']
        file_begin_time_utc = datetime.fromtimestamp(
            float(raw_data_time[0]) * 1e-6, tz=timezone.utc
        ).replace(tzinfo=None)

        # Dimensions of the data block: channels first, samples second
        nnx = d.shape[0]
        nns = d.shape[1]

        # Define the time axis (all samples) and the distance axis (selected channels)
        tx = np.arange(nns) / metadata["fs"]
        start, stop, step = selected_channels[0], selected_channels[1], selected_channels[2]
        dist = np.arange(nnx)[start:stop:step] * metadata["dx"]
    except Exception:
        # Nothing usable is handed back to the caller: do not leak the handle.
        f.close()
        raise

    return d, tx, dist, file_begin_time_utc
# [docs]
def raw2strain(tr, metadata, selected_channels):
    """Convert raw samples to demeaned, scaled float64 values for the selected channels.

    The rows of ``tr`` are sliced with the selected channels, cast to float64,
    demeaned along the second axis (one mean per row), and multiplied by the
    scale factor found in the metadata.

    Parameters
    ----------
    tr : dask.array.core.Array
        daskarray built on the HDF5 pointer (first axis: channels). Any array
        exposing ``astype`` and ``mean`` (e.g. a numpy array) is accepted too.
    metadata : dict
        dictionary of metadata; the key ``"scale_factor"`` is read here
    selected_channels : list
        list of selected spatial indexes and spatial step (start, stop, step)

    Returns
    -------
    dask.array.core.Array
        daskarray filled with scaled float64 (same array type as the input
        for non-dask inputs)
    """
    start, stop, step = selected_channels[0], selected_channels[1], selected_channels[2]
    trace = tr[start:stop:step, :].astype(np.float64)

    # Demeaning through the array's own ``mean`` method: for a dask array this
    # is the dask implementation (the graph stays lazy), and the same line
    # also works for numpy-like inputs.
    row_mean = trace.mean(axis=1, keepdims=True)

    # A new array is built instead of updating in place, so nothing shared
    # with the input can be modified.
    return (trace - row_mean) * metadata["scale_factor"]