maui.utils.false_color_spectrogram_prepare_dataset
- maui.utils.false_color_spectrogram_prepare_dataset(df, datetime_col, duration_col=None, file_path_col=None, output_dir=None, unit='scale_60', calculate_acoustic_indices=True, **kwargs)
Prepare a dataset for generating false-color spectrograms by segmenting the audio files and, optionally, calculating acoustic indices for the segments.
- Return type:
DataFrame
- Parameters:
- df : pandas.DataFrame
DataFrame containing the audio file paths and timestamps.
- datetime_col : str
Column name for the start time of the audio files.
- duration_col : str, optional
Column name for the duration of the audio files. If None, it will be calculated from file_path_col.
- file_path_col : str, optional
Column name for the audio file paths. Required if duration_col is None.
- output_dir : str, optional
Directory where segmented audio files and results will be stored.
- unit : str, optional
Time unit for segmentation. Default is ‘scale_60’.
- calculate_acoustic_indices : bool, optional
If True, acoustic indices will be calculated for the segmented files.
- **kwargs : dict
Additional parameters for calculating acoustic indices. The available kwargs are:
- acoustic_indices_methods: list of callable
A list of methods used for calculating acoustic indices.
- pre_calculation_method: callable
A method to be applied before the calculation of acoustic indices.
- parallel: bool
Whether to perform the calculation of acoustic indices in parallel.
- chunk_size: int, optional
Size of the chunks of data to be processed in parallel. Default is 5.
- temp_dir: str, optional
Path to a temporary directory for storing intermediate results.
- Returns:
- pandas.DataFrame
A DataFrame containing the segmented audio files and, optionally, the calculated acoustic indices.
- Raises:
- Exception
If both duration_col and file_path_col are None, or if there are overlaps in the audio files, or if time gaps are detected between segments.
Examples
>>> from maui import samples, utils
>>> df = samples.get_audio_sample(dataset="leec")
>>> df["dt"] = pd.to_datetime(df["timestamp_init"]).dt.date
>>> def pre_calculation_method(s, fs):
>>>     Sxx_power, tn, fn, ext = maad.sound.spectrogram(s, fs)
>>>     Sxx_noNoise = maad.sound.median_equalizer(Sxx_power, display=False, extent=ext)
>>>     Sxx_dB_noNoise = maad.util.power2dB(Sxx_noNoise)
>>>
>>>     Sxx, tn, fn, ext = maad.sound.spectrogram(s, fs, mode='amplitude')
>>>
>>>     pre_calc_vars = {'Sxx': Sxx, 'tn': tn, 'fn': fn, 'ext': ext, 'Sxx_dB_noNoise': Sxx_dB_noNoise}
>>>     return pre_calc_vars
>>>
>>> def get_aci(pre_calc_vars):
>>>     aci_xx, aci_per_bin, aci_sum = maad.features.acoustic_complexity_index(pre_calc_vars['Sxx'])
>>>     indices = {'aci_xx': aci_xx, 'aci_per_bin': aci_per_bin, 'aci_sum': aci_sum}
>>>     return indices
>>>
>>> def get_spectral_events(pre_calc_vars):
>>>     EVNspFract_per_bin, EVNspMean_per_bin, EVNspCount_per_bin, EVNsp = maad.features.spectral_events(
>>>         pre_calc_vars['Sxx_dB_noNoise'],
>>>         dt=pre_calc_vars['tn'][1] - pre_calc_vars['tn'][0],
>>>         dB_threshold=6,
>>>         rejectDuration=0.1,
>>>         display=False,
>>>         extent=pre_calc_vars['ext'])
>>>
>>>     indices = {'EVNspFract_per_bin': EVNspFract_per_bin, 'EVNspMean_per_bin': EVNspMean_per_bin, 'EVNspCount_per_bin': EVNspCount_per_bin, 'EVNsp': EVNsp}
>>>     return indices
>>>
>>> def get_spectral_activity(pre_calc_vars):
>>>     ACTspfract_per_bin, ACTspcount_per_bin, ACTspmean_per_bin = maad.features.spectral_activity(pre_calc_vars['Sxx_dB_noNoise'])
>>>     indices = {'ACTspfract_per_bin': ACTspfract_per_bin, 'ACTspcount_per_bin': ACTspcount_per_bin, 'ACTspmean_per_bin': ACTspmean_per_bin}
>>>     return indices
>>>
>>> acoustic_indices_methods = [get_aci, get_spectral_activity, get_spectral_events]
>>>
>>> df_temp = df.iloc[0:1]
>>> segmented_df = utils.false_color_spectrogram_prepare_dataset(
>>>     df_temp,
>>>     datetime_col = 'timestamp_init',
>>>     duration_col = 'duration',
>>>     file_path_col = 'file_path',
>>>     indices = ['acoustic_complexity_index', 'spectral_activity', 'spectral_events'],
>>>     output_dir = './segmented_indices',
>>>     store_audio_segments = True,
>>>     unit = 'scale_02',
>>>     acoustic_indices_methods = acoustic_indices_methods,
>>>     pre_calculation_method = pre_calculation_method,
>>>     temp_dir = os.path.abspath('./temp_ac_files/'),
>>>     parallel = True
>>> )