@@ -54,11 +54,11 @@ class Spectrogram(torch.nn.Module):
54
54
wkwargs (dict or None, optional): Arguments for window function. (Default: ``None``)
55
55
center (bool, optional): whether to pad :attr:`waveform` on both sides so
56
56
that the :math:`t`-th frame is centered at time :math:`t \times \text{hop\_length}`.
57
- Default: ``True``
57
+ (Default: ``True``)
58
58
pad_mode (string, optional): controls the padding method used when
59
- :attr:`center` is ``True``. Default: ``"reflect"``
59
+ :attr:`center` is ``True``. (Default: ``"reflect"``)
60
60
onesided (bool, optional): controls whether to return half of results to
61
- avoid redundancy Default: ``True``
61
+ avoid redundancy (Default: ``True``)
62
62
return_complex (bool, optional):
63
63
Indicates whether the resulting complex-valued Tensor should be represented with
64
64
native complex dtype, such as `torch.cfloat` and `torch.cdouble`, or real dtype
@@ -343,7 +343,7 @@ class InverseMelScale(torch.nn.Module):
343
343
tolerance_change (float, optional): Difference in losses to stop optimization at. (Default: ``1e-8``)
344
344
sgdargs (dict or None, optional): Arguments for the SGD optimizer. (Default: ``None``)
345
345
norm (Optional[str]): If 'slaney', divide the triangular mel weights by the width of the mel band
346
- (area normalization). (Default: ``None``)
346
+ (area normalization). (Default: ``None``)
347
347
mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``)
348
348
"""
349
349
__constants__ = ['n_stft' , 'n_mels' , 'sample_rate' , 'f_min' , 'f_max' , 'max_iter' , 'tolerance_loss' ,
@@ -434,25 +434,28 @@ class MelSpectrogram(torch.nn.Module):
434
434
435
435
Args:
436
436
sample_rate (int, optional): Sample rate of audio signal. (Default: ``16000``)
437
+ n_fft (int, optional): Size of FFT, creates ``n_fft // 2 + 1`` bins. (Default: ``400``)
437
438
win_length (int or None, optional): Window size. (Default: ``n_fft``)
438
439
hop_length (int or None, optional): Length of hop between STFT windows. (Default: ``win_length // 2``)
439
- n_fft (int, optional): Size of FFT, creates ``n_fft // 2 + 1`` bins. (Default: ``400``)
440
440
f_min (float, optional): Minimum frequency. (Default: ``0.``)
441
441
f_max (float or None, optional): Maximum frequency. (Default: ``None``)
442
442
pad (int, optional): Two sided padding of signal. (Default: ``0``)
443
443
n_mels (int, optional): Number of mel filterbanks. (Default: ``128``)
444
444
window_fn (Callable[..., Tensor], optional): A function to create a window tensor
445
445
that is applied/multiplied to each frame/window. (Default: ``torch.hann_window``)
446
+ power (float, optional): Exponent for the magnitude spectrogram,
447
+ (must be > 0) e.g., 1 for energy, 2 for power, etc. (Default: ``2``)
448
+ normalized (bool, optional): Whether to normalize by magnitude after stft. (Default: ``False``)
446
449
wkwargs (Dict[..., ...] or None, optional): Arguments for window function. (Default: ``None``)
447
450
center (bool, optional): whether to pad :attr:`waveform` on both sides so
448
451
that the :math:`t`-th frame is centered at time :math:`t \times \text{hop\_length}`.
449
- Default: ``True``
452
+ (Default: ``True``)
450
453
pad_mode (string, optional): controls the padding method used when
451
- :attr:`center` is ``True``. Default: ``"reflect"``
454
+ :attr:`center` is ``True``. (Default: ``"reflect"``)
452
455
onesided (bool, optional): controls whether to return half of results to
453
- avoid redundancy. Default: ``True``
456
+ avoid redundancy. (Default: ``True``)
454
457
norm (Optional[str]): If 'slaney', divide the triangular mel weights by the width of the mel band
455
- (area normalization). (Default: ``None``)
458
+ (area normalization). (Default: ``None``)
456
459
mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``)
457
460
458
461
Example
0 commit comments