
Commit bc65767

add name of paper before reference.

1 parent: e8bae86

7 files changed: +21 additions, -14 deletions

torchaudio/functional/functional.py

Lines changed: 4 additions & 2 deletions

@@ -153,7 +153,8 @@ def griffinlim(
     r"""Compute waveform from a linear scale magnitude spectrogram using the Griffin-Lim transformation.

     Implementation ported from
-    :footcite:`brian_mcfee-proc-scipy-2015`, :footcite:`6701851` and :footcite:`1172092`.
+    *librosa* [:footcite:`brian_mcfee-proc-scipy-2015`], *A fast Griffin-Lim algorithm* [:footcite:`6701851`]
+    and *Signal estimation from modified short-time Fourier transform* [:footcite:`1172092`].

     Args:
         specgram (Tensor): A magnitude-only STFT spectrogram of dimension (..., freq, frames)

@@ -1201,7 +1202,8 @@ def compute_kaldi_pitch(
         recompute_frame: int = 500,
         snip_edges: bool = True,
 ) -> torch.Tensor:
-    """Extract pitch based on method described in :footcite:`6854049`.
+    """Extract pitch based on method described in *A pitch extraction algorithm tuned
+    for automatic speech recognition* [:footcite:`6854049`].

     This function computes the equivalent of `compute-kaldi-pitch-feats` from Kaldi.
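
As a usage note for the second hunk, here is a minimal sketch of calling `compute_kaldi_pitch`; the input file name is hypothetical, and the (..., frames, 2) output layout with (NCCF, pitch) in the last dimension follows the function's documentation:

    import torchaudio

    # Hypothetical input clip; only the waveform and its sample rate are
    # required, the remaining parameters keep their Kaldi-compatible defaults.
    waveform, sample_rate = torchaudio.load("speech.wav")
    pitch_feature = torchaudio.functional.compute_kaldi_pitch(waveform, sample_rate)
    nccf, pitch = pitch_feature[..., 0], pitch_feature[..., 1]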

torchaudio/models/conv_tasnet.py

Lines changed: 3 additions & 1 deletion

@@ -164,7 +164,9 @@ def forward(self, input: torch.Tensor) -> torch.Tensor:


 class ConvTasNet(torch.nn.Module):
-    """Conv-TasNet: a fully-convolutional time-domain audio separation network :footcite:`Luo_2019`.
+    """Conv-TasNet: a fully-convolutional time-domain audio separation network
+    *Conv-TasNet: Surpassing Ideal Time–Frequency Magnitude Masking for Speech Separation*
+    [:footcite:`Luo_2019`].

     Args:
         num_sources (int): The number of sources to split.
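
A minimal usage sketch for the class documented above, using its default configuration; the batch size and clip length are arbitrary, and the (batch, channel == 1, frames) input shape follows the class docstring:

    import torch
    from torchaudio.models import ConvTasNet

    model = ConvTasNet(num_sources=2)
    mixture = torch.randn(3, 1, 32000)  # hypothetical batch of 2-second mixtures at 16 kHz
    separated = model(mixture)          # one output channel per estimated source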

torchaudio/models/deepspeech.py

Lines changed: 2 additions & 1 deletion

@@ -31,7 +31,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

 class DeepSpeech(torch.nn.Module):
     """
-    DeepSpeech model architecture from :footcite:`hannun2014deep`.
+    DeepSpeech model architecture from *Deep Speech: Scaling up end-to-end speech recognition*
+    [:footcite:`hannun2014deep`].

     Args:
         n_feature: Number of input features
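
A minimal sketch for the model above; `n_feature=64` and the (batch, channel, time, feature) input shape are illustrative assumptions based on the model's documented interface:

    import torch
    from torchaudio.models import DeepSpeech

    model = DeepSpeech(n_feature=64)       # n_hidden, n_class and dropout keep their defaults
    features = torch.randn(8, 1, 100, 64)  # hypothetical batch of feature frames
    predictions = model(features)          # per-frame scores over the output classes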

torchaudio/models/wav2letter.py

Lines changed: 2 additions & 1 deletion

@@ -7,7 +7,8 @@


 class Wav2Letter(nn.Module):
-    r"""Wav2Letter model architecture from :footcite:`collobert2016wav2letter`.
+    r"""Wav2Letter model architecture from *Wav2Letter: an End-to-End ConvNet-based Speech
+    Recognition System* [:footcite:`collobert2016wav2letter`].

     :math:`\text{padding} = \frac{\text{ceil}(\text{kernel} - \text{stride})}{2}`
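
A minimal sketch for the model above with its default "waveform" input type; the batch and clip length are arbitrary illustrations:

    import torch
    from torchaudio.models import Wav2Letter

    model = Wav2Letter(num_classes=40, input_type="waveform", num_features=1)
    waveform = torch.randn(4, 1, 16000)  # hypothetical batch of 1-second clips
    out = model(waveform)                # (batch, num_classes, output_length)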

torchaudio/models/wav2vec2/model.py

Lines changed: 4 additions & 4 deletions

@@ -7,7 +7,7 @@


 class Wav2Vec2Model(Module):
-    """Encoder model used in [:footcite:`baevski2020wav2vec`].
+    """Encoder model used in *wav2vec 2.0* [:footcite:`baevski2020wav2vec`].

     Note:
         To build the model, please use one of the factory functions.

@@ -122,7 +122,7 @@ def _get_model(


 def wav2vec2_base(num_out: int) -> Wav2Vec2Model:
-    """Build wav2vec2.0 model with "Base" configuration from [:footcite:`baevski2020wav2vec`].
+    """Build wav2vec2.0 model with "Base" configuration from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`].

     Args:
         num_out: int

@@ -164,7 +164,7 @@ def wav2vec2_base(num_out: int) -> Wav2Vec2Model:


 def wav2vec2_large(num_out: int) -> Wav2Vec2Model:
-    """Build wav2vec2.0 model with "Large" configuration from [:footcite:`baevski2020wav2vec`].
+    """Build wav2vec2.0 model with "Large" configuration from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`].

     Args:
         num_out: int

@@ -206,7 +206,7 @@ def wav2vec2_large(num_out: int) -> Wav2Vec2Model:


 def wav2vec2_large_lv60k(num_out: int) -> Wav2Vec2Model:
-    """Build wav2vec2.0 model with "Large LV-60k" configuration from [:footcite:`baevski2020wav2vec`].
+    """Build wav2vec2.0 model with "Large LV-60k" configuration from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`].

     Args:
         num_out: int
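
A minimal sketch for the factory functions above, assuming `wav2vec2_base` is re-exported from `torchaudio.models`; `num_out`, the input shape, and the (logits, lengths) return pair are assumptions based on the model's documentation:

    import torch
    from torchaudio.models import wav2vec2_base

    model = wav2vec2_base(num_out=32)  # "Base" configuration with 32 output classes
    waveforms = torch.randn(2, 16000)  # hypothetical batch of raw audio
    logits, lengths = model(waveforms)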

torchaudio/models/wavernn.py

Lines changed: 4 additions & 4 deletions

@@ -14,7 +14,7 @@


 class ResBlock(nn.Module):
-    r"""ResNet block based on :footcite:`kalchbrenner2018efficient`.
+    r"""ResNet block based on *Efficient Neural Audio Synthesis* [:footcite:`kalchbrenner2018efficient`].

     Args:
         n_freq: the number of bins in a spectrogram. (Default: ``128``)

@@ -202,9 +202,9 @@ def forward(self, specgram: Tensor) -> Tuple[Tensor, Tensor]:
 class WaveRNN(nn.Module):
     r"""WaveRNN model based on the implementation from `fatchord <https://github.com/fatchord/WaveRNN>`_.

-    The original implementation was introduced in :footcite:`kalchbrenner2018efficient`.
-    The input channels of waveform and spectrogram have to be 1. The product of
-    `upsample_scales` must equal `hop_length`.
+    The original implementation was introduced in *Efficient Neural Audio Synthesis*
+    [:footcite:`kalchbrenner2018efficient`]. The input channels of waveform and spectrogram have to be 1.
+    The product of `upsample_scales` must equal `hop_length`.

     Args:
         upsample_scales: the list of upsample scales.
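
A minimal sketch for WaveRNN honoring the constraint stated in the new docstring text: the product of `upsample_scales` must equal `hop_length`. The shapes follow the class documentation and are otherwise arbitrary:

    import torch
    from torchaudio.models import WaveRNN

    # 5 * 5 * 8 == 200 == hop_length, as the docstring requires.
    model = WaveRNN(upsample_scales=[5, 5, 8], n_classes=256, hop_length=200)
    waveform = torch.randn(1, 1, (10 - 5 + 1) * 200)  # length tied to n_time=10 and default kernel_size=5
    specgram = torch.randn(1, 1, 128, 10)             # (batch, channel == 1, n_freq=128, n_time=10)
    out = model(waveform, specgram)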

torchaudio/transforms.py

Lines changed: 2 additions & 1 deletion

@@ -127,7 +127,8 @@ class GriffinLim(torch.nn.Module):
     r"""Compute waveform from a linear scale magnitude spectrogram using the Griffin-Lim transformation.

     Implementation ported from
-    :footcite:`brian_mcfee-proc-scipy-2015`, :footcite:`6701851` and :footcite:`1172092`.
+    *librosa* [:footcite:`brian_mcfee-proc-scipy-2015`], *A fast Griffin-Lim algorithm* [:footcite:`6701851`]
+    and *Signal estimation from modified short-time Fourier transform* [:footcite:`1172092`].

     Args:
         n_fft (int, optional): Size of FFT, creates ``n_fft // 2 + 1`` bins. (Default: ``400``)
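
A minimal round-trip sketch for the transform documented above; the synthetic waveform is an illustration, and both transforms keep the default power-2.0 spectrogram convention so their settings agree:

    import torch
    import torchaudio

    waveform = torch.randn(1, 16000)  # hypothetical audio signal
    specgram = torchaudio.transforms.Spectrogram(n_fft=400)(waveform)
    restored = torchaudio.transforms.GriffinLim(n_fft=400)(specgram)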
