@@ -42,6 +42,49 @@ def assert_wav(self, dtype, sample_rate, num_channels, normalize, duration):
42
42
assert sr == sample_rate
43
43
self .assertEqual (data , expected )
44
44
45
def assert_24bit_wav(self, sample_rate, num_channels, normalize, duration):
    """`sox_io_backend.load` can load 24-bit signed PCM wav format.

    torch has no ``int24`` dtype, so the tensor loaded from a 24-bit file is
    implicitly cast to the ``int32`` dtype. For the same reason
    `get_wav_data` does not support ``'int24'``, which means `assert_wav`
    cannot be reused here. The reference data is instead obtained with the
    following workaround.

     x
     |
     |    1. Generate 24-bit wav with Sox.
     |
     v    2. Convert 24-bit wav to 32-bit wav with Sox.
    wav(24-bit) ----------------------> wav(32-bit)
     |                                   |
     | 3. Load 24-bit wav with torchaudio| 4. Load 32-bit wav with scipy
     |                                   |
     v                                   v
    tensor ----------> x <----------- tensor
                   5. Compare

    Underlying assumptions are:
    i. Sox properly converts from 24-bit to 32-bit.
    ii. Loading 32-bit wav file with scipy is correct.
    """
    wav_24bit = self.get_temp_path('1.original.wav')
    wav_32bit = self.get_temp_path('2.reference.wav')

    # Steps 1 and 2: produce the 24-bit file under test, then derive the
    # 32-bit reference file from it with Sox.
    sox_utils.gen_audio_file(
        wav_24bit, sample_rate, num_channels,
        bit_depth=24, duration=duration)
    sox_utils.convert_audio_file(wav_24bit, wav_32bit, bit_depth=32)

    # Step 3: the loader under test reads the 24-bit file.
    loaded, loaded_rate = sox_io_backend.load(wav_24bit, normalize=normalize)

    # Step 4: the reference loader reads the 32-bit file; only the first
    # element of its return value (the sample data) is needed.
    reference = load_wav(wav_32bit, normalize=normalize)[0]

    # Step 5: sample rate must match exactly, sample data within tolerance.
    # NOTE(review): the tolerance values are carried over unchanged; their
    # derivation is not visible from this block.
    assert loaded_rate == sample_rate
    self.assertEqual(loaded, reference, atol=3e-03, rtol=1.3e-06)
87
+
45
88
def assert_mp3 (self , sample_rate , num_channels , bit_rate , duration ):
46
89
"""`sox_io_backend.load` can load mp3 format.
47
90
@@ -50,7 +93,7 @@ def assert_mp3(self, sample_rate, num_channels, bit_rate, duration):
50
93
51
94
x
52
95
|
53
- | 1. Generate mp3 with Sox
96
+ | 1. Generate mp3 with Sox
54
97
|
55
98
v 2. Convert to wav with Sox
56
99
mp3 ------------------------------> wav
@@ -61,7 +104,7 @@ def assert_mp3(self, sample_rate, num_channels, bit_rate, duration):
61
104
tensor ----------> x <----------- tensor
62
105
5. Compare
63
106
64
- Underlying assumptions are;
107
+ Underlying assumptions are:
65
108
i. Conversion of mp3 to wav with Sox preserves data.
66
109
ii. Loading wav file with scipy is correct.
67
110
@@ -213,6 +256,15 @@ def test_wav(self, dtype, sample_rate, num_channels, normalize):
213
256
"""`sox_io_backend.load` can load wav format correctly."""
214
257
self .assert_wav (dtype , sample_rate , num_channels , normalize , duration = 1 )
215
258
259
@parameterized.expand(
    list(itertools.product(
        [8000, 16000],  # sample_rate
        [1, 2],  # num_channels
        [False, True],  # normalize
    )),
    name_func=name_func,
)
def test_24bit_wav(self, sample_rate, num_channels, normalize):
    """`sox_io_backend.load` can load 24-bit wav format correctly, casting it to the ``int32`` tensor dtype."""
    # Every combination of the grid above is checked on a fixed duration of 1.
    self.assert_24bit_wav(sample_rate, num_channels, normalize, duration=1)
267
+
216
268
@parameterized .expand (list (itertools .product (
217
269
['int16' ],
218
270
[16000 ],
0 commit comments