Skip to content

Commit c9b855c

Browse files
committed
modifications
1 parent 7335d77 commit c9b855c

File tree

1 file changed

+18
-21
lines changed

1 file changed

+18
-21
lines changed

docs/whisper_transcription/whisper_code.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,3 @@
1-
2-
def assign_speakers_to_segments_from_global(chunk_path, segments, diarized_segments):
3-
from difflib import get_close_matches
4-
assigned_speakers = []
5-
for seg in segments:
6-
midpoint = (seg['start'] + seg['end']) / 2
7-
match = None
8-
for track, _, label in diarized_segments:
9-
normalized_label = str(label).strip().upper() # Normalize case and spacing
10-
if track.start <= midpoint <= track.end:
11-
match = normalized_label
12-
break
13-
14-
if not match:
15-
match = "Speaker_0"
16-
assigned_speakers.append(match)
17-
logging.info(f"Speaker assignment breakdown: {Counter(assigned_speakers)}")
18-
return assigned_speakers
19-
20-
211
from datetime import datetime
222
import re
233
import os
@@ -100,6 +80,23 @@ def convert_to_wav(input_path):
10080
raise
10181
return output_path
10282

83+
def assign_speakers_to_segments_from_global(chunk_path, segments, diarized_segments):
    """Assign a speaker label to each transcription segment via global diarization.

    Each segment's temporal midpoint is tested against the diarized speaker
    tracks; the first track whose [start, end] interval contains the midpoint
    wins. Segments whose midpoint falls outside every track default to
    "Speaker_0".

    Args:
        chunk_path: Path of the audio chunk (kept for interface compatibility;
            not consulted by the lookup itself).
        segments: Iterable of dicts with numeric 'start' and 'end' keys
            (seconds, presumably — confirm against caller).
        diarized_segments: Iterable of (track, _, label) tuples where `track`
            exposes `.start` and `.end` attributes (pyannote-style tracks).

    Returns:
        list[str]: One speaker label per input segment, in the same order.
    """
    assigned_speakers = []
    for seg in segments:
        midpoint = (seg['start'] + seg['end']) / 2
        match = None
        for track, _, label in diarized_segments:
            if track.start <= midpoint <= track.end:
                # Normalize case and spacing only for the winning label,
                # rather than on every iteration as before.
                match = str(label).strip().upper()
                break
        if not match:
            match = "Speaker_0"
        assigned_speakers.append(match)
    logging.info(f"Speaker assignment breakdown: {Counter(assigned_speakers)}")
    return assigned_speakers
103100

104101
# Applies noise reduction using noisereduce (conservative settings)
105102

@@ -266,7 +263,7 @@ def assign_speakers_to_segments(full_audio_path, segments, hf_token, max_speaker
266263
"""
267264
logging.info(f"Running speaker diarization on: {full_audio_path}")
268265
try:
269-
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=hf_token)
266+
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=hf_token)
270267
if torch.cuda.is_available():
271268
pipeline.to(torch.device("cuda"))
272269
# Run diarization once per full audio (global instead of per chunk)

0 commit comments

Comments
 (0)