@@ -1,23 +1,3 @@
-
-def assign_speakers_to_segments_from_global(chunk_path, segments, diarized_segments):
-    from difflib import get_close_matches
-    assigned_speakers = []
-    for seg in segments:
-        midpoint = (seg['start'] + seg['end']) / 2
-        match = None
-        for track, _, label in diarized_segments:
-            normalized_label = str(label).strip().upper()  # Normalize case and spacing
-            if track.start <= midpoint <= track.end:
-                match = normalized_label
-                break
-
-        if not match:
-            match = "Speaker_0"
-        assigned_speakers.append(match)
-    logging.info(f"Speaker assignment breakdown: {Counter(assigned_speakers)}")
-    return assigned_speakers
-
-
 from datetime import datetime
 import re
 import os
@@ -100,6 +80,23 @@ def convert_to_wav(input_path):
         raise
     return output_path

+def assign_speakers_to_segments_from_global(chunk_path, segments, diarized_segments):
+    from difflib import get_close_matches
+    assigned_speakers = []
+    for seg in segments:
+        midpoint = (seg['start'] + seg['end']) / 2
+        match = None
+        for track, _, label in diarized_segments:
+            normalized_label = str(label).strip().upper()  # Normalize case and spacing
+            if track.start <= midpoint <= track.end:
+                match = normalized_label
+                break
+
+        if not match:
+            match = "Speaker_0"
+        assigned_speakers.append(match)
+    logging.info(f"Speaker assignment breakdown: {Counter(assigned_speakers)}")
+    return assigned_speakers

 # Applies noise reduction using noisereduce (conservative settings)

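Note on the moved helper above: it labels each transcript segment with the diarization turn that contains the segment's midpoint, defaulting to "Speaker_0" when no turn overlaps it (the `get_close_matches` import is currently unused). A minimal sketch of the inputs it expects, assuming the module-level `logging` and `Counter` imports are in place; the turn boundaries and labels below are illustrative, not from this commit:

    from pyannote.core import Segment

    # (turn, track_name, label) triples, the shape produced by
    # Annotation.itertracks(yield_label=True)
    diarized_segments = [
        (Segment(0.0, 4.0), "A", "SPEAKER_00"),
        (Segment(4.0, 9.0), "B", "SPEAKER_01"),
    ]
    # Transcript segments with start/end times in seconds
    segments = [{'start': 1.0, 'end': 3.0}, {'start': 5.0, 'end': 8.0}]

    # Midpoints 2.0 and 6.5 fall inside the first and second turns respectively
    print(assign_speakers_to_segments_from_global("unused.wav", segments, diarized_segments))
    # -> ['SPEAKER_00', 'SPEAKER_01']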
@@ -266,7 +263,7 @@ def assign_speakers_to_segments(full_audio_path, segments, hf_token, max_speaker
     """
     logging.info(f"Running speaker diarization on: {full_audio_path}")
     try:
-        pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=hf_token)
+        pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=hf_token)
         if torch.cuda.is_available():
             pipeline.to(torch.device("cuda"))
         # Run diarization once per full audio (global instead of per chunk)
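For end-to-end context, a rough sketch of how the updated pipeline call and the moved helper fit together. The Whisper model choice, file name, and token variable are assumptions for illustration, not part of this commit:

    import logging
    from collections import Counter

    import torch
    import whisper
    from pyannote.audio import Pipeline

    audio_path = "meeting.wav"  # illustrative input file
    hf_token = "hf_..."         # assumed Hugging Face token with access to the model

    # Transcribe once; Whisper segments carry 'start'/'end' in seconds
    segments = whisper.load_model("base").transcribe(audio_path)["segments"]

    # Diarize once over the full audio, as the last hunk intends
    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=hf_token)
    if torch.cuda.is_available():
        pipeline.to(torch.device("cuda"))
    diarization = pipeline(audio_path)
    diarized_segments = list(diarization.itertracks(yield_label=True))

    # Call the helper defined in this module
    speakers = assign_speakers_to_segments_from_global(audio_path, segments, diarized_segments)
    logging.info(f"Per-segment speakers: {Counter(speakers)}")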