Rapptz · blord0 · Jul 16, 2025 · Jul 16, 2025 · Jul 16, 2025 · Jul 16, 2025
diff --git a/discord/abc.py b/discord/abc.py
@@ -74,7 +74,7 @@
 T = TypeVar('T', bound=VoiceProtocol)

 if TYPE_CHECKING:
    from typing_extensions import Self

    from .client import Client
    from .user import ClientUser
@@ -1624,12 +1624,21 @@
         if view and not hasattr(view, '__discord_ui_view__'):
             raise TypeError(f'view parameter must be View not {view.__class__.__name__}')
 
-        if suppress_embeds or silent:
+        voice = False
+        if file is not None and file.voice:
+            if content is not None:
+                raise TypeError('Cannot send content with a voice message')
+            if embed is not None or embeds is not None:
+                raise TypeError('Cannot send embeds with a voice message')
+            voice = True
+
+        if suppress_embeds or silent or voice:
             from .message import MessageFlags  # circular import
 
             flags = MessageFlags._from_value(0)
             flags.suppress_embeds = suppress_embeds
             flags.suppress_notifications = silent
+            flags.voice = voice
         else:
             flags = MISSING
 

diff --git a/discord/file.py b/discord/file.py
@@ -27,6 +27,10 @@
 
 import os
 import io
+import base64
+from .oggparse import OggStream
+from .opus import Decoder
+import struct
 
 from .utils import MISSING
 
@@ -75,9 +79,37 @@ class File:
         The file description to display, currently only supported for images.
 
         .. versionadded:: 2.0
+
+    voice: :class:`bool`
+        Whether the file is a voice message. If left unspecified, the :attr:`~File.duration` is used
+        to determine if the file is a voice message.
+
+        .. note::
+
+            Voice files must be an audio only format.
+
+            A *non-exhaustive* list of supported formats is: ``ogg``, ``mp3``, ``wav``, ``aac``, and ``flac``.
+
+        .. versionadded:: 2.6
+
+    duration: Optional[:class:`float`]
+        The duration of the voice message in seconds.
+
+        .. versionadded:: 2.6
     """
 
-    __slots__ = ('fp', '_filename', 'spoiler', 'description', '_original_pos', '_owner', '_closer')
+    __slots__ = (
+        'fp',
+        '_filename',
+        'spoiler',
+        'description',
+        '_original_pos',
+        '_owner',
+        '_closer',
+        'duration',
+        '_waveform',
+        'voice',
+    )
 
     def __init__(
         self,
@@ -86,6 +118,9 @@ def __init__(
         *,
         spoiler: bool = MISSING,
         description: Optional[str] = None,
+        voice: bool = MISSING,
+        duration: Optional[float] = None,
+        waveform: Optional[str] = None,
     ):
         if isinstance(fp, io.IOBase):
             if not (fp.seekable() and fp.readable()):
@@ -117,6 +152,15 @@ def __init__(
 
         self.spoiler: bool = spoiler
         self.description: Optional[str] = description
+        self.duration = duration
+        self._waveform = waveform
+
+        if voice is MISSING:
+            voice = duration is not None
+        self.voice = voice
+
+        if duration is None and voice:
+            raise TypeError('Voice messages must have a duration')
 
     @property
     def filename(self) -> str:
@@ -126,6 +170,24 @@ def filename(self) -> str:
         """
         return 'SPOILER_' + self._filename if self.spoiler else self._filename
 
+    @property
+    def waveform(self) -> str:
+        """:class:`str`: The base64 encoded waveform data sent along with the voice message.
+
+        .. note::
+            If no waveform was given, one is generated from Opus audio; if that fails a random one is used.
+
+        .. versionadded:: 2.6"""
+        cached = self._waveform
+        if cached is None:
+            try:
+                cached = self.generate_waveform()
+            except Exception:
+                cached = base64.b64encode(os.urandom(256)).decode('utf-8')
+            self._waveform = cached
+            self.reset()
+        return cached
+
     @filename.setter
     def filename(self, value: str) -> None:
         self._filename, self.spoiler = _strip_spoiler(value)
@@ -156,4 +218,63 @@ def to_dict(self, index: int) -> Dict[str, Any]:
         if self.description is not None:
             payload['description'] = self.description
 
+        if self.voice:
+            payload['duration_secs'] = self.duration
+            payload['waveform'] = self.waveform
+
         return payload
+
+    def generate_waveform(self) -> str:
+        """Generates the base64 waveform for an Ogg Opus voice file.
+
+        Every audio packet is decoded, the absolute sample amplitudes are split into
+        evenly sized buckets (about 10 per second, at most 255), and the bucket
+        averages are scaled so that the loudest one is 255.
+
+        Returns
+        --------
+        :class:`str`
+            The base64 encoded waveform, one byte per point.
+
+        Raises
+        -------
+        TypeError
+            The file is not a voice file or it holds Vorbis audio.
+        ValueError
+            No audio samples could be decoded.
+        """
+        if not self.voice:
+            raise TypeError("Cannot produce waveform for non voice file")
+        self.reset()
+        ogg = OggStream(self.fp)  # type: ignore
+        decoder = Decoder()
+        amplitudes: list[int] = []
+        for packet in ogg.iter_packets():
+            if packet[:8] in (b'OpusHead', b'OpusTags'):
+                continue
+
+            if b'vorbis' in packet:
+                raise TypeError("File format is 'vorbis'. Format of 'opus' is required for waveform generation")
+
+            # Decoded PCM is 16-bit signed little-endian; iter_unpack yields one 1-tuple per sample
+            decoded = decoder.decode(packet, fec=False)
+            amplitudes.extend(abs(sample) for (sample,) in struct.iter_unpack('<h', decoded))
+
+        if not amplitudes:
+            raise ValueError('No audio samples could be decoded')
+
+        # Clamp the point count to 1..255 and to the sample count so every bucket holds at least one sample
+        total = len(amplitudes)
+        point_count = max(1, min(int((self.duration or 0) * 10), 255, total))
+        points: list[int] = []
+        for index in range(point_count):
+            start = index * total // point_count
+            end = (index + 1) * total // point_count
+            points.append(sum(amplitudes[start:end]) // (end - start))
+
+        # Maximum value of a waveform point is 0xff (255); pure silence is left as all zeros
+        highest = max(points)
+        if highest:
+            points = [point * 255 // highest for point in points]
+
+        return base64.b64encode(bytes(points)).decode('utf-8')