From 2d9a534f178b9239c010e0696eb0a6231bef5f6d Mon Sep 17 00:00:00 2001
From: Furas
Date: Wed, 4 Feb 2026 23:56:58 +0100
Subject: [PATCH] Live update

---
 voice_bot_live.py | 238 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 238 insertions(+)
 create mode 100644 voice_bot_live.py

diff --git a/voice_bot_live.py b/voice_bot_live.py
new file mode 100644
index 0000000..b2fae14
--- /dev/null
+++ b/voice_bot_live.py
@@ -0,0 +1,238 @@
+"""Discord voice bot that transcribes speech live with Faster-Whisper.
+
+Audio received from a voice channel is buffered per user in memory and
+periodically passed to a Whisper model running on the CPU; recognised text
+is printed to stdout.
+"""
+
+import asyncio
+import os
+import tempfile
+import threading
+import time
+import wave
+
+import discord
+from discord.ext import commands, tasks
+from faster_whisper import WhisperModel
+
+# ============================
+# MODEL CONFIGURATION (CPU)
+# ============================
+print("⏳ Ładowanie Faster-Whisper...")
+# 'tiny' is the fastest model, 'base' is more accurate; either suits live
+# transcription on a CPU. int8 keeps inference fast.
+model = WhisperModel("base", device="cpu", compute_type="int8")
+print("✅ Model gotowy!")
+
+# PCM layout written into the WAV header: 48 kHz, stereo, 16-bit samples.
+# NOTE(review): this is the format the code assumes Discord delivers --
+# confirm against the voice library in use.
+PCM_CHANNELS = 2
+PCM_SAMPLE_WIDTH = 2
+PCM_FRAME_RATE = 48000
+
+# A buffer is transcribed only once it holds more than this many bytes
+# (about 0.78 s at the layout above), so tiny fragments are skipped.
+MIN_CHUNK_BYTES = 150000
+
+
+# ============================
+# RECEIVER CLASS (LIVE SINK)
+# ============================
+class LiveSink(discord.sinks.Sink):
+    """Sink that accumulates each user's raw PCM audio in memory."""
+
+    def __init__(self):
+        super().__init__()
+        self.user_buffers = {}  # user -> bytearray of unconsumed audio
+        self.last_process_time = {}  # user -> time of the last buffer reset
+        # Guards the buffers: write() and get_audio_chunk() may run on
+        # different threads.
+        # NOTE(review): assumes the library calls write() off the event loop.
+        self._lock = threading.Lock()
+
+    @discord.sinks.Filters.container
+    def write(self, data, user=None):
+        """Append one packet of PCM audio to the speaker's buffer.
+
+        Called as write(data, user) with raw bytes and the speaker's key.
+        The legacy single-argument form, an object exposing .data and
+        .user, is still accepted.
+        """
+        if user is None:
+            data, user = data.data, data.user
+        with self._lock:
+            if user not in self.user_buffers:
+                self.user_buffers[user] = bytearray()
+                self.last_process_time[user] = time.time()
+            self.user_buffers[user] += data
+
+    def get_audio_chunk(self, user):
+        """Return the audio buffered for user and start a fresh buffer.
+
+        Returns None when nothing was ever buffered for that user.
+        """
+        with self._lock:
+            if user not in self.user_buffers:
+                return None
+            data = self.user_buffers[user]
+            # No overlap is kept between consecutive chunks.
+            self.user_buffers[user] = bytearray()
+            self.last_process_time[user] = time.time()
+            return data
+
+
+# ============================
+# BOT
+# ============================
+intents = discord.Intents.default()
+intents.message_content = True
+bot = commands.Bot(command_prefix="!", intents=intents)
+
+# Sink of the active recording, or None while not recording.
+current_sink = None
+
+# Strong references to in-flight transcription tasks; the event loop keeps
+# only weak ones, so an unreferenced task could be collected mid-run.
+_pending_tasks = set()
+
+
+def _transcribe_pcm(pcm_data):
+    """Write pcm_data to a temporary WAV file and return its transcript.
+
+    Blocking; meant to run in a worker thread. Every call gets its own
+    uniquely named file, so overlapping chunks from one user cannot
+    overwrite or delete each other's input.
+    """
+    fd, temp_filename = tempfile.mkstemp(prefix="live_", suffix=".wav")
+    try:
+        with os.fdopen(fd, "wb") as raw, wave.open(raw, "wb") as wav_file:
+            wav_file.setnchannels(PCM_CHANNELS)
+            wav_file.setsampwidth(PCM_SAMPLE_WIDTH)
+            wav_file.setframerate(PCM_FRAME_RATE)
+            wav_file.writeframes(pcm_data)
+        segments, _info = model.transcribe(
+            temp_filename, language="pl", beam_size=1
+        )
+        # Consume the segments here, inside the worker thread.
+        return " ".join(segment.text for segment in segments)
+    finally:
+        if os.path.exists(temp_filename):
+            os.remove(temp_filename)
+
+
+async def transcode_and_transcribe(user_id, pcm_data):
+    """Transcribe one chunk of a user's PCM audio and print the text.
+
+    Args:
+        user_id: ID of the speaker, used to look up the display name.
+        pcm_data: Raw PCM bytes taken from the speaker's buffer.
+
+    Failures are printed and swallowed so one bad chunk does not affect
+    later ones.
+    """
+    try:
+        # Run the blocking file write and inference in the default executor
+        # so the event loop stays responsive.
+        loop = asyncio.get_running_loop()
+        text = await loop.run_in_executor(None, _transcribe_pcm, pcm_data)
+
+        if text.strip():
+            # Try the local cache first, then fall back to an API request.
+            user = bot.get_user(user_id) or await bot.fetch_user(user_id)
+            print(f"🔴 LIVE {user.name}: {text}")
+    except Exception as e:
+        print(f"Błąd: {e}")
+
+
+@tasks.loop(seconds=3.0)  # check the buffers every 3 seconds
+async def live_transcription_loop():
+    """Send every sufficiently full user buffer to the transcriber."""
+    sink = current_sink
+    if sink is None:
+        return
+
+    # Iterate over a snapshot of the keys; new speakers may be added meanwhile.
+    for user_id in list(sink.user_buffers):
+        if len(sink.user_buffers[user_id]) <= MIN_CHUNK_BYTES:
+            continue
+        pcm_data = sink.get_audio_chunk(user_id)
+        if pcm_data:
+            task = asyncio.create_task(
+                transcode_and_transcribe(user_id, pcm_data)
+            )
+            _pending_tasks.add(task)
+            task.add_done_callback(_pending_tasks.discard)
+
+
+@bot.event
+async def on_ready():
+    """Announce on stdout that the bot is ready."""
+    print(f'🚀 Bot Live gotowy: {bot.user}')
+
+
+# Commands below are documented with comments rather than docstrings so
+# that the text shown by the bot's help command stays unchanged.
+
+# !join: connect to the voice channel the command author is in.
+@bot.command()
+async def join(ctx):
+    if ctx.author.voice:
+        await ctx.author.voice.channel.connect()
+        await ctx.send("Dołączono.")
+    else:
+        await ctx.send("Wejdź na kanał.")
+
+
+async def _on_recording_finished(sink, *args):
+    """Do nothing when a recording ends.
+
+    start_recording() needs a coroutine function as its callback; a plain
+    lambda returning None fails once the recording is stopped.
+    """
+
+
+# !start: begin recording into a LiveSink and start the polling loop.
+@bot.command()
+async def start(ctx):
+    global current_sink
+    if not ctx.voice_client:
+        return await ctx.send("Nie jestem połączony.")
+    if ctx.voice_client.recording:
+        # A second start_recording() call would raise.
+        return await ctx.send("Już nasłuchuję.")
+
+    print("Rozpoczynam LIVE transkrypcję...")
+    sink = LiveSink()
+
+    # Record into the custom sink; ctx.channel is passed on to the callback.
+    ctx.voice_client.start_recording(sink, _on_recording_finished, ctx.channel)
+    # Publish the sink only once recording has actually started.
+    current_sink = sink
+
+    # The loop may still be winding down after a recent !stop.
+    if live_transcription_loop.is_running():
+        live_transcription_loop.restart()
+    else:
+        live_transcription_loop.start()
+    await ctx.send("Nasłuchuję w trybie LIVE...")
+
+
+# !stop: stop recording and the polling loop.
+@bot.command()
+async def stop(ctx):
+    global current_sink
+    if ctx.voice_client and ctx.voice_client.recording:
+        ctx.voice_client.stop_recording()
+    # Stop polling and drop the sink even if the bot already left voice.
+    live_transcription_loop.stop()
+    current_sink = None
+    await ctx.send("Zatrzymano.")
+
+
+token = os.getenv("DISCORD_TOKEN")
+if not token:
+    # Fail with a clear message instead of an error from inside bot.run().
+    raise SystemExit("Brak zmiennej środowiskowej DISCORD_TOKEN.")
+bot.run(token)
\ No newline at end of file