Live update

2026-02-04 23:56:58 +01:00
parent f09f851e31
commit 2d9a534f17
1 changed files with 145 additions and 0 deletions
--- a/voice_bot_live.py
+++ b/voice_bot_live.py
@@ -0,0 +1,145 @@
 import asyncio
 import io
 import os
 import threading
 import time
 import wave

 import discord
 from discord.ext import tasks, commands
 from faster_whisper import WhisperModel
 # ============================
 # MODEL CONFIGURATION (CPU)
 # ============================
 print("⏳ Ładowanie Faster-Whisper...")
 # The 'tiny' model is the fastest; 'base' is more accurate.
 # On CPU, 'tiny' or 'base' is recommended for live use.
 # compute_type="int8" is chosen for speed.
 # NOTE: the model is loaded here, at import time, before the bot is created.
 model = WhisperModel("base", device="cpu", compute_type="int8")
 print("✅ Model gotowy!")
 # ============================
 # KLASA ODBIORNIKA (LIVE SINK)
 # ============================
 class LiveSink(discord.sinks.Sink):
    """Sink that accumulates each speaker's decoded PCM audio in memory.

    py-cord delivers audio to ``write`` from its own receive thread, while
    ``get_audio_chunk`` is called from the event loop, so every access to
    the buffers is serialized with a lock.
    """

    def __init__(self):
        super().__init__()
        # Per-speaker PCM collected since the last drain (kept in RAM).
        self.user_buffers = {}
        # time.time() of buffer creation or of the last drain, per speaker.
        self.last_process_time = {}
        # Guards both dicts and the bytearrays stored in them.
        self._lock = threading.Lock()

    @discord.sinks.Filters.container
    def write(self, data, user):
        """Append a packet of raw PCM bytes to the speaker's buffer.

        The signature must be ``(self, data, user)``: ``Filters.container``
        wraps this method and calls it with exactly those arguments.

        Args:
            data: Decoded PCM audio bytes for one packet.
            user: Key identifying the speaker (py-cord is expected to pass
                the user's ID here).
        """
        with self._lock:
            buffer = self.user_buffers.get(user)
            if buffer is None:
                buffer = self.user_buffers[user] = bytearray()
                self.last_process_time[user] = time.time()
            buffer.extend(data)

    def get_audio_chunk(self, user):
        """Return everything buffered for ``user`` and start a fresh buffer.

        Args:
            user: The same key that was passed to ``write``.

        Returns:
            The accumulated ``bytearray`` (possibly empty), or ``None`` when
            nothing has ever been received for ``user``.
        """
        with self._lock:
            if user not in self.user_buffers:
                return None
            data = self.user_buffers[user]
            # Swap in a new buffer instead of clearing the old one so the
            # returned object is never mutated by later writes.
            self.user_buffers[user] = bytearray()
            self.last_process_time[user] = time.time()
            return data
 # ============================
 # BOT
 # ============================
 # Start from the default intents and additionally enable message content
 # (presumably needed so the "!" prefix commands can read message text).
 intents = discord.Intents.default()
 intents.message_content = True
 bot = commands.Bot(command_prefix="!", intents=intents)
 # Global handle to the active sink: assigned by the `start` command and
 # reset to None by `stop`.
 current_sink = None
 async def transcode_and_transcribe(user_id, pcm_data):
    """Transcribe one chunk of a speaker's PCM audio and print the text.

    The PCM is wrapped in a WAV container entirely in memory and handed to
    Faster-Whisper as a file-like object. Nothing is written to disk, so
    overlapping chunks for the same user cannot clobber each other's file.

    Args:
        user_id: Discord user ID of the speaker; used to look up the name.
        pcm_data: Raw PCM bytes in Discord's format (48 kHz, stereo, 16-bit).

    Errors are reported on stdout rather than raised, because this coroutine
    runs as a fire-and-forget task.
    """
    if not pcm_data:
        return

    def run_whisper():
        # Runs in a worker thread: both the WAV encoding and the (lazy)
        # segment iteration are blocking work.
        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, 'wb') as wav_file:
            wav_file.setnchannels(2)
            wav_file.setsampwidth(2)
            wav_file.setframerate(48000)
            wav_file.writeframes(pcm_data)
        wav_buffer.seek(0)
        segments, _info = model.transcribe(wav_buffer, language="pl", beam_size=1)
        return " ".join(segment.text for segment in segments)

    try:
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(None, run_whisper)
        if text.strip():
            # Prefer the cache; fall back to an API request only on a miss.
            user = bot.get_user(user_id) or await bot.fetch_user(user_id)
            print(f"🔴 LIVE {user.name}: {text}")
    except Exception as e:
        # Task boundary: report and swallow so one bad chunk does not
        # surface as an unhandled task exception.
        print(f"Błąd: {e}")
# Minimum number of buffered bytes before a user's audio is transcribed.
# At 48 kHz stereo 16-bit PCM (192,000 bytes/s) this is roughly 0.8 s of
# audio; smaller buffers are left to grow so Whisper is not fed tiny chunks.
MIN_CHUNK_BYTES = 150_000

# Strong references to in-flight transcription tasks. The event loop only
# keeps weak references, so an unreferenced task may be garbage-collected
# before it finishes.
_transcription_tasks = set()


@tasks.loop(seconds=3.0)
async def live_transcription_loop():
    """Every 3 seconds, drain large enough user buffers and transcribe them.

    Does nothing while no sink is active. Each drained chunk is transcribed
    in its own background task so a slow transcription never delays the loop.
    """
    sink = current_sink
    if sink is None:
        return
    # Iterate over a snapshot of the keys: new speakers may be added to the
    # dict while we are looping.
    for user_id in list(sink.user_buffers):
        if len(sink.user_buffers[user_id]) <= MIN_CHUNK_BYTES:
            continue
        pcm_data = sink.get_audio_chunk(user_id)
        if not pcm_data:
            continue
        task = asyncio.create_task(transcode_and_transcribe(user_id, pcm_data))
        _transcription_tasks.add(task)
        task.add_done_callback(_transcription_tasks.discard)
@bot.event
async def on_ready():
    """Print a readiness banner that includes the bot's user."""
    banner = f'🚀 Bot Live gotowy: {bot.user}'
    print(banner)
@bot.command()
async def join(ctx):
    """Bring the bot into the voice channel of the user who ran the command.

    If the bot is already connected in this guild it is moved to the user's
    channel instead of opening a second connection, which would raise.
    Replies with a hint when the author is not in a voice channel.
    """
    # ctx.author has no `voice` attribute in DMs, hence getattr.
    voice_state = getattr(ctx.author, "voice", None)
    if voice_state is None or voice_state.channel is None:
        await ctx.send("Wejdź na kanał.")
        return
    if ctx.voice_client:
        await ctx.voice_client.move_to(voice_state.channel)
    else:
        await voice_state.channel.connect()
    await ctx.send("Dołączono.")
async def _on_recording_finished(sink, *args):
    """No-op completion callback for ``start_recording``.

    py-cord schedules the callback as a coroutine once recording stops, so
    it must be an ``async def``; a plain function returning ``None`` fails.
    """
    return None


@bot.command()
async def start(ctx):
    """Start live transcription on the bot's current voice connection.

    Installs a fresh ``LiveSink`` as the recording target, publishes it in
    ``current_sink`` and makes sure the polling loop is running. Replies with
    an error message instead of raising when the bot is not connected or
    recording cannot be started (for example because it is already running).
    """
    global current_sink
    voice_client = ctx.voice_client
    if not voice_client:
        return await ctx.send("Nie jestem połączony.")
    print("Rozpoczynam LIVE transkrypcję...")
    sink = LiveSink()
    try:
        # Record into our custom sink.
        voice_client.start_recording(sink, _on_recording_finished, ctx.channel)
    except discord.sinks.RecordingException as exc:
        return await ctx.send(f"Nie można rozpocząć nagrywania: {exc}")
    # Publish the sink only once recording has actually started.
    current_sink = sink
    # Make sure the buffer-polling loop is running. It may still be winding
    # down after a graceful stop, in which case start() would raise.
    if live_transcription_loop.is_running():
        live_transcription_loop.restart()
    else:
        live_transcription_loop.start()
    await ctx.send("Nasłuchuję w trybie LIVE...")
@bot.command()
async def stop(ctx):
    """Stop live transcription and release the active sink.

    Safe to call when nothing is being recorded: the polling loop is halted
    and ``current_sink`` is cleared regardless. Replies with a message when
    the bot is not connected to voice at all.
    """
    global current_sink
    voice_client = ctx.voice_client
    if not voice_client:
        return await ctx.send("Nie jestem połączony.")
    try:
        voice_client.stop_recording()
    except discord.sinks.RecordingException:
        # Nothing was being recorded; still reset our own state below.
        pass
    # cancel() ends the loop immediately. A graceful stop() would leave the
    # task alive until its next wake-up, blocking a quick restart.
    live_transcription_loop.cancel()
    current_sink = None
    await ctx.send("Zatrzymano.")
 # Read the bot token from the environment and fail fast with a clear message
 # when it is missing, instead of passing None to bot.run().
 token = os.getenv("DISCORD_TOKEN")
 if not token:
    raise SystemExit("Brak zmiennej środowiskowej DISCORD_TOKEN.")
 bot.run(token)