# Stt / app.py
# archivartaunik's picture
# Update app.py
# 14becec verified
import os
import json
import mimetypes
import gradio as gr
import google.generativeai as genai
from pydub import AudioSegment
import uuid
import time
import threading
# -----------------------
# Configuration
# -----------------------
# The API key is read from the environment variable named "gemini"; start-up
# is aborted with a ValueError when it is missing or empty.
GEMINI_API_KEY = os.environ.get("gemini")
if not GEMINI_API_KEY:
    raise ValueError("Не знойдзены API ключ для Gemini.")

genai.configure(api_key=GEMINI_API_KEY)

# Generation settings passed to the model; the response MIME type requests
# JSON output, which transcribe_audio later parses with json.loads.
generation_config = dict(
    temperature=0.35,
    top_p=0.95,
    top_k=64,
    max_output_tokens=65536,
    response_mime_type="application/json",
)

# Shared model instance. The system instruction asks for a transcription and
# shows one example of the "start" / "end" / "text" fields.
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    generation_config=generation_config,
    system_instruction="""transcribe
format example:
"start": 858.37,
"end": 859.56,
"text": "Калі бяспечна, безумоўна."
""",
)
# -----------------------
# Progress animation (background thread)
# -----------------------
def progress_animation(status_callback, stop_event):
    """Report an animated "transcription in progress" status until stopped.

    Args:
        status_callback: Callable taking one string; called with each frame.
        stop_event: ``threading.Event``; the loop ends once it is set.

    Nothing is reported if ``stop_event`` is already set on entry.
    """
    frames = ["⏳", "⏳.", "⏳..", "⏳..."]
    position = 0
    while not stop_event.is_set():
        status_callback(f"Транскрыпцыя ідзе {frames[position]}")
        position = (position + 1) % len(frames)
        # Event.wait() returns as soon as the event is set, so the caller's
        # join() is not held up for the rest of a 0.6 s sleep.
        stop_event.wait(0.6)
# -----------------------
# Main functions
# -----------------------
def upload_to_gemini(path, status_callback):
    """Upload the file at ``path`` through ``genai.upload_file``.

    The MIME type is guessed from the file name with ``mimetypes`` and may be
    ``None`` when the guess fails. ``status_callback`` is called once before
    and once after the upload.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    status_callback("📤 Загружаем файл у Gemini...")
    uploaded = genai.upload_file(path, mime_type=guessed_type)
    status_callback("✅ Файл загружаны.")
    return uploaded
def transcribe_audio(audio_path, status_callback):
    """Upload an audio file and ask the model to transcribe it.

    Args:
        audio_path: Path of the audio file to upload.
        status_callback: Callable taking one string, used for progress text.

    Returns:
        On success, whatever ``json.loads`` produces from the model's reply
        (callers in this file expect a list of segments). On failure, a
        human-readable error string; exceptions are not propagated.
    """
    try:
        status_callback("🔍 Падрыхтоўка транскрыпцыі...")
        file_obj = upload_to_gemini(audio_path, status_callback)

        stop_event = threading.Event()
        animation = threading.Thread(
            target=progress_animation,
            args=(status_callback, stop_event),
            daemon=True,  # never keep the process alive just for the spinner
        )
        animation.start()
        try:
            chat = model.start_chat(history=[])
            response = chat.send_message(file_obj)
        finally:
            # Stop the animation even when the request raises; otherwise the
            # thread would keep emitting status updates forever.
            stop_event.set()
            animation.join()

        if not response.text:
            return "❌ Пусты адказ ад мадэлі."

        # Keep the raw reply on disk for inspection.
        with open("last_response.json", "w", encoding="utf-8") as f:
            f.write(response.text)

        status_callback("📥 Апрацоўка транскрыпцыі...")
        transcripts = json.loads(response.text)
        status_callback(f"✅ Гатова: {len(transcripts)} фрагментаў.")
        return transcripts
    except Exception as e:
        return f"Памылка: {e}"
def seconds_to_timestamp(sec: float) -> str:
    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split into
    fields, so a fraction such as 0.9996 carries into the seconds field
    instead of producing a four-digit millisecond part. Negative values are
    clamped to zero.
    """
    total_ms = max(0, int(round(float(sec) * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
def transcripts_to_srt(transcripts, filename="subtitles.srt"):
    """Render transcript segments as SRT text and write them to ``filename``.

    Each segment is indexed with the keys ``"start"``, ``"end"`` and
    ``"text"``; cues are numbered from 1.

    Returns:
        ``(srt_text, filename)`` on success, or ``(error_message, None)``
        when building or writing the SRT raises any exception.
    """
    try:
        cues = [
            "{}\n{} --> {}\n{}\n".format(
                number,
                seconds_to_timestamp(segment["start"]),
                seconds_to_timestamp(segment["end"]),
                segment["text"],
            )
            for number, segment in enumerate(transcripts, start=1)
        ]
        srt_text = "\n".join(cues)
        with open(filename, "w", encoding="utf-8") as srt_file:
            srt_file.write(srt_text)
    except Exception as e:
        return f"Памылка пры запісе SRT: {e}", None
    return srt_text, filename
def extract_audio_from_video(video_file, status_callback):
    """Export the audio of ``video_file`` to a new MP3 and return its path.

    The output is named ``extracted_<uuid hex>.mp3`` as a relative path.
    ``status_callback`` is called before and after the export.
    """
    status_callback("🎞 Вылучаем аўдыё з відэа...")
    track = AudioSegment.from_file(video_file)
    output_path = "extracted_{}.mp3".format(uuid.uuid4().hex)
    track.export(output_path, format="mp3")
    status_callback("✅ Аўдыё вылучана.")
    return output_path
def process_audio(audio_path, status_callback):
    """Transcribe an audio file and convert the result to SRT.

    Args:
        audio_path: Path of the audio file.
        status_callback: Callable taking one string, used for progress text.

    Returns:
        ``(srt_text, srt_filename)`` on success, or ``(error_message, None)``
        when transcription or SRT generation failed.
    """
    result = transcribe_audio(audio_path, status_callback)
    if not isinstance(result, list):
        if isinstance(result, str):
            # transcribe_audio already returns a complete message; adding
            # the prefix again would show "Памылка: Памылка: ...".
            return result, None
        return f"Памылка: {result}", None

    status_callback("📝 Канвертацыя ў SRT...")
    content, filename = transcripts_to_srt(result)
    # transcripts_to_srt signals failure with a None filename; only report
    # the file as ready when it was actually written.
    if filename is not None:
        status_callback("✅ SRT-файл гатовы.")
    return content, filename
def process_video(video_path, status_callback):
    """Extract the audio track of a video and run it through process_audio.

    The intermediate audio file created by extract_audio_from_video is
    deleted once processing finishes, whether it succeeded or raised.

    Returns:
        The ``(text, filename)`` pair returned by process_audio.
    """
    audio_path = extract_audio_from_video(video_path, status_callback)
    try:
        return process_audio(audio_path, status_callback)
    finally:
        # Best-effort cleanup: a failure to delete the temporary file must
        # not mask the processing result or the original exception.
        try:
            os.remove(audio_path)
        except OSError:
            pass
def process_file(audio, video, status_callback):
    """Dispatch to audio or video processing, depending on which was given.

    ``audio`` takes precedence when both are truthy. With neither, a message
    and ``None`` are returned and no processing is started.
    """
    status_callback("🔄 Пачатак апрацоўкі...")
    if not audio and not video:
        return "Няма файла для апрацоўкі.", None
    handler, source = (process_audio, audio) if audio else (process_video, video)
    return handler(source, status_callback)
# -----------------------
# Gradio UI
# -----------------------
# NOTE(review): the Row nesting below is reconstructed; confirm it matches
# the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(
        """
## Загрузіце аўдыёфайл або відэафайл. Субцітры будуць згенераваны разам з файлам субцітраў.
[Ёсць пытанні ці прапановы? Далучайцеся да беларускаймоўнай суполкі tuteishygpt ](https://t.me/SHibelChat)
**Хочаце каб сэрвіс працаваў? Налівайце каву! :** [Buy me a coffee](https://buymeacoffee.com/tuteishygpt)
**Агучце беларускую мову тут :** [Беларуская мадэль маўлення](https://huggingface.co/spaces/archivartaunik/Bextts)
"""
    )
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="🎙 Аўдыёфайл")
        video_input = gr.Video(label="🎥 Відэафайл")  # returns str
    btn = gr.Button("🚀 Апрацаваць")
    with gr.Row():
        transcript_output = gr.Textbox(label="📄 SRT-транскрыпцыя", lines=10)
        file_output = gr.File(label="⬇️ SRT-файл")
    status_output = gr.Textbox(label="🛠️ Статус", interactive=False)

    def wrapped_process(audio, video):
        """Run process_file and stream its status messages to the UI.

        Assigning ``status_output.value`` inside an event handler does not
        reach the browser, so this handler is a generator instead. The work
        runs in a background thread that pushes status strings onto a queue,
        and each one is yielded as an update for the status textbox. The
        final yield carries the SRT text and file.

        Yields:
            ``(transcript, file, status)`` triples matching ``outputs``.

        Raises:
            Any exception raised by process_file, re-raised in the handler.
        """
        import queue  # local import: not part of the file's import block

        updates = queue.Queue()
        outcome = {}

        def worker():
            try:
                outcome["result"] = process_file(audio, video, updates.put)
            except Exception as exc:
                outcome["error"] = exc
            finally:
                updates.put(None)  # sentinel: no more status messages

        threading.Thread(target=worker, daemon=True).start()

        status = ""
        for status in iter(updates.get, None):
            # Leave transcript and file untouched while work is in progress.
            yield gr.update(), gr.update(), status

        if "error" in outcome:
            raise outcome["error"]
        content, filename = outcome["result"]
        yield content, filename, status

    btn.click(
        fn=wrapped_process,
        inputs=[audio_input, video_input],
        outputs=[transcript_output, file_output, status_output],
    )

# Generator handlers stream through Gradio's queue; enabling it explicitly
# keeps this working on versions where it is not on by default.
demo.queue()
demo.launch()