# Hugging Face Spaces page header (scrape residue): "Spaces: Sleeping"
import gradio as gr | |
from faster_whisper import WhisperModel | |
from pydub import AudioSegment | |
import os | |
import tempfile | |
from transformers import pipeline | |
# Speech-to-text model: a faster-whisper checkpoint loaded once at import time.
WHISPER_CHECKPOINT = "ivrit-ai/faster-whisper-v2-d4"
model = WhisperModel(WHISPER_CHECKPOINT)

# Summarization pipeline, also loaded once at import time.
# NOTE(review): the "summarization" task normally expects a seq2seq model, and
# this checkpoint name looks like a causal LM -- confirm that it actually loads.
SUMMARIZER_CHECKPOINT = "yam-peleg/Hebrew-Mistral-7B-200K"
summarizer = pipeline(task="summarization", model=SUMMARIZER_CHECKPOINT)
def transcribe_and_summarize(file_path):
    """Transcribe an audio/video file and summarize the transcript.

    Video inputs (recognized by file extension, case-insensitively) are first
    converted to a temporary WAV file, which is always removed afterwards,
    whether or not processing succeeds.

    Args:
        file_path: Path of the uploaded media file.

    Returns:
        A ``(transcript, summary)`` tuple of strings. On failure the first
        element holds an error message and the second is an empty string;
        this function never raises, so the UI always gets two values.
    """
    video_extensions = (".mp4", ".mov", ".avi", ".mkv")
    temp_audio = None  # set only when we created a file that we must delete
    try:
        # Case-insensitive so that e.g. "CLIP.MP4" is treated as video too.
        if file_path.lower().endswith(video_extensions):
            temp_audio = convert_video_to_audio(file_path)
            audio_file = temp_audio
        else:
            audio_file = file_path

        # The segments are consumed here, while the audio file still exists.
        segments, _ = model.transcribe(audio_file, language="he")
        transcript = " ".join(segment.text for segment in segments)

        # Nothing was recognized: there is nothing meaningful to summarize.
        if not transcript.strip():
            return transcript, ""

        # NOTE(review): an instruction prompt used to be built here but was
        # never passed to the summarizer. The raw transcript is still what is
        # summarized; wire a prompt in deliberately if the model needs one.
        summary = summarizer(transcript)[0]["summary_text"]
        return transcript, summary
    except Exception as e:
        # UI boundary: report the failure in the transcript box.
        return f"ืฉืืืื ืืขืืืื ืืงืืืฅ: {e}", ""
    finally:
        # Best-effort cleanup of the temporary audio, on success and failure
        # alike; a cleanup problem must not mask the real result.
        if temp_audio is not None:
            try:
                os.remove(temp_audio)
            except OSError:
                pass
def convert_video_to_audio(video_file):
    """Extract the audio of ``video_file`` into a temporary WAV file.

    Args:
        video_file: Path of the media file to decode.

    Returns:
        Path of the newly created ``.wav`` file. The caller owns this file
        and is responsible for deleting it.

    Raises:
        Exception: Whatever decoding or exporting raises; the temporary file
            is removed before the error propagates.
    """
    # mkstemp creates the file atomically. mktemp only returns a name, which
    # is deprecated and open to a race between naming and creation.
    fd, temp_audio = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # the exporter reopens the file by path
    try:
        audio = AudioSegment.from_file(video_file)
        exported = audio.export(temp_audio, format="wav")
        # export() hands back the open output file; close it so the handle
        # is not leaked and the file can be deleted later.
        exported.close()
    except Exception:
        # Do not leave an orphaned temp file behind on failure.
        if os.path.exists(temp_audio):
            os.remove(temp_audio)
        raise
    return temp_audio
# Gradio UI: one file-path audio input, two text outputs (transcript, summary).
media_input = gr.Audio(type="filepath")
transcript_box = gr.Textbox(label="ืชืืืื")
summary_box = gr.Textbox(label="ืกืืืื")

interface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=media_input,
    outputs=[transcript_box, summary_box],
    title="ืืืืจ ืืืืื/ืืืืื ืืชืืืื ืืกืืืื",
    description="ืืขืื ืงืืืฅ ืืืืื ืื ืืืืื ืฉื ืืจืฆื ืืงืื ืชืืืื ืืื ืืกืืืื ืงืฆืจ ืฉื ืืชืืื.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()