|
import whisper |
|
import openai |
|
import gradio as gr |
|
from gtts import gTTS |
|
from moviepy.editor import VideoFileClip |
|
import os |
|
|
|
# Read the OpenAI API key from the environment rather than embedding it in
# the source file. NOTE: the key that was previously hard-coded on this line
# has been exposed in source and should be revoked and replaced.
# The value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
def transcribe_video(video_path):
    """Extract the audio track of a video and transcribe it with Whisper.

    Args:
        video_path: Path of the video file to open with ``VideoFileClip``.

    Returns:
        The ``"text"`` field of the Whisper transcription result.

    Raises:
        ValueError: If the video has no audio track.
    """
    import tempfile  # stdlib; imported locally so the block is self-contained

    # A private temporary directory gives every call its own WAV file, so
    # concurrent requests cannot overwrite each other's audio, and the file
    # is removed even when extraction or transcription raises.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_path = os.path.join(work_dir, "audio.wav")

        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("The video has no audio track to transcribe.")
            # 16-bit PCM WAV, same codec as before.
            video.audio.write_audiofile(audio_path, codec="pcm_s16le")
        finally:
            # Always release the clip's underlying file readers.
            video.close()

        model = whisper.load_model("base")
        result = model.transcribe(audio_path)

    return result["text"]
|
|
|
def summarize_text(text, model="gpt-3.5-turbo-instruct", max_tokens=150):
    """Summarize text using the OpenAI completions endpoint.

    Args:
        text: The text to summarize.
        model: Name of the completion model to query. The default replaces
            the retired ``text-davinci-003`` model.
        max_tokens: Upper bound on the number of tokens generated for the
            summary.

    Returns:
        The summary with surrounding whitespace stripped, or ``""`` when
        ``text`` is empty or whitespace-only (no API request is made then).
    """
    # Nothing to summarize: avoid a pointless, billable API call.
    if not text or not text.strip():
        return ""

    response = openai.Completion.create(
        model=model,
        prompt=f"Summarize the following text:\n\n{text}",
        max_tokens=max_tokens,
    )
    return response.choices[0].text.strip()
|
|
|
def text_to_speech(text, language="en", output_path="summary_audio.mp3"):
    """Convert text to spoken audio with gTTS and save it as an MP3 file.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS as ``lang``.
        output_path: Where to write the MP3 file. Callers that may run
            concurrently should pass a unique path, because the default is
            shared by every call.

    Returns:
        ``output_path`` once the file has been written, or ``None`` when
        ``text`` is empty or whitespace-only (gTTS rejects such input, so no
        file is produced).
    """
    # gTTS raises on text with nothing to speak; report "no audio" instead.
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang=language)
    tts.save(output_path)
    return output_path
|
|
|
def process_video(video):
    """Run the full pipeline: transcribe, summarize, then synthesize speech.

    Args:
        video: The video value supplied by the interface's video input; it
            is forwarded to ``transcribe_video`` as a file path. May be
            ``None`` or empty when nothing was uploaded.

    Returns:
        A ``(transcription, summary, audio_file)`` tuple. ``audio_file`` is
        the path returned by ``text_to_speech``, or ``None`` when there was
        nothing to speak. All three are empty (``""``, ``""``, ``None``)
        when no video was provided.
    """
    # No upload: return empty outputs instead of failing inside moviepy.
    if not video:
        return "", "", None

    transcription = transcribe_video(video)
    if not transcription.strip():
        # Nothing was recognized, so there is nothing to summarize or speak.
        return transcription, "", None

    summary = summarize_text(transcription)
    # Speech synthesis cannot handle blank text; skip it for an empty summary.
    audio_file = text_to_speech(summary) if summary.strip() else None

    return transcription, summary, audio_file
|
|
|
|
|
# Gradio UI: one video input, three outputs listed in the same order as the
# tuple returned by process_video (transcription, summary, audio file).
iface = gr.Interface(
    process_video,
    gr.Video(label="Upload Video"),
    [
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
        gr.Audio(label="Summary Audio"),
    ],
    title="Video Transcription and Summarization",
    description="Upload a video file to transcribe and summarize its content.",
)

# Start the web app.
iface.launch()