import whisper
from moviepy.editor import VideoFileClip
from transformers import BartForConditionalGeneration, BartTokenizer
import gradio as gr
import os
# Load models
whisper_model = whisper.load_model("base")
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
summarizer_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
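# "base" is one of the smaller Whisper checkpoints; larger checkpoints such as "small"
# or "medium" improve transcription accuracy at the cost of load time and memory.
# facebook/bart-large-cnn is a BART model fine-tuned for summarization on CNN/DailyMail.
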
def transcribe_and_summarize(video_file):
    audio_file = "audio.wav"
    try:
        # Step 1: Extract the audio track from the uploaded video
        clip = VideoFileClip(video_file)
        clip.audio.write_audiofile(audio_file)
        clip.close()

        # Step 2: Transcribe the audio with Whisper
        result = whisper_model.transcribe(audio_file)
        transcribed_text = result["text"]

        # Step 3: Summarize the transcription with BART
        # (inputs longer than 1024 tokens are truncated before summarization)
        inputs = tokenizer(transcribed_text, return_tensors="pt", max_length=1024, truncation=True)
        summary_ids = summarizer_model.generate(
            inputs["input_ids"],
            max_length=150,
            min_length=30,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return transcribed_text, summary
    except Exception as e:
        return str(e), "Error occurred during processing."
    finally:
        # Remove the temporary audio file even if an error occurred
        if os.path.exists(audio_file):
            os.remove(audio_file)

# Create Gradio interface
iface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Textbox(label="Transcribed Text", lines=10),
        gr.Textbox(label="Summary", lines=5),
    ],
    title="Video Transcription and Summarization",
    description="Upload a video file to transcribe its audio and generate a summary of the transcribed text.",
)
# Launch the interface
iface.launch()
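
# Assumed runtime dependencies (not pinned here): openai-whisper, moviepy, transformers,
# torch, and gradio, plus an ffmpeg binary on the PATH for audio extraction and decoding.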