ashish0209's picture
Update app.py
522bcfc verified
import gradio as gr
from faster_whisper import WhisperModel
from time import time
import logging
import json
import requests
import os
import translators as ts
# Token sent as the Bearer credential to the Hugging Face Inference API.
# It is read from the environment so it never has to live in the source.
api_key = os.environ.get("speech_recognition_summarizer")
if api_key is None:
    print("API key not found. Make sure you have set the environment variable.")

# Set up root logging, then turn on debug output for faster_whisper only.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
# Model names offered in the UI: every size in its multilingual form,
# immediately followed by its English-only (".en") variant.
CHOICES = [
    f"{size}{suffix}"
    for size in ("tiny", "base", "small", "medium")
    for suffix in ("", ".en")
]
def load_model(model):
    """Create a WhisperModel for the given model name.

    Args:
        model: Model size name (one of the entries in CHOICES, e.g. "small").

    Returns:
        A WhisperModel using int8 computation on an automatically chosen
        device, with its files downloaded under the local "int8" directory.
    """
    model_options = {
        "device": "auto",
        "compute_type": "int8",
        # Download directory; adjust as needed for Hugging Face Spaces.
        "download_root": "int8",
    }
    return WhisperModel(model, **model_options)
# Currently loaded model (default: small) and the size name it was built from.
# The name is tracked separately because the WhisperModel instance is what
# gets reused between requests, and it is the name the UI sends us.
current_model_name = "small"
current_model = load_model(current_model_name)


def transcribe(audio_file, model):
    """Transcribe an audio file and return the segment texts as JSON.

    Args:
        audio_file: Path of the audio file to transcribe (the Gradio audio
            input is configured with type="filepath").
        model: Model size name selected in the UI, e.g. "small".

    Returns:
        A JSON-encoded list holding the text of each transcribed segment.

    Side effects:
        Replaces the module-level model when a different size is requested,
        and stores the space-joined transcript in the global ``p`` so that
        ``summarize_text`` can pick it up. ``p`` is shared by all requests.
    """
    global current_model, current_model_name, p

    # Reload only when the requested size differs from the loaded one.
    # Comparing against ``current_model.model`` does not work: that attribute
    # is the underlying CTranslate2 model object, not the size name, so the
    # comparison was always unequal and the model was reloaded on every call.
    if current_model_name != model:
        current_model = load_model(model)
        current_model_name = model

    start = time()
    segments, _info = current_model.transcribe(
        audio_file,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500),
    )

    # Iterating the segments is what drives the decoding (faster-whisper
    # returns them lazily), so the timing below is taken after this line.
    transcript = [segment.text for segment in segments]
    print(f"Time Taken to transcribe: {time() - start}")
    print(transcript)

    p = " ".join(transcript)
    return json.dumps(transcript)
def summarize_text(max_length):
    """Summarize the most recent transcript with facebook/bart-large-cnn.

    The transcript is the text that ``transcribe`` stored in the global
    ``p``; the summary is requested from the Hugging Face Inference API.

    Args:
        max_length: Upper bound for the summary length, passed to the model
            as ``max_length``. A quarter of it is used as ``min_length``.

    Returns:
        The summary text when the API answers with the usual
        ``[{"summary_text": ...}]`` payload; otherwise the decoded JSON
        payload as-is (for example an error object). A short explanatory
        message is returned when no transcript exists yet or when the
        request itself fails.
    """
    # ``p`` only exists after a transcription has completed; reading it
    # directly would raise NameError on a fresh start.
    transcript = globals().get("p")
    if not transcript:
        return "No transcript available. Please transcribe an audio file first."

    # The slider may deliver a float; send whole-number length parameters.
    max_length = int(max_length)
    min_length = max_length // 4

    api_url = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {
        "inputs": transcript,
        "parameters": {"min_length": min_length, "max_length": max_length},
    }

    try:
        # A timeout keeps a stalled API call from blocking the worker forever.
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
        summary = response.json()
    except (requests.RequestException, ValueError) as err:
        # Covers connection problems, timeouts and non-JSON response bodies.
        return f"Summarization request failed: {err}"

    # Show only the summary text instead of the raw list-of-dicts payload.
    if (
        isinstance(summary, list)
        and summary
        and isinstance(summary[0], dict)
        and "summary_text" in summary[0]
    ):
        return summary[0]["summary_text"]
    return summary
# Transcription panel: audio upload and model picker in, segment texts out.
# transcribe() returns only the text of each segment (no timestamps), so the
# label and description say exactly that.
interface1 = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Upload MP3 Audio File"),
        gr.Dropdown(choices=CHOICES, value="small", label="Model"),
    ],
    outputs=gr.JSON(label="Transcription"),
    title="Whisper Transcription Service",
    description=(
        "Upload an MP3 audio file to transcribe. Select the model. "
        "The output is the list of transcribed text segments."
    ),
    concurrency_limit=2,
)

# Summarization panel: the slider sets the maximum summary length and the
# result of summarize_text() is shown in a text box.
interface2 = gr.Interface(
    fn=summarize_text,
    inputs=[
        gr.Slider(
            value=60,
            label="Max Length for Text Summarization",
            minimum=10,
            maximum=500,
        ),
    ],
    outputs=gr.Textbox(
        label="Summarized Text",
        type="text",
        value="Summary will appear here",
    ),
)

# Combine both panels side by side using Blocks.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            interface1.render()
        with gr.Column():
            interface2.render()

demo.launch()