import gradio as gr
from faster_whisper import WhisperModel
from time import time
import logging
import json
import requests
import os
import translators as ts  # translation helper; not used in this script yet
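
# Hugging Face API token for the hosted summarization model, read from the environment.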
api_key = os.getenv("speech_recognition_summarizer")
if api_key is None:
    print("API key not found. Make sure you have set the environment variable.")
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
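
# Whisper model sizes offered in the UI dropdown.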
CHOICES = [
    "tiny", "tiny.en", "base",
    "base.en", "small", "small.en",
    "medium", "medium.en",
]


def load_model(model):
    # Load a faster-whisper model with int8 quantization; weights are cached under ./int8.
    download_path_int8 = "int8"
    return WhisperModel(model, device="auto", compute_type="int8", download_root=download_path_int8)


# Track the loaded model and its name so it is only reloaded when a different size is selected.
current_model_name = "small"
current_model = load_model(current_model_name)

# Latest transcript text, shared with summarize_text below.
p = ""
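

# Gradio callback: transcribe the uploaded audio file with the selected Whisper model.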
def transcribe(audio_file, model):
    global current_model, current_model_name, p

    # Reload the model only if a different size was selected in the dropdown.
    if current_model_name != model:
        current_model = load_model(model)
        current_model_name = model

    start = time()
    segments, info = current_model.transcribe(
        audio_file,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500),
    )

    # Collect each segment's text together with its start/end timestamps.
    transcript = [
        {"start": round(segment.start, 2), "end": round(segment.end, 2), "text": segment.text}
        for segment in segments
    ]

    print(f"Time taken to transcribe: {time() - start:.2f}s")
    print(transcript)

    # Keep the plain transcript text for the summarization interface.
    p = " ".join(segment["text"].strip() for segment in transcript)

    return json.dumps(transcript)
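

# Gradio callback: summarize the latest transcript with facebook/bart-large-cnn via the Hugging Face Inference API.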
def summarize_text(max_length):
    if not p:
        return "Transcribe an audio file first."

    headers = {"Authorization": f"Bearer {api_key}"}
    API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    max_length = int(max_length)
    min_length = max_length // 4

    payload = {
        "inputs": p,
        "parameters": {"min_length": min_length, "max_length": max_length},
    }

    response = requests.post(API_URL, headers=headers, json=payload)
    result = response.json()

    # On success the API returns a list like [{"summary_text": "..."}]; otherwise show the raw response.
    if isinstance(result, list) and result and "summary_text" in result[0]:
        return result[0]["summary_text"]
    return str(result)


interface1 = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Upload MP3 Audio File"),
        gr.Dropdown(choices=CHOICES, value="small", label="Model"),
    ],
    outputs=gr.JSON(label="Transcription with Timestamps"),
    title="Whisper Transcription Service",
    description="Upload an MP3 audio file to transcribe. Select the model. The output includes the transcription with timestamps.",
    concurrency_limit=2,
)

interface2 = gr.Interface(
    fn=summarize_text,
    inputs=[gr.Slider(value=60, label="Max Length for Text Summarization", minimum=10, maximum=500)],
    outputs=gr.Textbox(label="Summarized Text", placeholder="Summary will appear here"),
)
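
# Lay the two interfaces out side by side: transcription on the left, summarization on the right.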
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            interface1.render()
        with gr.Column():
            interface2.render()

demo.launch()