import gradio as gr
from faster_whisper import WhisperModel
from time import time
import logging
import json
import requests
import os
import translators as ts  # translation helper; not used in this script yet
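
# Hugging Face API token for the hosted summarization model, read from the environment.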
api_key = os.getenv("speech_recognition_summarizer")
if api_key is None:
    print("API key not found. Make sure you have set the environment variable.")
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
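
# Whisper model sizes offered in the UI dropdown.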
CHOICES = [
    "tiny", "tiny.en", "base",
    "base.en", "small", "small.en",
    "medium", "medium.en",
]


def load_model(model):
    # Load a faster-whisper model with int8 quantization; weights are cached under ./int8.
    download_path_int8 = "int8"
    return WhisperModel(model, device="auto", compute_type="int8", download_root=download_path_int8)


# Track the loaded model and its name so it is only reloaded when a different size is selected.
current_model_name = "small"
current_model = load_model(current_model_name)

# Latest transcript text, shared with summarize_text below.
p = ""
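

# Gradio callback: transcribe the uploaded audio file with the selected Whisper model.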
def transcribe(audio_file, model):
    global current_model, current_model_name, p

    # Reload the model only if a different size was selected in the dropdown.
    if current_model_name != model:
        current_model = load_model(model)
        current_model_name = model

    start = time()
    segments, info = current_model.transcribe(
        audio_file,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500),
    )

    # Collect each segment's text together with its start/end timestamps.
    transcript = [
        {"start": round(segment.start, 2), "end": round(segment.end, 2), "text": segment.text}
        for segment in segments
    ]

    print(f"Time taken to transcribe: {time() - start:.2f}s")
    print(transcript)

    # Keep the plain transcript text for the summarization interface.
    p = " ".join(segment["text"].strip() for segment in transcript)

    return json.dumps(transcript)
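

# Gradio callback: summarize the latest transcript with facebook/bart-large-cnn via the Hugging Face Inference API.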
def summarize_text(max_length):
    if not p:
        return "Transcribe an audio file first."

    headers = {"Authorization": f"Bearer {api_key}"}
    API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    max_length = int(max_length)
    min_length = max_length // 4

    payload = {
        "inputs": p,
        "parameters": {"min_length": min_length, "max_length": max_length},
    }

    response = requests.post(API_URL, headers=headers, json=payload)
    result = response.json()

    # On success the API returns a list like [{"summary_text": "..."}]; otherwise show the raw response.
    if isinstance(result, list) and result and "summary_text" in result[0]:
        return result[0]["summary_text"]
    return str(result)


interface1 = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Upload MP3 Audio File"),
        gr.Dropdown(choices=CHOICES, value="small", label="Model"),
    ],
    outputs=gr.JSON(label="Transcription with Timestamps"),
    title="Whisper Transcription Service",
    description="Upload an MP3 audio file to transcribe. Select the model. The output includes the transcription with timestamps.",
    concurrency_limit=2,
)

interface2 = gr.Interface(
    fn=summarize_text,
    inputs=[gr.Slider(value=60, label="Max Length for Text Summarization", minimum=10, maximum=500)],
    outputs=gr.Textbox(label="Summarized Text", placeholder="Summary will appear here"),
)
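
# Lay the two interfaces out side by side: transcription on the left, summarization on the right.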
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            interface1.render()
        with gr.Column():
            interface2.render()

demo.launch()