# app.py — Gladia.io audio-transcription Gradio demo (revision bcf8a6e)
import os
from time import time
import gradio as gr
import requests
from languages import LANGUAGES
# API key for gladia.io; must be provided via the environment
# (e.g. a Hugging Face Space secret). None if unset — requests will then fail.
GLADIA_API_KEY = os.environ.get("GLADIA_API_KEY")

# Common headers sent with every Gladia API request.
headers = {
    "accept": "application/json",
    "x-gladia-key": GLADIA_API_KEY,
}

# Language-detection modes accepted by the Gladia transcription endpoint.
ACCEPTED_LANGUAGE_BEHAVIOUR = [
    "manual",
    "automatic single language",
    "automatic multiple languages",
]
def transcribe(
    audio: str = None,
) -> dict:
    """
    Transcribe audio to text using the Gladia API.

    Sends the given audio file to the Gladia transcription endpoint
    (with diarization enabled, up to 2 speakers) and renders the result
    as speaker-labelled HTML.

    Get your api key at gladia.io !

    Parameters:
        audio (str): Path to the audio file to transcribe.

    Returns:
        tuple: (html_transcript, prediction_raw) on success, where
            prediction_raw carries extra processing/debug details.
        str: An error message string if the API request fails.
    """
    DEFAULT_MANUAL_LANGUAGE = "english"
    language_behaviour = ACCEPTED_LANGUAGE_BEHAVIOUR[2]

    # Multipart form fields for the Gladia endpoint; (None, value) sends
    # a plain form field rather than a file part.
    files = {
        "language_behaviour": (None, language_behaviour),
        "noise_reduction": (None, "false"),
        'output_format': (None, 'json'),
        'toggle_diarization': (None, 'true'),
        'diarization_max_speakers': (None, '2'),
    }

    # "language" is only honoured when language_behaviour == "manual";
    # the API ignores it in the automatic modes.
    if language_behaviour == "manual":
        files["language"] = (None, DEFAULT_MANUAL_LANGUAGE)

    audio_handle = None
    try:
        if audio:
            # Keep a reference so the handle can be closed after the
            # request (the previous version leaked it).
            audio_handle = open(audio, "rb")
            files["audio"] = (audio, audio_handle, "audio/wav")

        response = requests.post(
            "https://api.gladia.io/audio/text/audio-transcription/",
            headers=headers,
            files=files,
        )
    finally:
        if audio_handle is not None:
            audio_handle.close()

    if response.status_code != 200:
        print(response.content, response.status_code)
        return "Sorry, an error occurred with your request :/"

    # The API returns both "prediction" and "prediction_raw";
    # prediction_raw has more details about the processing and other
    # debugging elements you might be interested in.
    payload = response.json()  # parse once instead of twice
    segments = payload["prediction"]

    # Build the HTML transcript, inserting a speaker header whenever the
    # (known) speaker changes; join once to avoid quadratic += growth.
    parts = []
    current_speaker = ""
    for segment in segments:
        speaker = segment["speaker"]
        if speaker != current_speaker and speaker != "unknown":
            current_speaker = speaker
            parts.append(
                "<br/><br/><b> Speaker:" + str(speaker) + ":</b> "
                + segment["transcription"]
            )
        else:
            parts.append(" " + segment["transcription"])
    return "".join(parts), payload["prediction_raw"]
# --- Gradio UI wiring -------------------------------------------------------

APP_TITLE = "Gladia.io fast audio transcription"

APP_DESCRIPTION = """Gladia.io Whisper large-v2 fast audio transcription API
is able to perform fast audio transcriptions for any audio / video (less than a minute per hour) .<br/>For more details and a benchmark ran on multiple Speech-To-Text providers, please visit
[our post](https://medium.com/@gladia.io/gladia-alpha-launch-redefining-what-s-possible-with-speech-to-text-ai-686dd4312a86) on Medium.
<br/><br/>
You are more than welcome to join us on [Slack](https://gladia-io.slack.com)
and don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha !
"""

# Sample clips shipped with the Space, shown as one-click examples.
EXAMPLE_CLIPS = [
    ["examples/good.will.hunting.wav"],
    ["examples/wolf.of.wall.street.wav"],
]

# The interface feeds an uploaded audio file path into transcribe() and
# renders its two outputs: the HTML transcript and the raw JSON prediction.
iface = gr.Interface(
    fn=transcribe,
    title=APP_TITLE,
    description=APP_DESCRIPTION,
    inputs=[gr.Audio(label="Audio file", source="upload", type="filepath")],
    outputs=["html", "json"],
    examples=EXAMPLE_CLIPS,
)
iface.queue()
iface.launch()