Spaces:
Runtime error
Runtime error
import os | |
import requests | |
import gradio as gr | |
from languages import LANGUAGES | |
from time import time | |
# API key for gladia.io; expected to be provided via the environment
# (e.g. a Spaces secret). Will be None if unset — requests then fail with 4xx.
GLADIA_API_KEY = os.environ.get("GLADIA_API_KEY")

# Common headers sent with every Gladia API request.
headers = {
    "accept": "application/json",
    "x-gladia-key": GLADIA_API_KEY,
}

# The three language-detection modes accepted by the transcription endpoint.
ACCEPTED_LANGUAGE_BEHAVIOUR = [
    "manual",
    "automatic single language",
    "automatic multiple languages",
]
def transcribe(
    audio: str = None,
    language_behaviour: str = ACCEPTED_LANGUAGE_BEHAVIOUR[2],
    language: str = "english",
    audio_url: str = None,
) -> dict:
    """
    Transcribe audio to text using the Gladia API.

    Sends a multipart request with either a local audio file or an audio URL
    and returns the API's detailed prediction. Get your api key at gladia.io!

    Parameters:
        audio (str): Path to the audio file to transcribe. Takes priority
            over audio_url when both are given.
        language_behaviour (str): How language detection is performed.
            Must be one of ACCEPTED_LANGUAGE_BEHAVIOUR:
            - "manual": the `language` field is sent and the API transcribes
              the audio in that language.
            - "automatic single language": the language is auto-detected but
              the whole transcription is forced into a single language.
            - "automatic multiple languages": the language is auto-detected
              per sentence, allowing code-switching over 97 languages.
        language (str): Language of the audio. Ignored unless
            language_behaviour is "manual".
        audio_url (str): URL of the audio file to transcribe. Used only when
            `audio` is not provided.

    Returns:
        dict: The API's `prediction_raw` payload augmented with client-side
        timing metadata. On a non-200 response, a plain error string instead.
    """
    files = {
        "language_behaviour": (None, language_behaviour),
    }

    audio_file = None
    try:
        # Priority given to the local audio file over the URL.
        if audio:
            audio_file = open(audio, "rb")
            files["audio"] = (audio, audio_file, "audio/wav")
        else:
            # BUG FIX: the original referenced an undefined `audio_url` name
            # and wrapped the field in an extra tuple. Send it as a plain
            # (filename=None, value) multipart form field instead.
            files["audio_url"] = (None, audio_url)

        # Only "manual" mode needs the language field; the API would ignore
        # it anyway for the automatic* behaviours.
        if language_behaviour == "manual":
            files["language"] = (None, language)

        start_transfer = time()
        response = requests.post(
            "https://api.gladia.io/audio/text/audio-transcription/",
            headers=headers,
            files=files,
        )
        end_transfer = time()
    finally:
        # BUG FIX: the original leaked the file handle opened for upload.
        if audio_file is not None:
            audio_file.close()

    if response.status_code != 200:
        print(response.content, response.status_code)
        return "Sorry, an error occured with your request :/"

    # The API returns two outputs: `prediction` and `prediction_raw`.
    # `prediction_raw` has more details about the processing and other
    # debugging elements you might be interested in.
    output = response.json()["prediction_raw"]

    # Augment the server-side metadata with client-observed timings.
    output["metadata"]["client_total_execution_time"] = end_transfer - start_transfer
    output["metadata"]["data_transfer_time"] = (
        output["metadata"]["client_total_execution_time"]
        - output["metadata"]["total_transcription_time"]
    )
    output["metadata"]["api_server_transcription_time"] = output["metadata"][
        "total_transcription_time"
    ]
    # Drop the bulky media-info blob before returning to the UI.
    del output["metadata"]["original_mediainfo"]
    return output
# --- Gradio UI wiring -------------------------------------------------------

_DESCRIPTION = """Gladia.io Whisper large-v2 fast audio transcription API
is able to perform fast audio transcriptions for any audio / video.
For more details and a benchmark ran on multiple Speech-To-Text providers, please visit
[our post](https://medium.com/@gladia.io/gladia-alpha-launch-redefining-what-s-possible-with-speech-to-text-ai-686dd4312a86) on Medium.
<br/><br/>
You are more than welcome to join our [Slack](https://gladia-io.slack.com) to discuss with us
and also don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha !
"""

_BEHAVIOUR_LABEL = """Language transcription behaviour:\n
If "manual", the language field must be provided and the API will transcribe the audio in the given language.
If "automatic single language", the language of the audio will be automatically detected by the API
but will force the transcription to be in a single language.
If "automatic multiple languages", the language of the audio will be automatically detected by the API for
each sentence allowing code-switching over 97 languages.
"""

# Inputs map positionally onto transcribe(audio, language_behaviour, language).
_inputs = [
    gr.Audio(label="or Audio file to transcribe", source="upload", type="filepath"),
    gr.Dropdown(
        label=_BEHAVIOUR_LABEL,
        choices=ACCEPTED_LANGUAGE_BEHAVIOUR,
        value=ACCEPTED_LANGUAGE_BEHAVIOUR[1],
    ),
    gr.Dropdown(
        choices=sorted(LANGUAGES.keys()),
        label="Language (only if language behaviour is set to manual)",
        value="english",
    ),
]

iface = gr.Interface(
    title="Gladia.io fast audio transcription",
    description=_DESCRIPTION,
    fn=transcribe,
    inputs=_inputs,
    outputs="json",
    examples=[
        ["examples/good.will.hunting.wav", ACCEPTED_LANGUAGE_BEHAVIOUR[1], "english"],
        ["examples/wolf.of.wall.street.wav", ACCEPTED_LANGUAGE_BEHAVIOUR[1], "english"],
    ],
)

iface.queue()
iface.launch()