import os
from time import time

import gradio as gr
import requests

from languages import LANGUAGES
GLADIA_API_KEY = os.environ.get("GLADIA_API_KEY")

headers = {
    "accept": "application/json",
    "x-gladia-key": GLADIA_API_KEY,
}
ACCEPTED_LANGUAGE_BEHAVIOUR = [
    "manual",
    "automatic single language",
    "automatic multiple languages",
]
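# Index 2 ("automatic multiple languages") is the behaviour used by default in
# transcribe() below; the other entries are listed for reference.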
def transcribe(
    audio: str = None,
) -> tuple:
    """
    Transcribe audio to text using the Gladia API.

    The function sends the given audio file to the API and returns the
    transcribed text. Get your API key at gladia.io!
    (A commented usage sketch follows this function definition.)

    Parameters:
        audio (str): The path to the audio file to transcribe.

    Returns:
        tuple: An HTML string with the speaker-segmented transcription and a
        dictionary with the raw prediction metadata. If an error occurs, an
        error message and an empty dictionary are returned instead.
    """
    DEFAULT_MANUAL_LANGUAGE = "english"
    language_behaviour = ACCEPTED_LANGUAGE_BEHAVIOUR[2]

    # if a video file is uploaded, its audio track is sent as the audio field
    files = {
        "language_behaviour": (None, language_behaviour),
        "noise_reduction": (None, "false"),
        "output_format": (None, "json"),
        "toggle_diarization": (None, "true"),
        "diarization_max_speakers": (None, "2"),
    }
    # priority is given to the uploaded audio (or video) file
    if audio:
        files["audio"] = (audio, open(audio, "rb"), "audio/wav")
    # if the language is manual, also send the language field;
    # if it is present while language_behaviour is one of the automatic
    # modes, it is ignored anyway
    if language_behaviour == "manual":
        files["language"] = (None, DEFAULT_MANUAL_LANGUAGE)
    start_transfer = time()
    response = requests.post(
        "https://api.gladia.io/audio/text/audio-transcription/",
        headers=headers,
        files=files,
    )
    end_transfer = time()
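    # end_transfer - start_transfer is the upload + transcription round-trip
    # time; it is measured here but not surfaced in the UI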
    if response.status_code != 200:
        print(response.content, response.status_code)
        return "Sorry, an error occurred with your request :/", {}
    # the response has two outputs:
    # "prediction" and "prediction_raw";
    # prediction_raw has more details about the processing and other
    # detailed debugging elements you might be interested in
    result = response.json()
    segments = result["prediction"]
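    # each segment is expected to look roughly like
    # {"speaker": <id or "unknown">, "transcription": "<text>", ...}
    # (assumption based on the fields used below)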
| output = "" | |
| current_speaker = "" | |
| for segment in segments: | |
| if segment["speaker"] != current_speaker and segment["speaker"]!= "unknown": | |
| current_speaker = segment["speaker"] | |
| output = output + "<br/><br/><b> Speaker:" + str(segment["speaker"]) + ":</b> " + segment["transcription"] | |
| else: | |
| output = output + " " + segment["transcription"] | |
    return output, result["prediction_raw"]
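# Usage sketch (kept as a comment so the module still only launches the Gradio
# app): with GLADIA_API_KEY set and one of the bundled examples on disk,
# transcribe() could be called directly, e.g.
#     html_output, raw_prediction = transcribe("examples/good.will.hunting.wav")
#     print(html_output)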
iface = gr.Interface(
    title="Gladia.io fast audio transcription",
    description="""Gladia.io Whisper large-v2 fast audio transcription API
can transcribe any audio or video quickly (less than a minute of processing per hour of audio).<br/>For more details and a benchmark run on multiple Speech-To-Text providers, please visit
[our post](https://medium.com/@gladia.io/gladia-alpha-launch-redefining-what-s-possible-with-speech-to-text-ai-686dd4312a86) on Medium.
<br/><br/>
You are more than welcome to join us on [Slack](https://gladia-io.slack.com),
and don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha!
""",
    fn=transcribe,
    inputs=[
        gr.Audio(label="Audio file", source="upload", type="filepath"),
    ],
    outputs=["html", "json"],
    examples=[
        ["examples/good.will.hunting.wav"],
        ["examples/wolf.of.wall.street.wav"],
    ],
)
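# queue() routes predictions through Gradio's request queue (useful for
# long-running transcriptions); launch() then starts the web server.
# GLADIA_API_KEY must be set in the environment for the API calls to succeed.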
iface.queue()
iface.launch()