Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from gradio_client import Client | |
| import re | |
| import os | |
| import base64 | |
st.title("Application de transcription Whisper-JAX 🎙️")

# Base URL of the hosted Whisper-JAX Gradio Space that performs the transcription.
API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"

# Gradio client bound to the Space above.
# NOTE: the former `client.view_api(return_format="dict")` call was dropped; its
# return value was discarded, so it only added work on every script run.
client = Client(API_URL)
def transcrire_audio(file_data, task="transcribe", return_timestamps=True):
    """Transcribe raw audio bytes through the Whisper-JAX Gradio endpoint.

    The Gradio client takes audio inputs as a file path (or URL) rather than
    as an inline base64 string, so the bytes are written to a temporary file
    whose path is sent, and the file is deleted once the call has finished.

    Args:
        file_data: Raw bytes of the audio file to transcribe.
        task: Task name forwarded unchanged to the endpoint.
        return_timestamps: Flag forwarded unchanged to the endpoint.

    Returns:
        A tuple holding the first two items of the endpoint response.

    Raises:
        Any exception raised while writing the temporary file or by
        ``client.predict`` is propagated to the caller.
    """
    import tempfile  # local import: only this function needs it

    # delete=False so the file can be closed (and reopened by the client on
    # every platform) before the request; it is removed explicitly below.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with tmp:
            tmp.write(file_data)
        response = client.predict(
            tmp.name,
            task,
            return_timestamps,
            api_name="/predict_1",  # NOTE(review): confirm this is the intended endpoint
        )
    finally:
        # Never leave the uploaded audio behind, even when the request fails.
        os.remove(tmp.name)
    # NOTE(review): assumes the response is a sequence with at least two items.
    return response[0], response[1]
# Defined before the UI code below: the script is executed top to bottom, so a
# helper declared after the button handler would not exist yet when it is called.
def remove_timestamps(text):
    """Return *text* with every timestamp marker removed.

    A marker has the form ``[HH:MM:SS.mmm -> HH:MM:SS.mmm]``; the hour field is
    optional, so ``[MM:SS.mmm -> MM:SS.mmm]`` is stripped as well. Whitespace
    directly following a marker is removed together with it.

    Args:
        text: Transcription text that may contain timestamp markers.

    Returns:
        The text without timestamp markers.
    """
    # NOTE(review): the hour-less form is assumed to be what the service emits
    # for short clips; the pattern is a strict superset of the original one.
    timestamp = r"(?:\d{2}:)?\d{2}:\d{2}\.\d{3}"
    pattern = r"\[" + timestamp + r" -> " + timestamp + r"\]\s*"
    return re.sub(pattern, "", text)


# Streamlit widget to upload an audio file
fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])

# Button to process the audio file
if st.button("Transcrire l'audio"):
    if fichier_telecharge is not None:
        # Read the whole upload into memory as bytes
        file_data = fichier_telecharge.getvalue()
        try:
            # The second item of the result is unpacked but not displayed
            transcription, runtime = transcrire_audio(file_data)
            st.write("Transcription avec horodatage :", transcription)
            # Also show the transcription with the timestamp markers stripped
            transcription_sans_horodatages = remove_timestamps(transcription)
            st.write("Transcription sans horodatage :", transcription_sans_horodatages)
        except Exception as e:
            # UI boundary: report any failure to the user instead of crashing the app
            st.error(f"Une erreur est survenue lors de la transcription : {e}")
    else:
        st.error("Veuillez télécharger un fichier audio pour continuer.")