# Streamlit front end for the hosted Whisper-JAX Space: the user uploads an
# audio file, it is sent to the Space through the Gradio client, and the
# transcription is shown with and without timestamps.
#
# NOTE: "#" comments are used at module level instead of a docstring so that
# no bare string expression sits in the top-level script.

import streamlit as st
from gradio_client import Client
import re
import os
import base64
import tempfile

st.title("Application de transcription Whisper-JAX 🎙️")

# Base URL of the hosted Whisper-JAX Space.
API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"

# Gradio client bound to the Space; created on every run of the script.
client = Client(API_URL)
# Kept from the original script; the returned API description is not used.
client.view_api(return_format="dict")

# Matches "[HH:MM:SS.mmm -> HH:MM:SS.mmm]" plus trailing whitespace. The
# hours field is optional so that "[MM:SS.mmm -> MM:SS.mmm]" stamps are
# stripped as well.
# NOTE(review): short clips appear to be stamped without hours by the
# Whisper-JAX demo — confirm against the actual API output.
_TIMESTAMP_RE = re.compile(
    r"\[(?:\d{2}:)?\d{2}:\d{2}\.\d{3} -> (?:\d{2}:)?\d{2}:\d{2}\.\d{3}\]\s*"
)


def remove_timestamps(text):
    """Return *text* with every bracketed timestamp range removed.

    Defined before the UI code below because the script executes top to
    bottom: the button handler calls this function, so it must already
    exist when the handler runs.
    """
    return _TIMESTAMP_RE.sub("", text)


def transcrire_audio(file_data, task="transcribe", return_timestamps=True, suffix=""):
    """Transcribe raw audio bytes through the Whisper-JAX ``/predict_1`` endpoint.

    Args:
        file_data: Raw bytes of the audio file.
        task: Task name forwarded to the endpoint (default ``"transcribe"``).
        return_timestamps: Forwarded to the endpoint; asks for timestamps.
        suffix: Optional file extension (e.g. ``".mp3"``) given to the
            temporary file that carries the audio.

    Returns:
        A ``(response[0], response[1])`` tuple; the caller treats these as
        the transcription text and the reported runtime.

    Raises:
        Whatever ``client.predict`` raises; the temporary file is removed
        in every case.
    """
    # The Gradio client takes a file path (or URL) for file inputs, not a
    # base64 payload, so the bytes are spilled to a temporary file first.
    # delete=False + explicit removal keeps the file readable by name while
    # the request is in flight on every platform.
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        with tmp:
            tmp.write(file_data)
        # NOTE(review): recent gradio_client releases may expect file inputs
        # wrapped with handle_file(...) — confirm against the installed version.
        response = client.predict(
            tmp.name,
            task,
            return_timestamps,
            api_name="/predict_1",  # Make sure this is the correct endpoint
        )
    finally:
        os.remove(tmp.name)
    # Adjust according to the response structure returned by the API.
    return response[0], response[1]


# Streamlit widget to upload an audio file.
fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])

# Button to process the audio file.
if st.button("Transcrire l'audio"):
    if fichier_telecharge is not None:
        # Read the upload into memory and keep its extension for the temp file.
        file_data = fichier_telecharge.getvalue()
        extension = os.path.splitext(fichier_telecharge.name)[1]
        try:
            transcription, runtime = transcrire_audio(file_data, suffix=extension)
            st.write("Transcription avec horodatage :", transcription)
            # Display the same transcription without timestamps.
            transcription_sans_horodatages = remove_timestamps(transcription)
            st.write("Transcription sans horodatage :", transcription_sans_horodatages)
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"Une erreur est survenue lors de la transcription : {e}")
    else:
        st.error("Veuillez télécharger un fichier audio pour continuer.")