Spaces:

Lenylvt
/

BetterWhisper

Sleeping

App Files Community

Lenylvt committed on May 9, 2024

Commit

9846a71

verified ·

1 Parent(s): 5f9e84f

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -35

app.py CHANGED Viewed

@@ -2,59 +2,58 @@ import streamlit as st
 from gradio_client import Client
 import re
 import os
 st.title("Application de transcription Whisper-JAX 🎙️")
-# Spécifiez l'URL de l'API
 API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"
-# Initialisez le client Gradio avec l'URL de l'API
 client = Client(API_URL)
-# Fonction pour transcrire un fichier audio en utilisant le point d'API spécifié
-def transcrire_audio(chemin_audio, task="transcription", return_timestamps=True):
-    """Fonction pour transcrire un fichier audio en utilisant le point d'API Whisper-JAX."""
-    with open(chemin_audio, "rb") as file:
-        # Préparation de la requête
-        response = client.predict(
-            file,
-            task,
-            return_timestamps,
-            api_name="/predict_1"  # Assurez-vous que c'est le bon endpoint
-        )
-    return response[0], response[1]  # Ajustez selon la structure de la réponse retournée par l'API
-# Widget Streamlit pour télécharger un fichier audio
 fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])
-# Bouton pour traiter le fichier audio
 if st.button("Transcrire l'audio"):
     if fichier_telecharge is not None:
-        # Enregistrez le fichier téléchargé temporairement
-        chemin_fichier = f"temp_{fichier_telecharge.name}"
-        with open(chemin_fichier, "wb") as f:
-            f.write(fichier_telecharge.getbuffer())
-        # Appel de la fonction de transcription
         try:
-            transcription, temps_traitement = transcrire_audio(chemin_fichier)
             st.write("Transcription avec horodatage :", transcription)
-            # Affichage de la transcription sans horodatages
             transcription_sans_horodatages = remove_timestamps(transcription)
             st.write("Transcription sans horodatage :", transcription_sans_horodatages)
         except Exception as e:
             st.error(f"Une erreur est survenue lors de la transcription : {str(e)}")
-        finally:
-            # Nettoyage du fichier temporaire
-            os.remove(chemin_fichier)
     else:
         st.error("Veuillez télécharger un fichier audio pour continuer.")
-# Fonction pour supprimer les horodatages du texte
-def remove_timestamps(texte):
-    # Motif pour correspondre aux horodatages au format [HH:MM:SS.mmm -> HH:MM:SS.mmm]
-    motif = r"\[\d{2}:\d{2}:\d{2}\.\d{3} -> \d{2}:\d{2}:\d{2}\.\d{3}\]\s*"
-    # Remplacer les motifs correspondants par une chaîne vide
-    texte_nettoye = re.sub(motif, "", texte)
-    return texte_nettoye

 from gradio_client import Client
 import re
 import os
+import base64
 st.title("Application de transcription Whisper-JAX 🎙️")
+# Specify the API URL
 API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"
+# Initialize the Gradio client with the API URL
 client = Client(API_URL)
+client.view_api(return_format="dict")
+# Function to transcribe an audio file using the specified API endpoint
+def transcrire_audio(file_data, task="transcribe", return_timestamps=True):
+    """Function to transcribe an audio file using the Whisper-JAX API endpoint."""
+    # Encode the file data to base64
+    base64_encoded_data = base64.b64encode(file_data).decode('utf-8')
+    # Prepare and send the request
+    response = client.predict(
+        base64_encoded_data,
+        task,
+        return_timestamps,
+        api_name="/predict_1"  # Make sure this is the correct endpoint
+    )
+    return response[0], response[1]  # Adjust according to the response structure returned by the API
+# Streamlit widget to upload an audio file
 fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])
+# Button to process the audio file
 if st.button("Transcrire l'audio"):
     if fichier_telecharge is not None:
+        # Read the file into memory
+        file_data = fichier_telecharge.getvalue()
+        # Call the transcription function
         try:
+            transcription, runtime = transcrire_audio(file_data)
             st.write("Transcription avec horodatage :", transcription)
+            # Display transcription without timestamps
             transcription_sans_horodatages = remove_timestamps(transcription)
             st.write("Transcription sans horodatage :", transcription_sans_horodatages)
         except Exception as e:
             st.error(f"Une erreur est survenue lors de la transcription : {str(e)}")
     else:
         st.error("Veuillez télécharger un fichier audio pour continuer.")
+# Function to remove timestamps from text
+def remove_timestamps(text):
+    # Pattern to match timestamps in the format [HH:MM:SS.mmm -> HH:MM:SS.mmm]
+    pattern = r"\[\d{2}:\d{2}:\d{2}\.\d{3} -> \d{2}:\d{2}:\d{2}\.\d{3}\]\s*"
+    # Replace matched patterns with an empty string
+    cleaned_text = re.sub(pattern, "", text)
+    return cleaned_text