bilal6913
/

speech2text

Automatic Speech Recognition

Inference Endpoints

Model card Files Files and versions Community

bilal6913 committed on Sep 30, 2024

Commit

0917860

·

verified ·

1 Parent(s): fb211a0

Create app.py

Files changed (1) hide show

app.py +43 -0

app.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import gradio as gr
+import torchaudio
+import torch
+# Load your trained model (replace with your model loading logic)
+# model = ... (load your model here)
+def transcribe(audio):
+    # Load the audio file
+    waveform, sample_rate = torchaudio.load(audio)
+    # Preprocess the audio (if necessary)
+    # Here, we assume that the model expects a specific input format
+    # For example, convert to mono if it's stereo
+    if waveform.shape[0] > 1:  # If stereo, take the first channel
+        waveform = waveform[0, :].unsqueeze(0)
+    # Normalize the waveform (if necessary)
+    waveform = waveform / waveform.abs().max()
+    # Predict text from audio
+    # Make sure to set the model to evaluation mode
+    # model.eval()
+    with torch.no_grad():
+        # Replace this with your model's prediction logic
+        # predicted_text = model(waveform)
+        # Dummy output for illustration
+        predicted_text = "This is a placeholder for the transcribed text."
+    return predicted_text
+# Create Gradio interface
+interface = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(source="upload", type="filepath"),
+    outputs="text",
+    title="Speech-to-Text Transcription",
+    description="Upload an audio file to transcribe it into text."
+)
+if __name__ == "__main__":
+    interface.launch()