omarxadel committed on
Commit
527afaa
1 Parent(s): 55d853c

fix: update code

Files changed (2)
  1. app.py +18 -28
  2. requirements.txt +0 -0
app.py CHANGED
@@ -1,37 +1,27 @@
-from transformers import HubertForCTC, Wav2Vec2Processor
+from transformers import pipeline
 import gradio as gr
-import time
-import torch
-import soundfile as sf
 import requests
 import os
 
-API_URL = "https://api-inference.huggingface.co/models/omarxadel/hubert-large-arabic-egyptian"
-token = os.environ['apikey']
-headers = {"Authorization": token}
+transcriber = pipeline("automatic-speech-recognition", model="omarxadel/hubert-large-arabic-egyptian")
 
+def transcribe(stream, new_chunk):
+    sr, y = new_chunk
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
 
-def transcribe(audio, state=""):
-    time.sleep(2)
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
 
-    # Load model from HuggingFace Hub
-    with open(audio, "rb") as f:
-        data = f.read()
-    response = requests.post(API_URL, headers=headers, data=data)
-    output = response.json()["text"]
-    state += output + " "
-    return state, state
 
+demo = gr.Interface(
+    transcribe,
+    ["state", gr.Audio(source="microphone", streaming=True)],
+    ["state", "text"],
+    live=True,
+)
 
-gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.Audio(source="microphone", type="filepath", streaming=True),
-        "state"
-    ],
-    outputs=[
-        "textbox",
-        "state"
-    ],
-    live=True).launch(share=True)
-
+demo.launch()
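Note: the new transcribe() function calls np.float32, np.max, np.abs, and np.concatenate, but the diff does not add an import for numpy, so app.py as committed would raise a NameError on the first audio chunk. A minimal runnable sketch of the new script, assuming numpy is available (e.g. via the updated requirements.txt), only needs the extra import; the unused requests and os imports are dropped here:

import numpy as np  # missing from the committed code, which references np
import gradio as gr
from transformers import pipeline

# Load the Egyptian-Arabic HuBERT model locally instead of calling the Inference API
transcriber = pipeline("automatic-speech-recognition", model="omarxadel/hubert-large-arabic-egyptian")


def transcribe(stream, new_chunk):
    # new_chunk arrives as a (sample_rate, numpy array) tuple from the streaming Audio input
    sr, y = new_chunk
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))  # normalize to [-1, 1]

    # Accumulate all audio received so far in the session state
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    # Re-transcribe the full accumulated audio on every chunk
    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]


demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(source="microphone", streaming=True)],  # Gradio 3-style source= argument, as in the commit
    ["state", "text"],
    live=True,
)

demo.launch()

Re-running the pipeline over the whole accumulated buffer keeps the demo simple, at the cost of each update getting slower as the recording grows.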
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ