Spaces:

fawadrashid
/

audio_transcription

Runtime error

fawadrashid committed on Mar 8

Commit

c4f5e53

•

1 Parent(s): c698392

Upload 3 files

Files changed (3) hide show

Dockerfile ADDED Viewed

+FROM python:3.9-slim
+# Install OS packages as root, in a single layer, before dropping privileges.
+# Every apt-get step needs -y: a Docker build has no TTY, so an unanswered
+# confirmation prompt aborts the build. The package lists are removed last so
+# they do not stay in the image.
+RUN apt-get update \
+    && apt-get upgrade -y \
+    && apt-get install -y wget zip unzip uvicorn \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+RUN useradd -m -u 1000 user
+USER user
+# GRADIO_SERVER_NAME makes Gradio listen on all interfaces; with the default
+# loopback address the exposed port is not reachable from outside the container.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH \
+    GRADIO_SERVER_NAME=0.0.0.0
+WORKDIR $HOME/app
+# Copy the dependency list first so the pip layer is reused when only code changes.
+# COPY does not expand "~", so the destination is spelled with $HOME.
+COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+# --chown keeps the app files owned by the runtime user, so no root-owned
+# second copy and no chmod 777 are needed.
+COPY --chown=user . $HOME/app
+EXPOSE 7860
+CMD ["python", "app.py"]

app.py ADDED Viewed

+import gradio as gr
+from transformers import pipeline
+import numpy as np
+import librosa
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+def convert_to_mono(y, sr):
+    # Check if the audio is already mono
+    if len(y.shape) == 1:
+        return y
+    # Convert stereo to mono
+    mono_y = librosa.to_mono(y.T)
+    return mono_y
+def transcribe(stream, new_chunk):
+    sr, y = new_chunk
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+    # Convert audio to mono if it is stereo
+    y = convert_to_mono(y, sr)
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
+demo = gr.Interface(
+    transcribe,
+    ["state", gr.Audio(sources=["microphone"], streaming=True)],
+    ["state", "text"],
+    live=True,
+)
+demo.launch()

requirements.txt ADDED Viewed

+# NOTE(review): app.py does not import cv2 — confirm whether this pin is still needed.
+opencv-python-headless<4.3
+gradio
+torch
+torchaudio
+transformers
+# NOTE(review): the PyPI package named "ffmpeg" is presumably not the FFmpeg binary
+# itself — confirm whether a system-level ffmpeg install is what is actually required.
+ffmpeg
+librosa