fawadrashid committed on
Commit
c4f5e53
1 Parent(s): c698392

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +29 -0
  2. app.py +38 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# System packages are installed as root (the base image's default user).
# update/upgrade/install share one layer so a cached, stale package index is
# never reused, and -y keeps the non-interactive build from aborting at a
# confirmation prompt. ffmpeg is the real binary used for audio decoding;
# uvicorn is intentionally not installed from apt because the Debian package
# targets the distribution's Python, not this image's interpreter (pip pulls
# it in as a gradio dependency).
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends wget zip unzip ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Run the application as an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    GRADIO_SERVER_NAME=0.0.0.0
WORKDIR $HOME/app

# Copy the requirements first so the dependency layer is rebuilt only when
# they change. COPY does not expand "~", so the destination uses $HOME.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the application once, already owned by the runtime user; this removes
# the need for a second COPY and a world-writable chmod 777.
COPY --chown=user . $HOME/app

EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import numpy as np
4
+ import librosa
5
+
6
# Speech-recognition pipeline, built once at import time and reused by every
# call to transcribe().
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
7
+
8
def convert_to_mono(y, sr):
    """Return a single-channel version of the audio in ``y``.

    Args:
        y: Audio samples as an array. A one-dimensional array is taken to be
            mono already; anything else is transposed and passed to
            ``librosa.to_mono`` (presumably laid out as (samples, channels)
            -- confirm against the caller).
        sr: Sampling rate. Accepted but not used by this function.

    Returns:
        ``y`` itself when it is one-dimensional, otherwise the result of
        ``librosa.to_mono(y.T)``.
    """
    # Anything that is not 1-D is treated as multi-channel and downmixed.
    if len(y.shape) != 1:
        return librosa.to_mono(y.T)
    return y
15
+
16
def transcribe(stream, new_chunk):
    """Append a new audio chunk to the running stream and transcribe it all.

    Args:
        stream: Audio accumulated by previous calls, or ``None`` on the
            first call.
        new_chunk: A ``(sr, y)`` pair holding the sampling rate and the
            samples of the newly received chunk.

    Returns:
        A ``(stream, text)`` tuple: the updated accumulated audio and the
        ``"text"`` field of the transcriber's result for that audio.
    """
    sr, y = new_chunk
    y = y.astype(np.float32)

    # Peak-normalise the chunk. An empty or completely silent chunk has no
    # usable peak: dividing by zero would fill the chunk with NaNs, which
    # would then be concatenated into the stream and corrupt every later
    # transcription, so such chunks are left untouched.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    # Convert audio to mono if it is stereo
    y = convert_to_mono(y, sr)

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
29
+
30
+
31
# Live microphone demo: the "state" input/output carries the accumulated
# audio between calls to transcribe(), and the text output shows the
# transcription.
demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    ["state", "text"],
    live=True,
)

# Listen on all interfaces on the port the container exposes. Gradio's
# default host of 127.0.0.1 cannot be reached from outside a Docker
# container.
demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
opencv-python-headless<4.3
gradio
torch
torchaudio
transformers
# FFmpeg is a system binary and must be installed with the OS package manager
# (e.g. apt-get install ffmpeg). The PyPI package named "ffmpeg" does not
# provide it and is not imported by app.py, so it is not listed here.
librosa