Unggi committed on
Commit
c688a3b
1 Parent(s): 5f3392e

Change local Whisper model to the Whisper API

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. requirements.txt +2 -1
  3. vocal_app.py +27 -17
README.md CHANGED
@@ -3,4 +3,4 @@
3
  # ffmpeg
4
 
5
  apt-get update
6
- apt-get ffmpeg
 
3
  # ffmpeg
4
 
5
  apt-get update
6
+ apt-get install ffmpeg
requirements.txt CHANGED
@@ -3,4 +3,5 @@ boto3
3
  openai
4
  langchain
5
  python-dotenv
6
- ffmpeg-python
 
 
3
  openai
4
  langchain
5
  python-dotenv
6
+ gradio
7
+ git+https://github.com/openai/whisper.git
vocal_app.py CHANGED
@@ -1,24 +1,34 @@
1
-
2
- from transformers import pipeline
3
  import gradio as gr
4
- import time
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- p = pipeline("automatic-speech-recognition")
7
 
8
- def transcribe(audio, state=""):
9
- time.sleep(2)
10
- text = p(audio)["text"]
11
- state += text + " "
12
- return state, state
 
 
13
 
14
  gr.Interface(
15
- fn=transcribe,
 
16
  inputs=[
17
- gr.Audio(source="microphone", type="filepath", streaming=True),
18
- "state"
19
- ],
20
- outputs=[
21
- "textbox",
22
- "state"
23
  ],
24
- live=True).launch()
 
 
1
+ import whisper
 
2
  import gradio as gr
3
+ from dotenv import dotenv_values
4
+ import openai
5
+ import os
6
+
7
+ """
8
+ apt-get update
9
+ apt-get install ffmpeg
10
+ """
11
+
12
# Load OpenAI credentials from the project's .env file and apply them to
# the global openai client configuration. Missing keys resolve to None,
# in which case the API call in transcribe() will fail with an auth error.
config = dotenv_values(".env")

org_id = config.get('OPENAI_ORGANIZATION')
api_key = config.get('OPENAI_API_KEY')

openai.organization = org_id
openai.api_key = api_key
16
 
 
17
 
18
def transcribe(audio):
    """Transcribe a recorded audio clip with the OpenAI Whisper API.

    Parameters
    ----------
    audio : str
        Filepath of the recording delivered by the Gradio microphone
        input (``type="filepath"``); it arrives without an extension.

    Returns
    -------
    str
        The transcript text produced by the ``whisper-1`` model.
    """
    # The Whisper endpoint infers the audio format from the filename,
    # so give the temp file a .wav suffix before uploading.
    wav_path = audio + '.wav'
    os.rename(audio, wav_path)

    # Context manager ensures the handle is closed even if the API call
    # raises — the original left the file open (resource leak).
    with open(wav_path, "rb") as wav_file:
        result = openai.Audio.transcribe("whisper-1", wav_file).text

    return result
25
 
26
# Wire the transcriber into a Gradio UI: one microphone recording in,
# the transcribed text out, then start the local web server.
interface = gr.Interface(
    title='Whisper Audio to Text with Speaker Recognition',
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        # gr.inputs.Number(default=2, label="Number of Speakers")
    ],
    outputs="text",
)
interface.launch()