nomnomnonono committed
Commit · 9ecdb48
1 Parent(s): a06e71d
udpate
app.py
CHANGED
@@ -1,5 +1,8 @@
 import gradio as gr
-from
+from chat import CahtBOT
+
+chat = CahtBOT()
+
 
 with gr.Blocks() as demo:
     gr.Markdown("Siri-like application via Whisper and ChatGPT")
@@ -8,23 +11,23 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=1):
             api_key = gr.Textbox(label="Paste your own openai-api-key")
+            api_button = gr.Button("SetUp")
             with gr.Row():
                 audio_input = gr.Audio(
                     source="microphone",
-                    type="filepath",
                     label="Record from microphone",
                 )
                 audio_button = gr.Button("Transcribe")
                 audio_output = gr.Textbox()
-        with gr.Column(scale=1):
             chat_button = gr.Button("Questions to ChatGPT")
-
-
+        with gr.Column(scale=1):
+            chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
     with gr.TabItem(label="Setting"):
         gr.Markdown("Prompt Setting")
+        language = gr.Dropdown(["Japanese", "English"], value="English")
         with gr.Row():
             role1 = gr.Dropdown(["system", "user", "assistant"], value="system")
-            content1 = gr.Textbox(value="
+            content1 = gr.Textbox(value="You're helpful assistant.")
         with gr.Row():
             role2 = gr.Dropdown(["system", "user", "assistant"])
             content2 = gr.Textbox()
@@ -37,13 +40,10 @@ with gr.Blocks() as demo:
         with gr.Row():
             role5 = gr.Dropdown(["system", "user", "assistant"])
             content5 = gr.Textbox()
-
-
-
-    chat_button.click(
-        answer_by_chat,
+
+    api_button.click(
+        chat.setup,
         inputs=[
-            audio_output,
             role1,
             content1,
             role2,
@@ -55,8 +55,20 @@ with gr.Blocks() as demo:
             role5,
             content5,
             api_key,
+            language,
         ],
-        outputs=
+        outputs=None,
+    )
+    audio_button.click(
+        chat.transcribe,
+        inputs=[audio_input],
+        outputs=[audio_output],
+        api_name="transcribe",
+    )
+    chat_button.click(
+        chat.answer_by_chat,
+        inputs=[chatbot, audio_output],
+        outputs=[chatbot],
     )
 
 demo.launch()
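For context on the new wiring: in Gradio, Button.click(fn, inputs=..., outputs=...) registers fn as the button's callback; the values of the components in inputs become fn's arguments, and fn's return value is written back to the components in outputs. A minimal self-contained sketch of the same pattern, with a hypothetical echo callback that is not part of this commit:

import gradio as gr

def echo(text):
    # Hypothetical stand-in for chat.setup / chat.transcribe /
    # chat.answer_by_chat: receives the input components' values and
    # returns what should be written to the output components.
    return text

with gr.Blocks() as demo:
    box_in = gr.Textbox(label="Input")
    btn = gr.Button("Run")
    box_out = gr.Textbox(label="Output")
    # Same wiring pattern as api_button / audio_button / chat_button above.
    btn.click(echo, inputs=[box_in], outputs=[box_out])

demo.launch()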
chat.py
ADDED
@@ -0,0 +1,67 @@
+import openai
+import soundfile
+
+# import whisper
+from gtts import gTTS
+
+dic = {"Japanese": "ja", "English": "en"}
+
+
+class CahtBOT:
+    def __init__(self):
+        self.messages = None
+
+    def setup(
+        self,
+        role1,
+        content1,
+        role2,
+        content2,
+        role3,
+        content3,
+        role4,
+        content4,
+        role5,
+        content5,
+        api_key,
+        language,
+    ):
+        openai.api_key = api_key
+        self.language = dic[language]
+        self.messages = [
+            {"role": role, "content": content}
+            for role, content in [
+                [role1, content1],
+                [role2, content2],
+                [role3, content3],
+                [role4, content4],
+                [role5, content5],
+            ]
+            if role != "" and content != ""
+        ]
+
+    def transcribe(self, audio):
+        sample_rate, data = audio
+        soundfile.write(file="tmp.wav", data=data, samplerate=sample_rate)
+        audio_file = open("tmp.wav", "rb")
+        transcript = openai.Audio.transcribe("whisper-1", audio_file)
+        return transcript.text
+
+    def answer_by_chat(self, history, question):
+        self.messages.append({"role": "user", "content": question})
+        history += [(question, None)]
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo", messages=self.messages
+        )
+        response_text = response["choices"][0]["message"]["content"]
+        response_role = response["choices"][0]["message"]["role"]
+        response_audio = self.speech_synthesis(response_text)
+        self.messages.append({"role": response_role, "content": response_text})
+        # history += [(None, response_text)]
+        history += [(None, (response_audio,))]
+        return history
+
+    def speech_synthesis(self, sentence):
+        tts = gTTS(sentence, lang=self.language)
+        tts.save("tmp.wav")
+        return "tmp.wav"
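Two details of the new CahtBOT class worth noting. First, setup builds self.messages with a list comprehension that keeps only the (role, content) pairs where both fields are non-empty, so blank prompt rows in the Setting tab are skipped. A standalone sketch of that filtering logic (sample values are illustrative, not from the commit):

# Reproduction of the message-building logic in CahtBOT.setup.
pairs = [
    ["system", "You're helpful assistant."],
    ["user", ""],       # dropped: empty content
    ["", "orphaned"],   # dropped: empty role
]
messages = [
    {"role": role, "content": content}
    for role, content in pairs
    if role != "" and content != ""
]
print(messages)
# [{'role': 'system', 'content': "You're helpful assistant."}]

Second, answer_by_chat appends the bot reply to the history as a one-element tuple, (response_audio,); in Gradio's Chatbot, a (filepath,) tuple in place of a string is rendered as playable media rather than text, which is how the synthesized speech ends up in the chat window.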
requirements.txt
CHANGED
@@ -6,6 +6,7 @@ anyio==3.6.2
 async-timeout==4.0.2
 attrs==22.2.0
 certifi==2022.12.7
+cffi==1.15.1
 charset-normalizer==3.1.0
 click==8.1.3
 contourpy==1.0.7
@@ -51,6 +52,7 @@ orjson==3.8.9
 packaging==23.0
 pandas==1.5.3
 Pillow==9.5.0
+pycparser==2.21
 pydantic==1.10.7
 pydub==0.25.1
 pyparsing==3.0.9
@@ -65,6 +67,7 @@ rfc3986==1.5.0
 semantic-version==2.10.0
 six==1.16.0
 sniffio==1.3.0
+soundfile==0.12.1
 starlette==0.26.1
 sympy==1.11.1
 tiktoken==0.3.1
@@ -76,6 +79,5 @@ uc-micro-py==1.0.1
 urllib3==1.26.15
 uvicorn==0.21.1
 websockets==11.0
-whisper==1.1.10
 yarl==1.8.2
 zipp==3.15.0
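The three added pins are consistent with the switch to soundfile for writing the microphone buffer: soundfile binds libsndfile through cffi, which in turn requires pycparser. The whisper pin is dropped because transcription now goes through the OpenAI API (openai.Audio.transcribe) rather than a local model.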
utils.py
DELETED
@@ -1,56 +0,0 @@
-import openai
-import whisper
-from gtts import gTTS
-
-model = whisper.load_model("small")
-
-
-def transcribe(filepath):
-    audio = whisper.load_audio(filepath)
-    audio = whisper.pad_or_trim(audio)
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
-    _, probs = model.detect_language(mel)
-    global language
-    language = max(probs, key=probs.get)
-    options = whisper.DecodingOptions(fp16=False)
-    result = whisper.decode(model, mel, options)
-    return result.text
-
-
-def answer_by_chat(
-    question,
-    role1,
-    content1,
-    role2,
-    content2,
-    role3,
-    content3,
-    role4,
-    content4,
-    role5,
-    content5,
-    api_key,
-):
-    openai.api_key = api_key
-    messages = [
-        {"role": role, "content": content}
-        for role, content in [
-            [role1, content1],
-            [role2, content2],
-            [role3, content3],
-            [role4, content4],
-            [role5, content5],
-        ]
-        if role != "" and content != ""
-    ]
-    messages.append({"role": "user", "content": question})
-    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
-    response_text = response["choices"][0]["message"]["content"]
-    response_audio = speech_synthesis(response_text)
-    return response_text, response_audio
-
-
-def speech_synthesis(sentence):
-    tts = gTTS(sentence, lang=language)
-    tts.save("tmp.mp3")
-    return "tmp.mp3"