nomnomnonono committed
Commit 9ecdb48 · Parent: a06e71d
Files changed (4):
  1. app.py +25 -13
  2. chat.py +67 -0
  3. requirements.txt +3 -1
  4. utils.py +0 -56
app.py CHANGED
@@ -1,5 +1,8 @@
 import gradio as gr
-from utils import answer_by_chat, transcribe
+from chat import CahtBOT
+
+chat = CahtBOT()
+
 
 with gr.Blocks() as demo:
     gr.Markdown("Siri-like application via Whisper and ChatGPT")
@@ -8,23 +11,23 @@ with gr.Blocks() as demo:
             with gr.Row():
                 with gr.Column(scale=1):
                     api_key = gr.Textbox(label="Paste your own openai-api-key")
+                    api_button = gr.Button("SetUp")
                     with gr.Row():
                         audio_input = gr.Audio(
                             source="microphone",
-                            type="filepath",
                             label="Record from microphone",
                         )
                         audio_button = gr.Button("Transcribe")
                         audio_output = gr.Textbox()
-                with gr.Column(scale=1):
                     chat_button = gr.Button("Questions to ChatGPT")
-                    chat_audio_output = gr.Audio()
-                    chat_text_output = gr.Textbox()
+                with gr.Column(scale=1):
+                    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
         with gr.TabItem(label="Setting"):
             gr.Markdown("Prompt Setting")
+            language = gr.Dropdown(["Japanese", "English"], value="English")
             with gr.Row():
                 role1 = gr.Dropdown(["system", "user", "assistant"], value="system")
-                content1 = gr.Textbox(value="あなたは役に立つアシスタントです。")
+                content1 = gr.Textbox(value="You're a helpful assistant.")
             with gr.Row():
                 role2 = gr.Dropdown(["system", "user", "assistant"])
                 content2 = gr.Textbox()
@@ -37,13 +40,10 @@ with gr.Blocks() as demo:
             with gr.Row():
                 role5 = gr.Dropdown(["system", "user", "assistant"])
                 content5 = gr.Textbox()
-    audio_button.click(
-        transcribe, inputs=[audio_input], outputs=[audio_output], api_name="transcribe"
-    )
-    chat_button.click(
-        answer_by_chat,
+
+    api_button.click(
+        chat.setup,
         inputs=[
-            audio_output,
             role1,
             content1,
             role2,
@@ -55,8 +55,20 @@ with gr.Blocks() as demo:
             role5,
             content5,
             api_key,
+            language,
         ],
-        outputs=[chat_text_output, chat_audio_output],
+        outputs=None,
+    )
+    audio_button.click(
+        chat.transcribe,
+        inputs=[audio_input],
+        outputs=[audio_output],
+        api_name="transcribe",
+    )
+    chat_button.click(
+        chat.answer_by_chat,
+        inputs=[chatbot, audio_output],
+        outputs=[chatbot],
     )
 
 demo.launch()
chat.py ADDED
@@ -0,0 +1,67 @@
+import openai
+import soundfile
+
+# import whisper
+from gtts import gTTS
+
+dic = {"Japanese": "ja", "English": "en"}
+
+
+class CahtBOT:
+    def __init__(self):
+        self.messages = None
+
+    def setup(
+        self,
+        role1,
+        content1,
+        role2,
+        content2,
+        role3,
+        content3,
+        role4,
+        content4,
+        role5,
+        content5,
+        api_key,
+        language,
+    ):
+        openai.api_key = api_key
+        self.language = dic[language]
+        self.messages = [
+            {"role": role, "content": content}
+            for role, content in [
+                [role1, content1],
+                [role2, content2],
+                [role3, content3],
+                [role4, content4],
+                [role5, content5],
+            ]
+            if role != "" and content != ""
+        ]
+
+    def transcribe(self, audio):
+        sample_rate, data = audio
+        soundfile.write(file="tmp.wav", data=data, samplerate=sample_rate)
+        with open("tmp.wav", "rb") as audio_file:
+            transcript = openai.Audio.transcribe("whisper-1", audio_file)
+        return transcript.text
+
+    def answer_by_chat(self, history, question):
+        self.messages.append({"role": "user", "content": question})
+        history += [(question, None)]
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo", messages=self.messages
+        )
+        response_text = response["choices"][0]["message"]["content"]
+        response_role = response["choices"][0]["message"]["role"]
+        response_audio = self.speech_synthesis(response_text)
+        self.messages.append({"role": response_role, "content": response_text})
+        # history += [(None, response_text)]
+        history += [(None, (response_audio,))]
+        return history
+
+    def speech_synthesis(self, sentence):
+        tts = gTTS(sentence, lang=self.language)
+        tts.save("tmp.mp3")
+        return "tmp.mp3"
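
Taken together with the app.py wiring above, the new CahtBOT class is driven in three steps: setup() stores the prompt messages, the OpenAI key, and the reply language; transcribe() writes the (sample_rate, data) tuple recorded by gr.Audio to disk and sends it to the Whisper API; answer_by_chat() appends the question, queries gpt-3.5-turbo, and returns the chat history with a gTTS audio reply attached. A minimal sketch of that flow outside Gradio (question.wav, the API key, and the blank prompt slots are placeholders, not part of the commit):

import soundfile

from chat import CahtBOT

bot = CahtBOT()

# setup() keeps only non-empty (role, content) pairs; slots 2-5 are left blank here.
bot.setup(
    "system", "You're a helpful assistant.",
    "", "",
    "", "",
    "", "",
    "", "",
    api_key="sk-...",  # placeholder OpenAI API key
    language="English",
)

# gr.Audio (default numpy mode) hands transcribe() a (sample_rate, data) tuple.
data, sample_rate = soundfile.read("question.wav")  # placeholder recording
question = bot.transcribe((sample_rate, data))

# The returned history is what gr.Chatbot renders; the last entry carries the
# path of the synthesized audio reply.
history = bot.answer_by_chat([], question)
print(question, history[-1])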
requirements.txt CHANGED
@@ -6,6 +6,7 @@ anyio==3.6.2
 async-timeout==4.0.2
 attrs==22.2.0
 certifi==2022.12.7
+cffi==1.15.1
 charset-normalizer==3.1.0
 click==8.1.3
 contourpy==1.0.7
@@ -51,6 +52,7 @@ orjson==3.8.9
 packaging==23.0
 pandas==1.5.3
 Pillow==9.5.0
+pycparser==2.21
 pydantic==1.10.7
 pydub==0.25.1
 pyparsing==3.0.9
@@ -65,6 +67,7 @@ rfc3986==1.5.0
 semantic-version==2.10.0
 six==1.16.0
 sniffio==1.3.0
+soundfile==0.12.1
 starlette==0.26.1
 sympy==1.11.1
 tiktoken==0.3.1
@@ -76,6 +79,5 @@ uc-micro-py==1.0.1
 urllib3==1.26.15
 uvicorn==0.21.1
 websockets==11.0
-whisper==1.1.10
 yarl==1.8.2
 zipp==3.15.0
utils.py DELETED
@@ -1,56 +0,0 @@
-import openai
-import whisper
-from gtts import gTTS
-
-model = whisper.load_model("small")
-
-
-def transcribe(filepath):
-    audio = whisper.load_audio(filepath)
-    audio = whisper.pad_or_trim(audio)
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
-    _, probs = model.detect_language(mel)
-    global language
-    language = max(probs, key=probs.get)
-    options = whisper.DecodingOptions(fp16=False)
-    result = whisper.decode(model, mel, options)
-    return result.text
-
-
-def answer_by_chat(
-    question,
-    role1,
-    content1,
-    role2,
-    content2,
-    role3,
-    content3,
-    role4,
-    content4,
-    role5,
-    content5,
-    api_key,
-):
-    openai.api_key = api_key
-    messages = [
-        {"role": role, "content": content}
-        for role, content in [
-            [role1, content1],
-            [role2, content2],
-            [role3, content3],
-            [role4, content4],
-            [role5, content5],
-        ]
-        if role != "" and content != ""
-    ]
-    messages.append({"role": "user", "content": question})
-    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
-    response_text = response["choices"][0]["message"]["content"]
-    response_audio = speech_synthesis(response_text)
-    return response_text, response_audio
-
-
-def speech_synthesis(sentence):
-    tts = gTTS(sentence, lang=language)
-    tts.save("tmp.mp3")
-    return "tmp.mp3"