nomnomnonono committed on
Commit
eb90ab7
β€’
1 Parent(s): 18d25ba
Files changed (5) hide show
  1. .gitignore +3 -0
  2. README.md +6 -8
  3. app.py +62 -0
  4. requirements.txt +68 -0
  5. utils.py +56 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .venv
2
+ *.ipynb
3
+ __pycache__
README.md CHANGED
@@ -1,12 +1,10 @@
1
- ---
2
- title: Python Siri
3
- emoji: 🐠
4
- colorFrom: indigo
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 3.23.0
8
  app_file: app.py
9
- pinned: false
10
- ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ title: Siri via Whisper and ChatGPT
2
+ emoji: πŸš€
3
+ colorFrom: red
4
+ colorTo: purple
5
+ python: 3.9.7
6
  sdk: gradio
7
  sdk_version: 3.23.0
8
  app_file: app.py
9
+ pinned: true
 
10
 
 
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from utils import answer_by_chat, transcribe
3
+
4
+ with gr.Blocks() as demo:
5
+ gr.Markdown("Siri-like application via Whisper and ChatGPT")
6
+ with gr.Tabs():
7
+ with gr.TabItem(label="General"):
8
+ with gr.Row():
9
+ with gr.Column(scale=1):
10
+ api_key = gr.Textbox(label="Paste your own openai-api-key")
11
+ with gr.Row():
12
+ audio_input = gr.Audio(
13
+ source="microphone",
14
+ type="filepath",
15
+ label="Record from microphone",
16
+ )
17
+ audio_button = gr.Button("Transcribe")
18
+ audio_output = gr.Textbox()
19
+ with gr.Column(scale=1):
20
+ chat_button = gr.Button("Questions to ChatGPT")
21
+ chat_audio_output = gr.Audio()
22
+ chat_text_output = gr.Textbox()
23
+ with gr.TabItem(label="Setting"):
24
+ gr.Markdown("Prompt Setting")
25
+ with gr.Row():
26
+ role1 = gr.Dropdown(["system", "user", "assistant"], value="system")
27
+ content1 = gr.Textbox(value="あγͺγŸγ―ε½Ήγ«η«‹γ€γ‚’γ‚·γ‚Ήγ‚Ώγƒ³γƒˆγ§γ™γ€‚")
28
+ with gr.Row():
29
+ role2 = gr.Dropdown(["system", "user", "assistant"])
30
+ content2 = gr.Textbox()
31
+ with gr.Row():
32
+ role3 = gr.Dropdown(["system", "user", "assistant"])
33
+ content3 = gr.Textbox()
34
+ with gr.Row():
35
+ role4 = gr.Dropdown(["system", "user", "assistant"])
36
+ content4 = gr.Textbox()
37
+ with gr.Row():
38
+ role5 = gr.Dropdown(["system", "user", "assistant"])
39
+ content5 = gr.Textbox()
40
+ audio_button.click(
41
+ transcribe, inputs=[audio_input], outputs=[audio_output], api_name="transcribe"
42
+ )
43
+ chat_button.click(
44
+ answer_by_chat,
45
+ inputs=[
46
+ audio_output,
47
+ role1,
48
+ content1,
49
+ role2,
50
+ content2,
51
+ role3,
52
+ content3,
53
+ role4,
54
+ content4,
55
+ role5,
56
+ content5,
57
+ api_key,
58
+ ],
59
+ outputs=[chat_text_output, chat_audio_output],
60
+ )
61
+
62
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ altair==4.2.2
5
+ anyio==3.6.2
6
+ async-timeout==4.0.2
7
+ attrs==22.2.0
8
+ certifi==2022.12.7
9
+ charset-normalizer==3.1.0
10
+ click==8.1.3
11
+ contourpy==1.0.7
12
+ cycler==0.11.0
13
+ entrypoints==0.4
14
+ fastapi==0.95.0
15
+ ffmpy==0.3.0
16
+ filelock==3.10.7
17
+ fonttools==4.39.3
18
+ frozenlist==1.3.3
19
+ fsspec==2023.3.0
20
+ gradio==3.24.1
21
+ gradio_client==0.0.5
22
+ gTTS==2.3.1
23
+ h11==0.14.0
24
+ httpcore==0.16.3
25
+ httpx==0.23.3
26
+ huggingface-hub==0.13.3
27
+ idna==3.4
28
+ importlib-resources==5.12.0
29
+ Jinja2==3.1.2
30
+ jsonschema==4.17.3
31
+ kiwisolver==1.4.4
32
+ linkify-it-py==2.0.0
33
+ markdown-it-py==2.2.0
34
+ MarkupSafe==2.1.2
35
+ matplotlib==3.7.1
36
+ mdit-py-plugins==0.3.3
37
+ mdurl==0.1.2
38
+ multidict==6.0.4
39
+ numpy==1.24.2
40
+ openai==0.27.2
41
+ orjson==3.8.9
42
+ packaging==23.0
43
+ pandas==1.5.3
44
+ Pillow==9.5.0
45
+ pydantic==1.10.7
46
+ pydub==0.25.1
47
+ pyparsing==3.0.9
48
+ pyrsistent==0.19.3
49
+ python-dateutil==2.8.2
50
+ python-multipart==0.0.6
51
+ pytz==2023.3
52
+ PyYAML==6.0
53
+ requests==2.28.2
54
+ rfc3986==1.5.0
55
+ semantic-version==2.10.0
56
+ six==1.16.0
57
+ sniffio==1.3.0
58
+ starlette==0.26.1
59
+ toolz==0.12.0
60
+ tqdm==4.65.0
61
+ typing_extensions==4.5.0
62
+ uc-micro-py==1.0.1
63
+ urllib3==1.26.15
64
+ uvicorn==0.21.1
65
+ websockets==11.0
66
+ openai-whisper==20230314
67
+ yarl==1.8.2
68
+ zipp==3.15.0
utils.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import whisper
3
+ from gtts import gTTS
4
+
5
+ model = whisper.load_model("small")
6
+
7
+
8
+ def transcribe(filepath):
9
+ audio = whisper.load_audio(filepath)
10
+ audio = whisper.pad_or_trim(audio)
11
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
12
+ _, probs = model.detect_language(mel)
13
+ global language
14
+ language = max(probs, key=probs.get)
15
+ options = whisper.DecodingOptions(fp16=False)
16
+ result = whisper.decode(model, mel, options)
17
+ return result.text
18
+
19
+
20
+ def answer_by_chat(
21
+ question,
22
+ role1,
23
+ content1,
24
+ role2,
25
+ content2,
26
+ role3,
27
+ content3,
28
+ role4,
29
+ content4,
30
+ role5,
31
+ content5,
32
+ api_key,
33
+ ):
34
+ openai.api_key = api_key
35
+ messages = [
36
+ {"role": role, "content": content}
37
+ for role, content in [
38
+ [role1, content1],
39
+ [role2, content2],
40
+ [role3, content3],
41
+ [role4, content4],
42
+ [role5, content5],
43
+ ]
44
+ if role != "" and content != ""
45
+ ]
46
+ messages.append({"role": "user", "content": question})
47
+ response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
48
+ response_text = response["choices"][0]["message"]["content"]
49
+ response_audio = speech_synthesis(response_text)
50
+ return response_text, response_audio
51
+
52
+
53
+ def speech_synthesis(sentence):
54
+ tts = gTTS(sentence, lang=language)
55
+ tts.save("tmp.mp3")
56
+ return "tmp.mp3"