nomnomnonono committed · Commit eb90ab7 · Parent(s): 18d25ba

initial

Browse files:
- .gitignore        +3  -0
- README.md         +6  -8
- app.py            +62 -0
- requirements.txt  +68 -0
- utils.py          +56 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
+.venv
+*.ipynb
+__pycache__
README.md CHANGED
@@ -1,12 +1,10 @@
-
-
-
-
-
+title: Siri via Whisper and ChatGPT
+emoji: π
+colorFrom: red
+colorTo: purple
+python: 3.9.7
 sdk: gradio
 sdk_version: 3.23.0
 app_file: app.py
-pinned:
+pinned: true
----
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
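The new header is Hugging Face Spaces front matter, which the Hub parses as YAML to configure the Space. A quick sanity-check sketch (PyYAML is already pinned in requirements.txt; the emoji line is left out here because its character did not survive encoding):

import yaml

front_matter = """\
title: Siri via Whisper and ChatGPT
colorFrom: red
colorTo: purple
python: 3.9.7
sdk: gradio
sdk_version: 3.23.0
app_file: app.py
pinned: true
"""
config = yaml.safe_load(front_matter)
assert config["sdk"] == "gradio" and config["pinned"] is True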
app.py ADDED
@@ -0,0 +1,62 @@
import gradio as gr
from utils import answer_by_chat, transcribe

with gr.Blocks() as demo:
    gr.Markdown("Siri-like application via Whisper and ChatGPT")
    with gr.Tabs():
        # General tab: record speech, transcribe it, then send it to ChatGPT.
        with gr.TabItem(label="General"):
            with gr.Row():
                with gr.Column(scale=1):
                    api_key = gr.Textbox(label="Paste your own openai-api-key")
                    with gr.Row():
                        audio_input = gr.Audio(
                            source="microphone",
                            type="filepath",
                            label="Record from microphone",
                        )
                        audio_button = gr.Button("Transcribe")
                    audio_output = gr.Textbox()
                with gr.Column(scale=1):
                    chat_button = gr.Button("Questions to ChatGPT")
                    chat_audio_output = gr.Audio()
                    chat_text_output = gr.Textbox()
        # Setting tab: up to five (role, content) pairs prepended to the chat.
        with gr.TabItem(label="Setting"):
            gr.Markdown("Prompt Setting")
            with gr.Row():
                role1 = gr.Dropdown(["system", "user", "assistant"], value="system")
                content1 = gr.Textbox(value="あなたは役に立つアシスタントです。")  # "You are a helpful assistant."
            with gr.Row():
                role2 = gr.Dropdown(["system", "user", "assistant"])
                content2 = gr.Textbox()
            with gr.Row():
                role3 = gr.Dropdown(["system", "user", "assistant"])
                content3 = gr.Textbox()
            with gr.Row():
                role4 = gr.Dropdown(["system", "user", "assistant"])
                content4 = gr.Textbox()
            with gr.Row():
                role5 = gr.Dropdown(["system", "user", "assistant"])
                content5 = gr.Textbox()
    audio_button.click(
        transcribe, inputs=[audio_input], outputs=[audio_output], api_name="transcribe"
    )
    chat_button.click(
        answer_by_chat,
        inputs=[
            audio_output,
            role1,
            content1,
            role2,
            content2,
            role3,
            content3,
            role4,
            content4,
            role5,
            content5,
            api_key,
        ],
        outputs=[chat_text_output, chat_audio_output],
    )

demo.launch()
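Because the Transcribe handler is registered with api_name="transcribe", the running Space also exposes it as a named API endpoint. A minimal client sketch, assuming a recent gradio_client and a hypothetical Space id and audio file:

from gradio_client import Client

# Hypothetical Space id; point this at wherever the app is actually hosted.
client = Client("your-username/siri-via-whisper-and-chatgpt")

# Named endpoints are addressed as "/<api_name>"; the audio input is sent
# as a local file path because the Audio component uses type="filepath".
text = client.predict("sample.wav", api_name="/transcribe")
print(text)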
requirements.txt ADDED
@@ -0,0 +1,68 @@
aiofiles==23.1.0
aiohttp==3.8.4
aiosignal==1.3.1
altair==4.2.2
anyio==3.6.2
async-timeout==4.0.2
attrs==22.2.0
certifi==2022.12.7
charset-normalizer==3.1.0
click==8.1.3
contourpy==1.0.7
cycler==0.11.0
entrypoints==0.4
fastapi==0.95.0
ffmpy==0.3.0
filelock==3.10.7
fonttools==4.39.3
frozenlist==1.3.3
fsspec==2023.3.0
gradio==3.24.1
gradio_client==0.0.5
gTTS==2.3.1
h11==0.14.0
httpcore==0.16.3
httpx==0.23.3
huggingface-hub==0.13.3
idna==3.4
importlib-resources==5.12.0
Jinja2==3.1.2
jsonschema==4.17.3
kiwisolver==1.4.4
linkify-it-py==2.0.0
markdown-it-py==2.2.0
MarkupSafe==2.1.2
matplotlib==3.7.1
mdit-py-plugins==0.3.3
mdurl==0.1.2
multidict==6.0.4
numpy==1.24.2
openai==0.27.2
orjson==3.8.9
packaging==23.0
pandas==1.5.3
Pillow==9.5.0
pydantic==1.10.7
pydub==0.25.1
pyparsing==3.0.9
pyrsistent==0.19.3
python-dateutil==2.8.2
python-multipart==0.0.6
pytz==2023.3
PyYAML==6.0
requests==2.28.2
rfc3986==1.5.0
semantic-version==2.10.0
six==1.16.0
sniffio==1.3.0
starlette==0.26.1
toolz==0.12.0
tqdm==4.65.0
typing_extensions==4.5.0
uc-micro-py==1.0.1
urllib3==1.26.15
uvicorn==0.21.1
websockets==11.0
whisper==1.1.10  # NOTE: on PyPI "whisper" is Graphite's database library; utils.py needs "openai-whisper"
yarl==1.8.2
zipp==3.15.0
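One caution on the list above: on PyPI, "whisper" is Graphite's time-series database library, while the model code that utils.py imports ships as "openai-whisper". Both install a top-level module named whisper, so the import alone succeeds either way. A small sketch to fail fast if the wrong distribution got installed (the error message is mine):

import whisper

# OpenAI's Whisper exposes load_model(); Graphite's "whisper" package does not.
assert hasattr(whisper, "load_model"), "Wrong package: pip install openai-whisper"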
utils.py ADDED
@@ -0,0 +1,56 @@
import openai
import whisper
from gtts import gTTS

# The Whisper "small" checkpoint is loaded once at import time.
model = whisper.load_model("small")

# Default TTS language; transcribe() overwrites it with the detected language.
# (Without a default, speech_synthesis() would raise NameError if the chat
# button were pressed before anything had been transcribed.)
language = "en"


def transcribe(filepath):
    # Load the recording and pad/trim it to Whisper's 30-second window.
    audio = whisper.load_audio(filepath)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # Remember the detected language so the answer is spoken in the same one.
    _, probs = model.detect_language(mel)
    global language
    language = max(probs, key=probs.get)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text


def answer_by_chat(
    question,
    role1,
    content1,
    role2,
    content2,
    role3,
    content3,
    role4,
    content4,
    role5,
    content5,
    api_key,
):
    openai.api_key = api_key
    # Keep only the (role, content) pairs that were actually filled in.
    messages = [
        {"role": role, "content": content}
        for role, content in [
            [role1, content1],
            [role2, content2],
            [role3, content3],
            [role4, content4],
            [role5, content5],
        ]
        if role != "" and content != ""
    ]
    messages.append({"role": "user", "content": question})
    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    response_text = response["choices"][0]["message"]["content"]
    response_audio = speech_synthesis(response_text)
    return response_text, response_audio


def speech_synthesis(sentence):
    # gTTS writes an MP3; Gradio's Audio output accepts the file path.
    tts = gTTS(sentence, lang=language)
    tts.save("tmp.mp3")
    return "tmp.mp3"