Spaces:
Runtime error
Runtime error
Init
Browse files- .gitignore +2 -0
- app.ipynb +92 -0
- app.py +58 -0
- gradio_article.md +16 -0
- requirements.txt +9 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
tmp.ipynb
|
2 |
+
tmp.mp3
|
app.ipynb
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stderr",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"playsound is relying on another python subprocess. Please use `pip install pygobject` if you want playsound to run more efficiently.\n"
|
13 |
+
]
|
14 |
+
}
|
15 |
+
],
|
16 |
+
"source": [
|
17 |
+
"import openai\n",
|
18 |
+
"from playsound import playsound\n",
|
19 |
+
"from gtts import gTTS\n",
|
20 |
+
"import speech_recognition as sr\n",
|
21 |
+
"import gradio as gr\n",
|
22 |
+
"\n",
|
23 |
+
"openai.api_key = \"REDACTED-ROTATE-THIS-KEY\"  # secret removed; load from env instead\n",
|
24 |
+
"# will hide the api key:\n",
|
25 |
+
"# import openai_secret_manager\n",
|
26 |
+
"# assert \"openai\" in openai_secret_manager.get_services()\n",
|
27 |
+
"# secrets = openai_secret_manager.get_secret(\"openai\")\n",
|
28 |
+
"# openai.api_key = secrets[\"api_key\"]"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": 2,
|
34 |
+
"metadata": {},
|
35 |
+
"outputs": [],
|
36 |
+
"source": [
|
37 |
+
"def generate_response(prompt):\n",
|
38 |
+
" prompt = (f\"{prompt}\")\n",
|
39 |
+
"\n",
|
40 |
+
" response = openai.ChatCompletion.create(\n",
|
41 |
+
" model=\"gpt-3.5-turbo\",\n",
|
42 |
+
" messages=[\n",
|
43 |
+
" {\"role\": \"user\", \"content\": f\"{prompt}\"},\n",
|
44 |
+
" ])\n",
|
45 |
+
"\n",
|
46 |
+
" message = response.choices[0]['message']['content']\n",
|
47 |
+
" return message"
|
48 |
+
]
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"cell_type": "code",
|
52 |
+
"execution_count": 8,
|
53 |
+
"metadata": {},
|
54 |
+
"outputs": [
|
55 |
+
{
|
56 |
+
"data": {
|
57 |
+
"text/plain": [
|
58 |
+
"\"I apologize, but as a language model AI, I don't have access to real-time information. Could you please check the time on your device or ask a nearby clock?\""
|
59 |
+
]
|
60 |
+
},
|
61 |
+
"execution_count": 8,
|
62 |
+
"metadata": {},
|
63 |
+
"output_type": "execute_result"
|
64 |
+
}
|
65 |
+
],
|
66 |
+
"source": [
|
67 |
+
"generate_response(\"What time is it?\")"
|
68 |
+
]
|
69 |
+
}
|
70 |
+
],
|
71 |
+
"metadata": {
|
72 |
+
"kernelspec": {
|
73 |
+
"display_name": "base",
|
74 |
+
"language": "python",
|
75 |
+
"name": "python3"
|
76 |
+
},
|
77 |
+
"language_info": {
|
78 |
+
"codemirror_mode": {
|
79 |
+
"name": "ipython",
|
80 |
+
"version": 3
|
81 |
+
},
|
82 |
+
"file_extension": ".py",
|
83 |
+
"mimetype": "text/x-python",
|
84 |
+
"name": "python",
|
85 |
+
"nbconvert_exporter": "python",
|
86 |
+
"pygments_lexer": "ipython3",
|
87 |
+
"version": "3.8.13"
|
88 |
+
}
|
89 |
+
},
|
90 |
+
"nbformat": 4,
|
91 |
+
"nbformat_minor": 2
|
92 |
+
}
|
app.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
from playsound import playsound
|
3 |
+
from gtts import gTTS
|
4 |
+
import speech_recognition as sr
|
5 |
+
import gradio as gr
|
6 |
+
|
7 |
+
# Read the API key from the environment instead of a hard-coded value.
# The original line referenced an undefined name `api_key`, which raises
# NameError the moment this module is imported; an earlier revision even
# committed a literal secret key (see app.ipynb) — never do that.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
|
8 |
+
|
9 |
+
def generate_response(prompt):
    """Send *prompt* to the OpenAI chat API and return the reply text.

    Parameters
    ----------
    prompt : str
        The user's message (typically text transcribed from speech).

    Returns
    -------
    str
        The assistant's reply content.
    """
    # NOTE: this uses the legacy openai<1.0 ChatCompletion interface on
    # purpose — requirements.txt pins openai==0.27.4.
    # The original's `prompt = (f"{prompt}")` was a no-op and is dropped;
    # str() below preserves the same coercion for non-str inputs.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": str(prompt)},
        ],
    )
    return response.choices[0]["message"]["content"]
|
20 |
+
|
21 |
+
r = sr.Recognizer()

# NOTE(review): the original imported pydub's AudioSegment here, but it was
# never used and pydub is not listed in requirements.txt, so the import
# would crash the Space at startup. It has been removed.


def transcribe(audio, lang):
    """Transcribe spoken audio, answer it with GPT, and speak the answer.

    Parameters
    ----------
    audio : str
        Path to a recorded audio file (WAV from the gradio microphone).
    lang : str
        Language code (e.g. "en", "vi", "nl") used both for speech
        recognition and for the synthesized reply.

    Returns
    -------
    str
        Path to an MP3 file containing the spoken response.
    """
    with sr.AudioFile(audio) as source:
        recorded = r.record(source)
    # Speech -> text -> GPT answer -> speech.
    question = r.recognize_google(recorded, language=lang)
    answer = generate_response(question)
    tts = gTTS(text=answer, lang=lang)
    # The original wrote to a fixed "tmp.mp3", which races under gradio's
    # live mode (concurrent requests overwrite each other's output); use a
    # unique temp file per call instead.
    import tempfile
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        out = tmp.name
    tts.save(out)
    return out
|
31 |
+
|
32 |
+
with open('gradio_article.md') as f:
|
33 |
+
article = f.read()
|
34 |
+
|
35 |
+
interface_options = {
|
36 |
+
"title": "Smart GPT",
|
37 |
+
"description": "Let's have a chat! Talk to me, and I'll respond in a jiffy",
|
38 |
+
"article": article,
|
39 |
+
"layout": "horizontal",
|
40 |
+
"theme": "default",
|
41 |
+
}
|
42 |
+
|
43 |
+
inputs = gr.Audio(source="microphone", type="filepath")
|
44 |
+
outputs = "audio"
|
45 |
+
|
46 |
+
lang = gr.Dropdown(choices=["en", "vi", "nl"], value="en")
|
47 |
+
|
48 |
+
gr.Interface(fn=transcribe, inputs=[inputs, lang], outputs=outputs, live=True,
|
49 |
+
**interface_options).launch()
|
50 |
+
|
51 |
+
# TODO
|
52 |
+
# Custom voice
|
53 |
+
# VALL-E
|
54 |
+
# https://cloud.google.com/text-to-speech/custom-voice/docs/quickstart
|
55 |
+
# Mozilla TTS
|
56 |
+
# OpenSeq2Seq
|
57 |
+
# Best VN: Vbee, FPT
|
58 |
+
# Elevenlabs for English
|
gradio_article.md
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Description
|
2 |
+
Hey there,
|
3 |
+
|
4 |
+
I made this cool app, mainly for my little son. It's a fun voice-controlled thing where he can chat with it and get spoken answers. For example, he can ask it to tell a story or find out stuff he's curious about.
|
5 |
+
|
6 |
+
Here's how it works:
|
7 |
+
|
8 |
+
I used a bunch of libraries like openai, playsound, gtts, speech_recognition, and gradio. The app records what you say into a microphone and sends it to OpenAI for answers. Then, it turns those answers into speech and plays them back to you.
|
9 |
+
|
10 |
+
There's a "transcribe" function that takes audio and a language code, listens to what you say, asks OpenAI for answers, turns those answers into speech, and stores it temporarily. Then it gives you the path to the spoken response.
|
11 |
+
|
12 |
+
The app also has a simple user interface made with gradio. You can pick a language and talk into the microphone. The app transcribes your words, gets answers, and plays them back to you.
|
13 |
+
|
14 |
+
I'm thinking of adding even more fun stuff like custom voices from VALL-E, Mozilla TTS, OpenSeq2Seq, Vbee, FPT, or Elevenlabs, so I could train my own voice for the generated responses. Wouldn't that be interesting?
|
15 |
+
|
16 |
+
This is a fun and easy way to have voice conversations and learn new things. So, it's not just for my son; anyone can enjoy it!
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.39.0
|
2 |
+
gTTS==2.3.1
|
3 |
+
openai==0.27.4
|
4 |
+
playsound==1.3.0
|
5 |
+
SpeechRecognition==3.9.0
|
6 |
+
transformers
|
7 |
+
torch
|
8 |
+
# pygobject
|
9 |
+
ffmpeg
|