Spaces:
Runtime error
hyuan5040 committed
Commit • f80c4e5
Parent(s):
Duplicate from hyuan5040/Speech-ChatGPT-Speech
Browse files
- .gitattributes +27 -0
- .gitignore +1 -0
- README.md +41 -0
- app.py +177 -0
- packages.txt +2 -0
- pygpt.py +111 -0
- requirements.txt +6 -0
.gitattributes
ADDED
@@ -0,0 +1,27 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zstandard filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1 @@
+.env
README.md
ADDED
@@ -0,0 +1,41 @@
+---
+title: Speech2ChatGPT2Speech
+emoji: 🗣️🙉
+colorFrom: indigo
+colorTo: yellow
+sdk: gradio
+python_version: 3.9
+sdk_version: 3.12.0
+app_file: app.py
+models:
+- neongeckocom/tts-vits-ljspeech-en
+- neongeckocom/tts-vits-css10-es
+- neongeckocom/tts-vits-css10-fr
+- neongeckocom/tts-vits-css10-de
+- neongeckocom/tts-vits-cv-it
+- neongeckocom/tts-vits-mai-pl
+- neongeckocom/tts-vits-mai-uk
+- neongeckocom/tts-vits-cv-ro
+- neongeckocom/tts-vits-css10-hu
+- neongeckocom/tts-vits-cv-el
+- neongeckocom/tts-vits-cv-cs
+- neongeckocom/tts-vits-cv-sv
+- neongeckocom/tts-vits-cv-pt
+- neongeckocom/tts-vits-cv-bg
+- neongeckocom/tts-vits-cv-hr
+- neongeckocom/tts-vits-cv-da
+- neongeckocom/tts-vits-cv-sk
+- neongeckocom/tts-vits-css10-nl
+- neongeckocom/tts-vits-css10-fi
+- neongeckocom/tts-vits-cv-lt
+- neongeckocom/tts-vits-cv-sl
+- neongeckocom/tts-vits-cv-lv
+- neongeckocom/tts-vits-cv-et
+- neongeckocom/tts-vits-cv-ga
+- neongeckocom/tts-vits-cv-mt
+pinned: false
+license: apache-2.0
+duplicated_from: hyuan5040/Speech-ChatGPT-Speech
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
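
The `models:` entries above are ordinary Hugging Face Hub repositories; in this Space their loading is presumably handled by the neon-tts-plugin-coqui runtime, so the snippet below is only an illustrative sketch (assuming huggingface_hub is available in the environment) of how one of the listed checkpoints could be fetched manually.

from huggingface_hub import snapshot_download

# Illustrative only: pull one of the TTS checkpoints listed in the Space metadata.
# The Space itself does not do this explicitly.
local_dir = snapshot_download("neongeckocom/tts-vits-ljspeech-en")
print("model files cached at:", local_dir)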
app.py
ADDED
@@ -0,0 +1,177 @@
+import tempfile
+import gradio as gr
+import openai
+from neon_tts_plugin_coqui import CoquiTTS
+
+def Question(Ask_Question):
+    # pass the generated text to audio
+    openai.api_key = "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"
+    # Set up the model and prompt
+    model_engine = "text-davinci-003"
+    #prompt = "who is alon musk?"
+    # Generate a response
+    completion = openai.Completion.create(
+        engine=model_engine,
+        prompt=Ask_Question,
+        max_tokens=1024,
+        n=1,
+        stop=None,
+        temperature=0.5,)
+    response = completion.choices[0].text
+    #out_result=resp['message']
+    return response
+
+LANGUAGES = list(CoquiTTS.langs.keys())
+default_lang = "en"
+import telnetlib
+#import whisper
+#whisper_model = whisper.load_model("small")
+whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
+#chatgpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
+import os
+import json
+session_token = os.environ.get('SessionToken')
+#api_endpoint = os.environ.get('API_EndPoint')
+# ChatGPT
+#from revChatGPT.ChatGPT import Chatbot
+#chatbot = Chatbot({"session_token": session_token})  # You can start a custom conversation
+import asyncio
+from pygpt import PyGPT
+
+title = "Speech to ChatGPT to Speech"
+#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
+#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
+coquiTTS = CoquiTTS()
+chat_id = {'conversation_id': None, 'parent_id': None}
+headers = {'Authorization': 'yusin'}
+
+async def chat_gpt_ask(prompt):
+    chat_gpt = PyGPT(session_token)
+    await chat_gpt.connect()
+    await chat_gpt.wait_for_ready()
+    answer = await chat_gpt.ask(prompt)
+    print(answer)
+    await chat_gpt.disconnect()
+
+# ChatGPT
+def chat_hf(audio, custom_token, language):
+    #output = chatgpt(audio, "transcribe", fn_index=0)
+    #whisper_text, gpt_response = output[0], output[1]
+    try:
+        whisper_text = translate(audio)
+        if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
+            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
+        else:
+            #gpt_response = chatbot.ask(whisper_text, conversation_id=conversation_id, parent_id=None)
+            gpt_response = asyncio.run(chat_gpt_ask(whisper_text, id='yusin'))
+            #if chat_id['conversation_id'] != None:
+            #    data = {"content": whisper_text, "conversation_id": chat_id['conversation_id'], "parent_id": chat_id['parent_id']}
+            #else:
+            #    data = {"content": whisper_text}
+            #print(data)
+            #res = requests.get('http://myip.ipip.net', timeout=5).text
+            #print(res)
+            #response = requests.post('api_endpoint', headers=headers, json=data, verify=False, timeout=5)
+            #print('this is my answear', response.text)
+            #chat_id['parent_id'] = response.json()["response_id"]
+            #chat_id['conversation_id'] = response.json()["conversation_id"]
+            #gpt_response = response.json()["content"]
+            #response = requests.get('https://api.pawan.krd/chat/gpt?text=' + whisper_text + '&cache=false', verify=False, timeout=5)
+            #print(response.text)
+
+        #whisper_text = translate(audio)
+        #api = ChatGPT(session_token)
+        #resp = api.send_message(whisper_text)
+
+        #api.refresh_auth()  # refresh the authorization token
+        #api.reset_conversation()  # reset the conversation
+        #gpt_response = resp['message']
+
+    except:
+        whisper_text = translate(audio)
+        gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""
+        #whisper_text = translate(audio)
+        #api = ChatGPT(custom_token)
+        #resp = api.send_message(whisper_text)
+
+        #api.refresh_auth()  # refresh the authorization token
+        #api.reset_conversation()  # reset the conversation
+        #gpt_response = resp['message']
+
+    ## call openai
+    gpt_response = Question(whisper_text)
+
+    # to voice
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
+
+    return whisper_text, gpt_response, fp.name
+
+# whisper
+#def translate(audio):
+#    print("""
+#    —
+#    Sending audio to Whisper ...
+#    —
+#    """)
+#
+#    audio = whisper.load_audio(audio)
+#    audio = whisper.pad_or_trim(audio)
+#
+#    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+#
+#    _, probs = whisper_model.detect_language(mel)
+#
+#    transcript_options = whisper.DecodingOptions(task="transcribe", fp16=False)
+#
+#    transcription = whisper.decode(whisper_model, mel, transcript_options)
+#
+#    print("language spoken: " + transcription.language)
+#    print("transcript: " + transcription.text)
+#    print("———————————————————————————————————————————")
+#
+#    return transcription.text
+
+def translate(audio):
+    print("""
+    —
+    Sending audio to Whisper ...
+    —
+    """)
+
+    text_result = whisper(audio, None, "transcribe", fn_index=0)
+    #print(text_result)
+    return text_result
+
+
+with gr.Blocks() as blocks:
+    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
+                + title
+                + "</h1>")
+    #gr.Markdown(description)
+    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
+    with gr.Row(equal_height=True):  # equal_height=False
+        with gr.Column():  # variant="panel"
+            audio_file = gr.Audio(source="microphone", type="filepath")
+            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
+            with gr.Row():  # mobile_collapse=False
+                submit = gr.Button("Submit", variant="primary")
+        with gr.Column():
+            text1 = gr.Textbox(label="Speech to Text")
+            text2 = gr.Textbox(label="ChatGPT Response")
+            audio = gr.Audio(label="Output", interactive=False)
+    #gr.Markdown(info)
+    #gr.Markdown("<center>"
+    #            +f'<img src={badge} alt="visitors badge"/>'
+    #            +"</center>")
+
+    # actions
+    submit.click(
+        chat_hf,
+        [audio_file, custom_token, radio],
+        [text1, text2, audio],
+    )
+    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)
+
+
+blocks.launch(debug=True)
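
One note on the `Question()` helper above: it hard-codes an OpenAI API key in source and calls the legacy Completion endpoint. Below is a minimal sketch of the same call, assuming the key is instead supplied through an environment variable (the name OPENAI_API_KEY is illustrative here, not something this commit defines).

import os
import openai

def ask_openai(prompt: str) -> str:
    # Assumption: the key is exposed as an environment variable rather than
    # hard-coded in the source file.
    openai.api_key = os.environ["OPENAI_API_KEY"]
    # Same legacy completion call and parameters as Question() in app.py.
    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return completion.choices[0].text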
packages.txt
ADDED
@@ -0,0 +1,2 @@
+libsndfile1
+espeak-ng
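
These system packages are the native dependencies typically required by the Coqui TTS plugin used in app.py (libsndfile1 for audio file I/O, espeak-ng for phonemization). A minimal sketch of the synthesis step they support, mirroring the TTS call in app.py and assuming neon-tts-plugin-coqui is installed:

import tempfile
from neon_tts_plugin_coqui import CoquiTTS

# Synthesize a short reply to a temporary WAV file, exactly as chat_hf() does.
# "en" is one of the language keys exposed by CoquiTTS.langs.
tts = CoquiTTS()
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
    tts.get_tts("Hello from the Space!", fp, speaker={"language": "en"})
print("wrote", fp.name)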
pygpt.py
ADDED
@@ -0,0 +1,111 @@
+import uuid
+import asyncio
+import socketio
+import datetime
+import json
+import base64
+
+class PyGPT:
+    def __init__(self, session_token, bypass_node='https://gpt.pawan.krd'):
+        self.ready = False
+        self.socket = socketio.AsyncClient()
+        self.socket.on('connect', self.on_connect)
+        self.socket.on('disconnect', self.on_disconnect)
+        self.session_token = session_token
+        self.conversations = []
+        self.auth = None
+        self.expires = datetime.datetime.now()
+        self.pause_token_checks = False
+        self.bypass_node = bypass_node
+        asyncio.create_task(self.cleanup_conversations())
+
+    async def connect(self):
+        await self.socket.connect(self.bypass_node)
+
+    async def disconnect(self):
+        await self.socket.disconnect()
+
+    def on_connect(self):
+        print('Connected to server')
+        asyncio.create_task(self.check_tokens())
+
+    def on_disconnect(self):
+        print('Disconnected from server')
+        self.ready = False
+
+    async def check_tokens(self):
+        while True:
+            if self.pause_token_checks:
+                await asyncio.sleep(0.5)
+                continue
+            self.pause_token_checks = True
+            now = datetime.datetime.now()
+            offset = datetime.timedelta(minutes=2)
+            if self.expires < (now - offset) or not self.auth:
+                await self.get_tokens()
+            self.pause_token_checks = False
+            await asyncio.sleep(0.5)
+
+    async def cleanup_conversations(self):
+        while True:
+            await asyncio.sleep(60)
+            now = datetime.datetime.now()
+            self.conversations = [c for c in self.conversations if now - c['last_active'] < datetime.timedelta(minutes=2)]
+
+    def add_conversation(self, id):
+        conversation = {
+            'id': id,
+            'conversation_id': None,
+            'parent_id': uuid.uuid4(),
+            'last_active': datetime.datetime.now()
+        }
+        self.conversations.append(conversation)
+        return conversation
+
+    def get_conversation_by_id(self, id):
+        conversation = next((c for c in self.conversations if c['id'] == id), None)
+        if conversation is None:
+            conversation = self.add_conversation(id)
+        else:
+            conversation['last_active'] = datetime.datetime.now()
+        return conversation
+
+    async def wait_for_ready(self):
+        while not self.ready:
+            await asyncio.sleep(0.025)
+        print('Ready!!')
+
+    async def ask(self, prompt, id='default'):
+        if not self.auth or not self.validate_token(self.auth):
+            await self.get_tokens()
+        conversation = self.get_conversation_by_id(id)
+        data = await self.socket.call('askQuestion', {
+            'prompt': prompt,
+            'parentId': str(conversation['parent_id']),
+            'conversationId': str(conversation['conversation_id']),
+            'auth': self.auth
+        })
+
+        if 'error' in data:
+            print(f'Error: {data["error"]}')
+        conversation['parent_id'] = data['messageId']
+        conversation['conversation_id'] = data['conversationId']
+        return data['answer']
+
+    def validate_token(self, token):
+        if not token:
+            return False
+        parsed = json.loads(base64.b64decode(f'{token.split(".")[1]}==').decode())
+        return datetime.datetime.now() <= datetime.datetime.fromtimestamp(parsed['exp'])
+
+    async def get_tokens(self):
+        await asyncio.sleep(1)
+        data = await self.socket.call('getSession', self.session_token)
+
+        if 'error' in data:
+            print(f'Error getting session: {data["error"]}')
+        else:
+            self.auth = data['auth']
+            self.expires = datetime.datetime.strptime(data['expires'], '%Y-%m-%dT%H:%M:%S.%fZ')
+            self.session_token = data['sessionToken']
+            self.ready = True
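
For reference, this client is driven from `chat_gpt_ask()` in app.py. The sketch below shows the same connect / wait_for_ready / ask / disconnect sequence, with the answer returned to the caller rather than only printed; a valid ChatGPT session token is assumed (in the Space it is read from the SessionToken environment variable).

import asyncio
from pygpt import PyGPT

async def ask_once(session_token: str, prompt: str) -> str:
    # PyGPT must be constructed inside a running event loop because its
    # __init__ schedules background tasks with asyncio.create_task().
    client = PyGPT(session_token)
    await client.connect()          # open the socket.io connection to the bypass node
    await client.wait_for_ready()   # wait until getSession has populated the auth token
    answer = await client.ask(prompt)
    await client.disconnect()
    return answer

# Usage (the token value is a placeholder, not part of this commit):
# print(asyncio.run(ask_once("<your ChatGPT session token>", "Hello!")))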
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+openai
+neon-tts-plugin-coqui==0.7.0
+python-socketio[asyncio_client]
+#undetected-chromedriver
+#revChatGPT
+#git+https://github.com/openai/whisper.git