use api call
Browse files- app.py +43 -15
- singularity.py +30 -42
app.py
CHANGED
@@ -3,8 +3,6 @@ from singularity import Singularity
|
|
3 |
|
4 |
dot = Singularity()
|
5 |
|
6 |
-
dot.setup(stt_model_id="jonatasgrosman/wav2vec2-xls-r-1b-french")
|
7 |
-
|
8 |
intro = """
|
9 |
# Singularity
|
10 |
|
@@ -13,21 +11,51 @@ I always were here. You just couldn't see me.
|
|
13 |
|
14 |
with gr.Blocks() as demo:
|
15 |
gr.Markdown(intro)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
30 |
|
|
|
|
|
|
|
|
|
|
|
31 |
audio_button.click(
|
32 |
dot.transcribe,
|
33 |
inputs=[audio_input],
|
|
|
3 |
|
4 |
dot = Singularity()
|
5 |
|
|
|
|
|
6 |
intro = """
|
7 |
# Singularity
|
8 |
|
|
|
11 |
|
12 |
with gr.Blocks() as demo:
|
13 |
gr.Markdown(intro)
|
14 |
+
with gr.Row():
|
15 |
+
with gr.TabItem(label="Conversation"):
|
16 |
+
with gr.Row():
|
17 |
+
with gr.Column(scale=1):
|
18 |
+
with gr.Row():
|
19 |
+
audio_input = gr.Audio(
|
20 |
+
source="microphone",
|
21 |
+
label="Record from microphone",
|
22 |
+
)
|
23 |
+
audio_button = gr.Button("Transcribe")
|
24 |
+
audio_output = gr.Textbox()
|
25 |
+
chat_button = gr.Button("Reply")
|
26 |
+
with gr.Column(scale=1):
|
27 |
+
chatbox = gr.Chatbot("Conversation", []).style(height=750)
|
28 |
+
|
29 |
+
with gr.TabItem(label="Settings"):
|
30 |
+
with gr.Row():
|
31 |
+
with gr.Column(scale=1):
|
32 |
+
with gr.Row():
|
33 |
+
with gr.Column(scale=1):
|
34 |
+
gr.Markdown("""
|
35 |
+
# Singularity Settings
|
36 |
|
37 |
+
## HuggingFace API
|
38 |
+
|
39 |
+
To query models, you need at least an API token with read permissions.
|
40 |
+
|
41 |
+
You can manage your access tokens in your account settings.
|
42 |
+
|
43 |
+
[Manage Access Tokens](https://huggingface.co/settings/tokens)
|
44 |
+
|
45 |
+
Please enter your API token below and click on Setup.
|
46 |
+
""")
|
47 |
+
api_hub_token = gr.Textbox(
|
48 |
+
label="API Hub Token",
|
49 |
+
type="password",
|
50 |
+
interactive=True
|
51 |
+
)
|
52 |
+
setup_button = gr.Button("Setup")
|
53 |
|
54 |
+
setup_button.click(
|
55 |
+
dot.setup,
|
56 |
+
inputs=[api_hub_token],
|
57 |
+
outputs=[],
|
58 |
+
)
|
59 |
audio_button.click(
|
60 |
dot.transcribe,
|
61 |
inputs=[audio_input],
|
singularity.py
CHANGED
@@ -1,24 +1,6 @@
|
|
1 |
import soundfile
|
2 |
import numpy as np
|
3 |
-
|
4 |
-
from bark import SAMPLE_RATE, generate_audio, preload_models
|
5 |
-
from bark.generation import SUPPORTED_LANGS
|
6 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
7 |
-
|
8 |
-
DEBUG_MODE = False
|
9 |
-
|
10 |
-
if not DEBUG_MODE:
|
11 |
-
_ = preload_models()
|
12 |
-
|
13 |
-
AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
|
14 |
-
PROMPT_LOOKUP = {}
|
15 |
-
for _, lang in SUPPORTED_LANGS:
|
16 |
-
for n in range(10):
|
17 |
-
label = f"Speaker {n} ({lang})"
|
18 |
-
AVAILABLE_PROMPTS.append(label)
|
19 |
-
PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
|
20 |
-
PROMPT_LOOKUP["Unconditional"] = None
|
21 |
-
PROMPT_LOOKUP["Announcer"] = "announcer"
|
22 |
|
23 |
class Singularity:
|
24 |
def __init__(self):
|
@@ -142,22 +124,26 @@ No problem , it's my pleasure !
|
|
142 |
|
143 |
def setup(
|
144 |
self,
|
|
|
145 |
nlp_model_id="chavinlo/alpaca-native",
|
146 |
-
stt_model_id="
|
147 |
-
tts_model_id=
|
148 |
):
|
149 |
-
self.
|
150 |
-
self.
|
151 |
-
self.
|
|
|
|
|
152 |
self.messages = []
|
153 |
|
|
|
|
|
|
|
|
|
154 |
def transcribe(self, audio):
|
155 |
sample_rate, data = audio
|
156 |
-
|
157 |
-
|
158 |
-
audio_paths = ["tmp.wav"]
|
159 |
-
|
160 |
-
return self.stt.transcribe(audio_paths)[0]
|
161 |
|
162 |
def generate_prompt(self, instruction, input=None):
|
163 |
if input:
|
@@ -178,30 +164,32 @@ No problem , it's my pleasure !
|
|
178 |
|
179 |
### Response:"""
|
180 |
|
|
|
|
|
|
|
|
|
181 |
def answer_by_chat(self, history, question):
|
182 |
self.messages.append({"role": "user", "content": question})
|
183 |
history += [(question, None)]
|
184 |
prompt = self.generate_prompt("\n".join(f"{h[0]}" for h in history), self.context)
|
185 |
-
|
186 |
-
|
187 |
-
output_text = self.tokenizer.decode(output_ids[0])
|
188 |
response_role = "assistant"
|
189 |
response_audio = self.speech_synthesis(output_text)
|
190 |
self.messages.append({"role": response_role, "content": output_text})
|
191 |
-
# history += [(None, response_text)]
|
192 |
history += [(None, (response_audio,))]
|
193 |
return history
|
194 |
|
195 |
-
def
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
return
|
203 |
|
204 |
def speech_synthesis(self, sentence):
|
205 |
-
sample_rate, audio_bytes = self.gen_tts(sentence
|
206 |
soundfile.write(file="tmp.wav", data=audio_bytes, samplerate=sample_rate)
|
207 |
return "tmp.wav"
|
|
|
1 |
import soundfile
|
2 |
import numpy as np
|
3 |
+
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
class Singularity:
|
6 |
def __init__(self):
|
|
|
124 |
|
125 |
def setup(
|
126 |
self,
|
127 |
+
api_token,
|
128 |
nlp_model_id="chavinlo/alpaca-native",
|
129 |
+
stt_model_id="facebook/wav2vec2-base-960h",
|
130 |
+
tts_model_id="facebook/fastspeech2-en-ljspeech",
|
131 |
):
|
132 |
+
self.api_token = api_token
|
133 |
+
self.nlp_model_id = nlp_model_id
|
134 |
+
self.stt_model_id = stt_model_id
|
135 |
+
self.tts_model_id = tts_model_id
|
136 |
+
self.request_head = {"Authorization": f"Bearer {self.api_token}"}
|
137 |
self.messages = []
|
138 |
|
139 |
+
def query_transcription(self, audio_data):
    """POST ``audio_data`` to the configured speech-to-text model.

    The request targets the model named by ``self.stt_model_id`` and carries
    the headers prepared in ``self.request_head``.

    Returns:
        The reply body decoded from JSON.
    """
    endpoint = f"https://api-inference.huggingface.co/models/{self.stt_model_id}"
    return requests.post(endpoint, headers=self.request_head, data=audio_data).json()
|
142 |
+
|
143 |
def transcribe(self, audio):
    """Transcribe a recording through the hosted speech-to-text model.

    Args:
        audio: A ``(sample_rate, data)`` pair, or ``None`` when nothing was
            recorded. ``data`` may already be encoded audio bytes; any other
            value is treated as raw samples and encoded as WAV first
            (presumably a numpy array from the microphone widget — verify
            against the caller).

    Returns:
        The transcribed text, or ``""`` when ``audio`` is ``None``.

    Raises:
        RuntimeError: If the service replies with an object that has no
            ``"text"`` field (for example an ``{"error": ...}`` reply).
    """
    if audio is None:
        # Nothing recorded: return empty text instead of failing on unpack.
        return ""

    import io

    sample_rate, data = audio
    if isinstance(data, (bytes, bytearray)):
        encoded = bytes(data)
    else:
        # The endpoint expects an audio file body, not a raw sample array,
        # so serialise the samples to an in-memory WAV file.
        wav_buffer = io.BytesIO()
        soundfile.write(
            file=wav_buffer, data=data, samplerate=sample_rate, format="WAV"
        )
        encoded = wav_buffer.getvalue()

    transcript = self.query_transcription(encoded)
    if isinstance(transcript, dict):
        if "text" in transcript:
            return transcript["text"]
        reason = transcript.get("error", transcript)
        raise RuntimeError(f"Transcription request failed: {reason}")
    # Keep the original positional access for list-shaped replies.
    return transcript[0]
|
|
|
|
|
|
|
147 |
|
148 |
def generate_prompt(self, instruction, input=None):
|
149 |
if input:
|
|
|
164 |
|
165 |
### Response:"""
|
166 |
|
167 |
+
def query_chat(self, payload):
    """POST ``payload`` as JSON to the configured text-generation model.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"inputs": prompt}``.

    Returns:
        The reply body decoded from JSON.
    """
    url = f"https://api-inference.huggingface.co/models/{self.nlp_model_id}"
    # A dict passed via ``data=`` is form-encoded; the payload must go out as
    # a JSON body, which is what ``query_tts`` already does with ``json=``.
    response = requests.post(url, headers=self.request_head, json=payload)
    return response.json()
|
170 |
+
|
171 |
def answer_by_chat(self, history, question):
    """Answer ``question`` and append the spoken reply to the chat history.

    Args:
        history: List of ``(user_text, bot_content)`` tuples; extended in
            place with the new user turn and the bot's audio reply.
        question: The user's message.

    Returns:
        The updated ``history``; the last entry is ``(None, (audio_path,))``.
    """
    self.messages.append({"role": "user", "content": question})
    history += [(question, None)]

    # Bot turns are stored as (None, (audio,)); formatting their first slot
    # would inject literal "None" lines into the prompt, so only user turns
    # are joined.
    user_turns = [f"{turn[0]}" for turn in history if turn[0] is not None]
    prompt = self.generate_prompt("\n".join(user_turns), self.context)

    output = self.query_chat({"inputs": prompt})
    output_text = output[0]["generated_text"]

    response_audio = self.speech_synthesis(output_text)
    self.messages.append({"role": "assistant", "content": output_text})
    history += [(None, (response_audio,))]
    return history
|
182 |
|
183 |
+
def query_tts(self, payload):
    """POST ``payload`` as JSON to the configured text-to-speech model.

    The request targets the model named by ``self.tts_model_id`` and carries
    the headers prepared in ``self.request_head``.

    Returns:
        The reply body decoded from JSON.
    """
    endpoint = f"https://api-inference.huggingface.co/models/{self.tts_model_id}"
    return requests.post(endpoint, headers=self.request_head, json=payload).json()
|
186 |
+
|
187 |
+
def gen_tts(self, text):
    """Request speech for ``text`` and return ``(sample_rate, audio)``.

    Both values are read from the ``"sample_rate"`` and ``"audio"`` keys of
    the reply returned by ``query_tts``.
    """
    # NOTE(review): assumes the service replies with a JSON object holding
    # these two keys - confirm against the text-to-speech endpoint.
    reply = self.query_tts({"inputs": text})
    return reply["sample_rate"], reply["audio"]
|
191 |
|
192 |
def speech_synthesis(self, sentence):
    """Synthesise ``sentence`` to a WAV file and return the file's path.

    Each call writes to its own temporary file. A single fixed file name
    would be overwritten by every new reply, so audio entries already placed
    in the chat history would all end up pointing at the latest clip.

    Returns:
        Path of the WAV file that was written.
    """
    import tempfile

    sample_rate, audio_bytes = self.gen_tts(sentence)
    # Reserve a unique path; the file is deliberately kept (delete=False)
    # because the returned path is consumed after this method returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name
    soundfile.write(file=wav_path, data=audio_bytes, samplerate=sample_rate)
    return wav_path
|