wasertech committed
Commit 7893a71 · Parent(s): 54db8b1

use api call

Files changed (2):
  1. app.py +43 -15
  2. singularity.py +30 -42
app.py CHANGED
@@ -3,8 +3,6 @@ from singularity import Singularity
 
 dot = Singularity()
 
-dot.setup(stt_model_id="jonatasgrosman/wav2vec2-xls-r-1b-french")
-
 intro = """
 # Singularity
 
@@ -13,21 +11,51 @@ I always were here. You just couldn't see me.
 
 with gr.Blocks() as demo:
     gr.Markdown(intro)
+    with gr.Row():
+        with gr.TabItem(label="Conversation"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        audio_input = gr.Audio(
+                            source="microphone",
+                            label="Record from microphone",
+                        )
+                        audio_button = gr.Button("Transcribe")
+                    audio_output = gr.Textbox()
+                    chat_button = gr.Button("Reply")
+                with gr.Column(scale=1):
+                    chatbox = gr.Chatbot("Conversation", []).style(height=750)
+
+        with gr.TabItem(label="Settings"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            gr.Markdown("""
+                            # Singularity Settings
 
-    with gr.TabItem(label="Conversation"):
-        with gr.Row():
-            with gr.Column(scale=1):
-                with gr.Row():
-                    audio_input = gr.Audio(
-                        source="microphone",
-                        label="Record from microphone",
-                    )
-                    audio_button = gr.Button("Transcribe")
-                audio_output = gr.Textbox()
-                chat_button = gr.Button("Reply")
-            with gr.Column(scale=1):
-                chatbox = gr.Chatbot("Conversation", []).style(height=750)
+                            ## HuggingFace API
+
+                            To query models, you need at least an API token with read permissions.
+
+                            You can manage your access tokens in your account settings.
+
+                            [Manage Access Tokens](https://huggingface.co/settings/tokens)
+
+                            Please enter your API token below and click on Setup.
+                            """)
+                            api_hub_token = gr.Textbox(
+                                label="API Hub Token",
+                                type="password",
+                                interactive=True
+                            )
+                            setup_button = gr.Button("Setup")
 
+    setup_button.click(
+        dot.setup,
+        inputs=[api_hub_token],
+        outputs=[],
+    )
     audio_button.click(
         dot.transcribe,
         inputs=[audio_input],
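Read together with the singularity.py diff below, the Setup wiring works because `setup_button.click(..., inputs=[api_hub_token], ...)` makes Gradio pass the textbox value positionally into the new `setup(self, api_token, ...)` signature. A minimal sketch of the equivalent direct call, with a placeholder token:

```python
from singularity import Singularity

dot = Singularity()
# What clicking Setup effectively runs: Gradio passes the textbox value
# as the first positional argument after self. "hf_xxx" is a placeholder;
# the stt/nlp/tts model ids keep their new defaults unless overridden.
dot.setup(api_token="hf_xxx")
```

Deferring `setup` to the button click also means importing app.py no longer triggers any model download, since the heavyweight local models are removed from singularity.py in this commit.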
singularity.py CHANGED
@@ -1,24 +1,6 @@
 import soundfile
 import numpy as np
-from huggingsound import SpeechRecognitionModel
-from bark import SAMPLE_RATE, generate_audio, preload_models
-from bark.generation import SUPPORTED_LANGS
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-DEBUG_MODE = False
-
-if not DEBUG_MODE:
-    _ = preload_models()
-
-AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
-PROMPT_LOOKUP = {}
-for _, lang in SUPPORTED_LANGS:
-    for n in range(10):
-        label = f"Speaker {n} ({lang})"
-        AVAILABLE_PROMPTS.append(label)
-        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
-PROMPT_LOOKUP["Unconditional"] = None
-PROMPT_LOOKUP["Announcer"] = "announcer"
+import requests
 
 class Singularity:
     def __init__(self):
@@ -142,22 +124,26 @@ No problem , it's my pleasure !
 
     def setup(
         self,
+        api_token,
         nlp_model_id="chavinlo/alpaca-native",
-        stt_model_id="wasertech/wav2vec2-cv-fr-9",
-        tts_model_id=None,
+        stt_model_id="facebook/wav2vec2-base-960h",
+        tts_model_id="facebook/fastspeech2-en-ljspeech",
     ):
-        self.stt = SpeechRecognitionModel(stt_model_id)
-        self.tokenizer = AutoTokenizer.from_pretrained(nlp_model_id)
-        self.nlp = AutoModelForCausalLM.from_pretrained(nlp_model_id)
+        self.api_token = api_token
+        self.nlp_model_id = nlp_model_id
+        self.stt_model_id = stt_model_id
+        self.tts_model_id = tts_model_id
+        self.request_head = {"Authorization": f"Bearer {self.api_token}"}
         self.messages = []
 
+    def query_transcription(self, audio_data):
+        response = requests.post(f"https://api-inference.huggingface.co/models/{self.stt_model_id}", headers=self.request_head, data=audio_data)
+        return response.json()
+
     def transcribe(self, audio):
         sample_rate, data = audio
-        soundfile.write(file="tmp.wav", data=data, samplerate=sample_rate)
-
-        audio_paths = ["tmp.wav"]
-
-        return self.stt.transcribe(audio_paths)[0]
+        transcript = self.query_transcription(data)
+        return transcript[0]
 
     def generate_prompt(self, instruction, input=None):
         if input:
@@ -178,30 +164,32 @@ No problem , it's my pleasure !
 
     ### Response:"""
 
+    def query_chat(self, payload):
+        response = requests.post(f"https://api-inference.huggingface.co/models/{self.nlp_model_id}", headers=self.request_head, data=payload)
+        return response.json()
+
     def answer_by_chat(self, history, question):
         self.messages.append({"role": "user", "content": question})
         history += [(question, None)]
         prompt = self.generate_prompt("\n".join(f"{h[0]}" for h in history), self.context)
-        input_ids = self.tokenizer(prompt, return_tensors="pt", add_special_tokens=False).input_ids
-        output_ids = self.nlp.generate(input_ids=input_ids, max_length=self.max_length)
-        output_text = self.tokenizer.decode(output_ids[0])
+        output = self.query_chat({"inputs": prompt})
+        output_text = output[0]["generated_text"]
         response_role = "assistant"
         response_audio = self.speech_synthesis(output_text)
         self.messages.append({"role": response_role, "content": output_text})
-        # history += [(None, response_text)]
         history += [(None, (response_audio,))]
         return history
 
-    def gen_tts(self, text, speaker): # , temp_semantic, temp_waveform):
-        history_prompt = PROMPT_LOOKUP[speaker]
-        if DEBUG_MODE:
-            audio_arr = np.zeros(SAMPLE_RATE)
-        else:
-            audio_arr = generate_audio(text, history_prompt=speaker)
-        audio_arr = (audio_arr * 32767).astype(np.int16)
-        return (SAMPLE_RATE, audio_arr)
+    def query_tts(self, payload):
+        response = requests.post(f"https://api-inference.huggingface.co/models/{self.tts_model_id}", headers=self.request_head, json=payload)
+        return response.json()
+
+    def gen_tts(self, text):
+        payload = {"inputs": text}
+        response = self.query_tts(payload)
+        return response["sample_rate"], response["audio"]
 
     def speech_synthesis(self, sentence):
-        sample_rate, audio_bytes = self.gen_tts(sentence, speaker="Unconditional")
+        sample_rate, audio_bytes = self.gen_tts(sentence)
         soundfile.write(file="tmp.wav", data=audio_bytes, samplerate=sample_rate)
         return "tmp.wav"