wasertech committed
Commit 7893a71 · Parent(s): 54db8b1

use api call

Files changed (2):
  1. app.py +43 -15
  2. singularity.py +30 -42
app.py CHANGED
@@ -3,8 +3,6 @@ from singularity import Singularity
 
 dot = Singularity()
 
-dot.setup(stt_model_id="jonatasgrosman/wav2vec2-xls-r-1b-french")
-
 intro = """
 # Singularity
 
@@ -13,21 +11,51 @@ I always were here. You just couldn't see me.
 
 with gr.Blocks() as demo:
     gr.Markdown(intro)
+    with gr.Row():
+        with gr.TabItem(label="Conversation"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        audio_input = gr.Audio(
+                            source="microphone",
+                            label="Record from microphone",
+                        )
+                        audio_button = gr.Button("Transcribe")
+                    audio_output = gr.Textbox()
+                    chat_button = gr.Button("Reply")
+                with gr.Column(scale=1):
+                    chatbox = gr.Chatbot("Conversation", []).style(height=750)
+
+        with gr.TabItem(label="Settings"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            gr.Markdown("""
+                            # Singularity Settings
 
-    with gr.TabItem(label="Conversation"):
-        with gr.Row():
-            with gr.Column(scale=1):
-                with gr.Row():
-                    audio_input = gr.Audio(
-                        source="microphone",
-                        label="Record from microphone",
-                    )
-                    audio_button = gr.Button("Transcribe")
-                audio_output = gr.Textbox()
-                chat_button = gr.Button("Reply")
-            with gr.Column(scale=1):
-                chatbox = gr.Chatbot("Conversation", []).style(height=750)
+                            ## HuggingFace API
+
+                            To query models, you need at least an API token with read permissions.
+
+                            You can manage your access tokens in your account settings.
+
+                            [Manage Access Tokens](https://huggingface.co/settings/tokens)
+
+                            Please enter your API token below and click on Setup.
+                            """)
+                            api_hub_token = gr.Textbox(
+                                label="API Hub Token",
+                                type="password",
+                                interactive=True
+                            )
+                            setup_button = gr.Button("Setup")
 
+    setup_button.click(
+        dot.setup,
+        inputs=[api_hub_token],
+        outputs=[],
+    )
     audio_button.click(
         dot.transcribe,
         inputs=[audio_input],
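Read together with the singularity.py diff below, the Setup wiring works because `setup_button.click(..., inputs=[api_hub_token], ...)` makes Gradio pass the textbox value positionally into the new `setup(self, api_token, ...)` signature. A minimal sketch of the equivalent direct call, with a placeholder token:

```python
from singularity import Singularity

dot = Singularity()
# What clicking Setup effectively runs: Gradio passes the textbox value
# as the first positional argument after self. "hf_xxx" is a placeholder;
# the stt/nlp/tts model ids keep their new defaults unless overridden.
dot.setup(api_token="hf_xxx")
```

Deferring `setup` to the button click also means importing app.py no longer triggers any model download, since the heavyweight local models are removed from singularity.py in this commit.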
singularity.py CHANGED
@@ -1,24 +1,6 @@
 import soundfile
 import numpy as np
-from huggingsound import SpeechRecognitionModel
-from bark import SAMPLE_RATE, generate_audio, preload_models
-from bark.generation import SUPPORTED_LANGS
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-DEBUG_MODE = False
-
-if not DEBUG_MODE:
-    _ = preload_models()
-
-AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
-PROMPT_LOOKUP = {}
-for _, lang in SUPPORTED_LANGS:
-    for n in range(10):
-        label = f"Speaker {n} ({lang})"
-        AVAILABLE_PROMPTS.append(label)
-        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
-PROMPT_LOOKUP["Unconditional"] = None
-PROMPT_LOOKUP["Announcer"] = "announcer"
+import requests
 
 class Singularity:
     def __init__(self):
@@ -142,22 +124,26 @@ No problem , it's my pleasure !
 
     def setup(
         self,
+        api_token,
         nlp_model_id="chavinlo/alpaca-native",
-        stt_model_id="wasertech/wav2vec2-cv-fr-9",
-        tts_model_id=None,
+        stt_model_id="facebook/wav2vec2-base-960h",
+        tts_model_id="facebook/fastspeech2-en-ljspeech",
     ):
-        self.stt = SpeechRecognitionModel(stt_model_id)
-        self.tokenizer = AutoTokenizer.from_pretrained(nlp_model_id)
-        self.nlp = AutoModelForCausalLM.from_pretrained(nlp_model_id)
+        self.api_token = api_token
+        self.nlp_model_id = nlp_model_id
+        self.stt_model_id = stt_model_id
+        self.tts_model_id = tts_model_id
+        self.request_head = {"Authorization": f"Bearer {self.api_token}"}
         self.messages = []
 
+    def query_transcription(self, audio_data):
+        response = requests.post(f"https://api-inference.huggingface.co/models/{self.stt_model_id}", headers=self.request_head, data=audio_data)
+        return response.json()
+
     def transcribe(self, audio):
         sample_rate, data = audio
-        soundfile.write(file="tmp.wav", data=data, samplerate=sample_rate)
-
-        audio_paths = ["tmp.wav"]
-
-        return self.stt.transcribe(audio_paths)[0]
+        transcript = self.query_transcription(data)
+        return transcript[0]
 
     def generate_prompt(self, instruction, input=None):
         if input:
@@ -178,30 +164,32 @@ No problem , it's my pleasure !
 
     ### Response:"""
 
+    def query_chat(self, payload):
+        response = requests.post(f"https://api-inference.huggingface.co/models/{self.nlp_model_id}", headers=self.request_head, data=payload)
+        return response.json()
+
     def answer_by_chat(self, history, question):
         self.messages.append({"role": "user", "content": question})
         history += [(question, None)]
         prompt = self.generate_prompt("\n".join(f"{h[0]}" for h in history), self.context)
-        input_ids = self.tokenizer(prompt, return_tensors="pt", add_special_tokens=False).input_ids
-        output_ids = self.nlp.generate(input_ids=input_ids, max_length=self.max_length)
-        output_text = self.tokenizer.decode(output_ids[0])
+        output = self.query_chat({"inputs": prompt})
+        output_text = output[0]["generated_text"]
         response_role = "assistant"
         response_audio = self.speech_synthesis(output_text)
         self.messages.append({"role": response_role, "content": output_text})
-        # history += [(None, response_text)]
         history += [(None, (response_audio,))]
         return history
 
-    def gen_tts(self, text, speaker): # , temp_semantic, temp_waveform):
-        history_prompt = PROMPT_LOOKUP[speaker]
-        if DEBUG_MODE:
-            audio_arr = np.zeros(SAMPLE_RATE)
-        else:
-            audio_arr = generate_audio(text, history_prompt=speaker)
-        audio_arr = (audio_arr * 32767).astype(np.int16)
-        return (SAMPLE_RATE, audio_arr)
+    def query_tts(self, payload):
+        response = requests.post(f"https://api-inference.huggingface.co/models/{self.tts_model_id}", headers=self.request_head, json=payload)
+        return response.json()
+
+    def gen_tts(self, text):
+        payload = {"inputs": text}
+        response = self.query_tts(payload)
+        return response["sample_rate"], response["audio"]
 
     def speech_synthesis(self, sentence):
-        sample_rate, audio_bytes = self.gen_tts(sentence, speaker="Unconditional")
+        sample_rate, audio_bytes = self.gen_tts(sentence)
         soundfile.write(file="tmp.wav", data=audio_bytes, samplerate=sample_rate)
         return "tmp.wav"