WWMachine committed on
Commit
5a6a6ff
·
verified ·
1 Parent(s): 29a7fe7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -70
app.py CHANGED
@@ -2,107 +2,158 @@ import gradio as gr
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import os
 
5
 
6
  # --- Configuration ---
7
- MODEL_REPO = "Kezovic/iris-q4gguf-v2"
8
- MODEL_FILE = "llama-3.2-1b-instruct.Q4_K_M.gguf"
 
 
 
 
 
9
  CONTEXT_WINDOW = 4096
10
  MAX_NEW_TOKENS = 512
11
  TEMPERATURE = 0.7
12
 
 
 
 
 
 
 
13
  # --- Model Loading Function ---
14
- # Initialize llm as None to avoid the Llama.__del__ 'NoneType' error
15
- llm = None
16
  def load_llm():
17
  """Downloads the GGUF model and initializes LlamaCPP."""
18
- global llm # Use the global variable
19
- print("Downloading model...")
20
  try:
21
  model_path = hf_hub_download(
22
- repo_id=MODEL_REPO,
23
- filename=MODEL_FILE
24
  )
25
-
26
  llm = Llama(
27
  model_path=model_path,
28
  n_ctx=CONTEXT_WINDOW,
29
  n_threads=2,
30
- verbose=False
31
  )
32
- print("Model loaded successfully!")
33
  return llm
34
  except Exception as e:
35
  print(f"Error loading model: {e}")
36
  return None
37
 
38
- # Load the model only once
39
- llm = load_llm()
40
-
41
- # --- Inference Function ---
42
- def generate_and_speak(text_prompt):
43
- """
44
- Generates a text response using the Llama model.
45
- The output text is automatically synthesized into speech by Gradio's Audio component.
46
- """
47
- if llm is None:
48
- return "Error: LLM failed to load. Please check model configuration.", None
49
 
50
- if not text_prompt or text_prompt.strip() == "":
51
- return "Please enter a query.", None
52
-
53
- # Use a basic prompt template
54
- full_prompt = f"### Human: {text_prompt}\n### Assistant:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
 
 
 
56
  try:
57
- output = llm(
58
- prompt=full_prompt,
59
- max_tokens=MAX_NEW_TOKENS,
60
- temperature=TEMPERATURE,
61
- stop=["### Human:"],
62
- echo=False
63
  )
64
-
65
- response_text = output['choices'][0]['text'].strip()
66
- # Return the text. It will update the Textbox AND the Audio component.
67
- return response_text, response_text
68
  except Exception as e:
69
- return f"LLM Generation Error: {e}", None
 
70
 
71
- # --- Gradio Interface (TTS Flow) ---
72
- with gr.Blocks(title=f"Audio Chat with {MODEL_FILE}") as demo:
73
- gr.Markdown(f"## 🗣️ LLM Chat with Text-to-Speech (TTS)")
74
- gr.Markdown("Type your query (Text Input) and the LLM will reply in both text and auto-generated audio (TTS).")
 
 
 
 
 
75
 
76
- with gr.Row():
77
- text_input = gr.Textbox(
78
- label="Your Query (Text Input)",
79
- lines=2,
80
- scale=3
81
- )
82
- audio_btn = gr.Button("Generate and Speak")
83
-
84
- # Outputs
85
- text_output = gr.Textbox(label="LLM Response Text")
86
- audio_output = gr.Audio(
87
- label="Assistant Audio Playback (TTS)",
88
- autoplay=True,
89
- # Gradio automatically synthesizes the text output received by this Audio component
90
- # into speech. We set it as an 'update' target.
91
- interactive=False
92
- )
93
 
94
- # Set up the event listener: Button click triggers the function.
95
- audio_btn.click(
96
- fn=generate_and_speak,
97
- inputs=[text_input],
98
- outputs=[text_output, audio_output]
 
 
 
 
 
99
  )
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- # Enable enter key to submit
102
- text_input.submit(
103
- fn=generate_and_speak,
104
- inputs=[text_input],
105
- outputs=[text_output, audio_output]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  )
107
 
108
- demo.launch()
 
 
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import os
5
+ from deepgram import DeepgramClient, PrerecordedOptions, SpeakOptions
6
 
7
  # --- Configuration ---
8
+ # 1. API KEY: Ensure you have your Deepgram API Key ready
9
+ # Ideally, set this in your environment variables as DEEPGRAM_API_KEY
10
+ DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY", "YOUR_DEEPGRAM_KEY_HERE")
11
+
12
+ # 2. Model Config
13
+ REPO_ID = "Kezovic/iris-q4gguf-v2"
14
+ FILENAME = "llama-3.2-1b-instruct.Q4_K_M.gguf"
15
  CONTEXT_WINDOW = 4096
16
  MAX_NEW_TOKENS = 512
17
  TEMPERATURE = 0.7
18
 
19
+ # --- Initialize Deepgram ---
20
+ if DEEPGRAM_API_KEY == "YOUR_DEEPGRAM_KEY_HERE":
21
+ print("WARNING: Please set your DEEPGRAM_API_KEY.")
22
+
23
+ deepgram = DeepgramClient(DEEPGRAM_API_KEY)
24
+
25
# --- Model Loading Function ---
# Module-level handle; stays None until load_llm() succeeds.
llm = None

def load_llm():
    """Download the GGUF model from the Hub and initialize LlamaCPP.

    Returns the loaded Llama instance, or None if download/init failed.
    """
    global llm
    print("Downloading LLM...")
    try:
        gguf_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        # n_threads=2 is good for free Hugging Face CPU tiers
        llm = Llama(
            model_path=gguf_path,
            n_ctx=CONTEXT_WINDOW,
            n_threads=2,
            verbose=False
        )
        print("LLM loaded successfully!")
        return llm
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

# Load model on startup
load_llm()
 
 
 
 
 
 
 
 
 
51
 
52
# --- 1. Speech-to-Text (Deepgram) ---
def transcribe_audio(audio_filepath):
    """Send a recorded audio file to Deepgram and return its transcript.

    Returns an empty string when no file is given or transcription fails.
    """
    if not audio_filepath:
        return ""

    try:
        with open(audio_filepath, "rb") as audio_file:
            request_payload = {"buffer": audio_file}
            stt_options = PrerecordedOptions(
                smart_format=True,
                model="nova-2",
                language="en-US"
            )
            result = deepgram.listen.rest.v("1").transcribe_file(
                request_payload, stt_options
            )
            return result.results.channels[0].alternatives[0].transcript
    except Exception as e:
        print(f"STT Error: {e}")
        return ""
71
 
72
# --- 2. Text-to-Speech (Deepgram) ---
def text_to_speech(text):
    """Synthesize `text` with Deepgram TTS and return the saved audio path.

    Returns None when synthesis fails (the Gradio Audio output accepts None).
    """
    try:
        # Bug fix: the options below request linear16 audio in a WAV
        # container, so the file extension must be .wav — it was previously
        # saved as "output_response.mp3", mislabeling the container for
        # downstream players.
        filename = "output_response.wav"
        options = SpeakOptions(
            model="aura-asteria-en",  # Choices: aura-asteria-en, aura-helios-en, etc.
            encoding="linear16",
            container="wav"
        )
        # Save the synthesized audio to a file for Gradio to serve.
        deepgram.speak.rest.v("1").save(filename, {"text": text}, options)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None
88
 
89
# --- 3. Main Pipeline Function ---
def process_conversation(audio_input):
    """Run the full voice pipeline: STT -> LLM -> TTS.

    Args:
        audio_input: Filepath of the recorded audio (from gr.Audio).

    Returns:
        A 3-tuple (user_transcript, output_audio_path, response_text) for
        the three Gradio output components; the audio path is None on error.
    """
    if llm is None:
        return "Model not loaded.", None, "System Error: Model failed to load."

    # Step A: Transcribe
    user_text = transcribe_audio(audio_input)
    if not user_text:
        return "Could not hear audio.", None, ""

    print(f"User said: {user_text}")

    # Step B: LLM Inference
    # Using the prompt format from your original code
    full_prompt = f"### Human: {user_text}\n### Assistant:"

    # Bug fix: generation was unguarded, so any llama_cpp exception crashed
    # the Gradio handler; the pre-refactor code caught it and reported the
    # error in the UI — restore that behavior.
    try:
        output = llm(
            prompt=full_prompt,
            max_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
            stop=["### Human:"],
            echo=False
        )
        response_text = output['choices'][0]['text'].strip()
    except Exception as e:
        return user_text, None, f"LLM Generation Error: {e}"

    print(f"LLM said: {response_text}")

    # Step C: Speak Response
    output_audio_path = text_to_speech(response_text)

    # Return: Transcription (for display), Audio (for playback), LLM Text (for display)
    return user_text, output_audio_path, response_text
125
+
126
# --- Gradio UI ---
with gr.Blocks(title=f"Voice Chat with {FILENAME}") as demo:
    gr.Markdown(f"## 🗣️ Deepgram Voice Chat with {FILENAME}")

    with gr.Row():
        # Input column: microphone capture plus a submit button.
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Speak Now"
            )
            submit_btn = gr.Button("Submit Audio", variant="primary")

        # Output column: synthesized voice plus transcripts for debugging.
        with gr.Column():
            audio_output = gr.Audio(
                label="Assistant Voice",
                autoplay=True,  # play the response as soon as it arrives
                interactive=False
            )
            user_transcript = gr.Textbox(label="You said:")
            ai_response_text = gr.Textbox(label="AI Response:")

    # Wire the button to the STT -> LLM -> TTS pipeline.
    submit_btn.click(
        fn=process_conversation,
        inputs=[audio_input],
        outputs=[user_transcript, audio_output, ai_response_text]
    )

if __name__ == "__main__":
    demo.launch()