Spaces:

jeremierostan
/

Aya_For_Schools

Sleeping

App Files Files Community

jeremierostan commited on Oct 27, 2024

Commit

48cf021

verified ·

1 Parent(s): d130f0b

Create app.py

Browse files

Files changed (1) hide show

app.py +372 -0

app.py ADDED Viewed

	@@ -0,0 +1,372 @@

+from __future__ import annotations
+import os
+import re
+import time
+import uuid
+import torch
+import cohere
+import secrets
+import fasttext
+import requests
+from groq import Groq
+from dataclasses import dataclass
+from typing import Optional, List, Tuple, Any
+from huggingface_hub import hf_hub_download
+from functools import lru_cache
+import gradio as gr
+# Configuration
+@dataclass
+class Config:
+    device: str = "cuda" if torch.cuda.is_available() else "cpu"
+    batch_size: int = 32
+    model_name: str = "aya-expanse-32B"
+    # API Keys from environment
+    groq_api_key: str = os.getenv("GROQ_API_KEY")
+    chat_cohere_api_key: str = os.getenv("CHAT_COHERE_API_KEY")
+    # Neets API key will be set via user input
+    neets_ai_api_key: Optional[str] = None
+    def set_neets_key(self, key: str):
+        """Update Neets API key."""
+        self.neets_ai_api_key = key
+config = Config()
+# Initialize clients
+def get_clients():
+    return {
+        'chat': cohere.Client(
+            api_key=config.chat_cohere_api_key,
+            client_name="c4ai-aya-expanse-chat"
+        ),
+        'groq': Groq(api_key=config.groq_api_key)
+    }
+clients = get_clients()
+# Language identification
+@lru_cache(maxsize=1)
+def load_lid_model():
+    """Load and cache the language identification model."""
+    lid_model_path = hf_hub_download(
+        repo_id="facebook/fasttext-language-identification",
+        filename="model.bin"
+    )
+    return fasttext.load_model(lid_model_path)
+def predict_language(text: str) -> str:
+    """Predict language of input text using FastText model."""
+    if not text:
+        return "eng_Latn"  # Default to English
+    text = re.sub("\n", " ", text)
+    model = load_lid_model()
+    label, _ = model.predict(text)
+    return label[0][len("__label__"):]
+def clean_text(text: str, remove_bullets: bool = False, remove_newline: bool = False) -> str:
+    """Clean text by removing formatting and optional elements."""
+    if not text:
+        return ""
+    text = re.sub(r"\*\*", "", text)
+    if remove_bullets:
+        text = re.sub(r"^- ", "", text, flags=re.MULTILINE)
+    if remove_newline:
+        text = re.sub(r"\n", " ", text)
+    return text.strip()
+class ConversationManager:
+    """Manages the entire conversation flow including voice, text, and memory."""
+    def __init__(self):
+        self.chat_client = clients['chat']
+    def check_neets_key(self) -> bool:
+        """Check if Neets API key is set."""
+        return bool(config.neets_ai_api_key)
+    def transcribe_audio(self, audio_file: str) -> Tuple[str, str]:
+        """Transcribe audio to text and detect language."""
+        if not audio_file:
+            return "", "eng_Latn"
+        # Transcribe using Whisper
+        with open(audio_file, "rb") as f:
+            transcription = clients['groq'].audio.transcriptions.create(
+                file=(audio_file, f.read()),
+                model="whisper-large-v3-turbo",
+                response_format="json",
+                temperature=0.0
+            )
+        text = transcription.text
+        lang_code = predict_language(text)
+        return text, lang_code
+    def generate_response(
+        self,
+        user_input: str,
+        chat_history: List[Tuple[str, str]],
+        conversation_id: str = None
+    ) -> Tuple[List[Tuple[str, str]], str, str]:
+        """Generate assistant's response based on user input and conversation history."""
+        if not conversation_id:
+            conversation_id = str(uuid.uuid4())
+        # Format history for the model
+        formatted_history = []
+        for human, assistant in chat_history:
+            formatted_history.extend([human, assistant])
+        # Generate response
+        stream = self.chat_client.chat_stream(
+            message=user_input,
+            preamble=CHAT_PREAMBLE,
+            conversation_id=conversation_id,
+            model=config.model_name,
+            temperature=0.3,
+            chat_history=formatted_history
+        )
+        # Collect response
+        response = ""
+        for event in stream:
+            if event.event_type == "text-generation":
+                response += event.text
+                # Update chat history
+                if chat_history and isinstance(chat_history[-1], tuple):
+                    chat_history = chat_history[:-1] + [(user_input, response)]
+                else:
+                    chat_history.append((user_input, response))
+                yield chat_history, response, conversation_id
+        return chat_history, response, conversation_id
+    def text_to_speech(self, text: str, lang_code: str) -> str:
+        """Convert text to speech using Neets.ai."""
+        if not text:
+            return None
+        if not self.check_neets_key():
+            raise ValueError("Neets API key not set. Please enter your API key.")
+        # Get language mapping for Neets.ai
+        neets_lang_id = NEETS_AI_LANGID_MAP.get(lang_code, "en")
+        neets_vits_voice_id = f"vits-{neets_lang_id}"
+        response = requests.post(
+            url="https://api.neets.ai/v1/tts",
+            headers={
+                "Content-Type": "application/json",
+                "X-API-Key": config.neets_ai_api_key
+            },
+            json={
+                "text": text,
+                "voice_id": neets_vits_voice_id,
+                "params": {"model": "vits"}
+            }
+        )
+        if response.status_code != 200:
+            raise ValueError(f"Neets API error: {response.text}")
+        audio_path = f"neets_response_{uuid.uuid4()}.mp3"
+        with open(audio_path, "wb") as f:
+            f.write(response.content)
+        return audio_path
+    def clear_conversation(self) -> Tuple[List, str]:
+        """Clear the conversation history."""
+        return [], str(uuid.uuid4())
+def create_gradio_interface():
+    """Create the Gradio interface for the conversational AI system."""
+    theme = gr.themes.Base(
+        primary_hue=gr.themes.colors.teal,
+        secondary_hue=gr.themes.colors.blue,
+        neutral_hue=gr.themes.colors.gray,
+        text_size=gr.themes.sizes.text_lg,
+    ).set(
+        button_primary_background_fill="#114A56",
+        button_primary_background_fill_hover="#114A56",
+        block_title_text_weight="600",
+        block_label_text_weight="600",
+        block_label_text_size="*text_md",
+    )
+    conversation_manager = ConversationManager()
+    with gr.Blocks(theme=theme, analytics_enabled=False) as demo:
+        # Header
+        with gr.Row():
+            gr.Markdown("""
+            # Multilingual Voice Chat Assistant
+            Have a natural conversation with Aya using voice or text in any of 23 supported languages.
+            """)
+        # API Key input
+        with gr.Row():
+            with gr.Column(scale=1):
+                neets_key = gr.Textbox(
+                    type="password",
+                    label="Enter your Neets.ai API Key",
+                    placeholder="Enter API key here...",
+                    show_label=True
+                )
+                api_status = gr.Markdown("API Key Status: Not Set")
+        def update_api_key(key):
+            if not key:
+                return "API Key Status: Not Set"
+            config.set_neets_key(key)
+            return "API Key Status: Set ✓"
+        neets_key.change(
+            update_api_key,
+            inputs=[neets_key],
+            outputs=[api_status]
+        )
+        # State management
+        conversation_id = gr.State("")
+        current_language = gr.State("eng_Latn")
+        with gr.Row():
+            # Chat interface
+            with gr.Column(scale=2):
+                chatbot = gr.Chatbot(
+                    show_label=False,
+                    show_copy_button=True,
+                    height=400,
+                    label="Conversation"
+                )
+                # Input options
+                with gr.Row():
+                    # Text input
+                    text_input = gr.Textbox(
+                        placeholder="Type your message or use voice input...",
+                        label="Text Input",
+                        lines=2
+                    )
+                    # Voice input
+                    audio_input = gr.Audio(
+                        source="microphone",
+                        type="filepath",
+                        label="Voice Input"
+                    )
+                with gr.Row():
+                    submit_btn = gr.Button("Send Message", variant="primary")
+                    clear_btn = gr.Button("Clear Conversation")
+            # Audio output and info
+            with gr.Column(scale=1):
+                response_audio = gr.Audio(
+                    label="Assistant's Voice Response",
+                    type="filepath"
+                )
+                detected_language = gr.Markdown(
+                    "Detected Language: English",
+                    label="Language Info"
+                )
+        # Event handlers
+        def process_input(
+            input_text: str,
+            input_audio: str,
+            history: List[Tuple[str, str]],
+            conv_id: str,
+            neets_key: str
+        ):
+            if not neets_key:
+                raise gr.Error("Please enter your Neets.ai API key first")
+            # Determine input source
+            if input_audio:
+                user_text, lang_code = conversation_manager.transcribe_audio(input_audio)
+            else:
+                user_text = input_text
+                lang_code = predict_language(user_text)
+            # Update language display
+            lang_name = LID_LANGUAGES.get(lang_code, "Unknown")
+            detected_language.update(f"Detected Language: {lang_name}")
+            # Generate response
+            new_history, response, new_conv_id = conversation_manager.generate_response(
+                user_text, history, conv_id
+            )
+            try:
+                # Generate audio response
+                audio_path = conversation_manager.text_to_speech(response, lang_code)
+            except ValueError as e:
+                raise gr.Error(str(e))
+            return new_history, new_conv_id, audio_path
+        # Connect event handlers
+        submit_btn.click(
+            process_input,
+            inputs=[
+                text_input,
+                audio_input,
+                chatbot,
+                conversation_id,
+                neets_key
+            ],
+            outputs=[
+                chatbot,
+                conversation_id,
+                response_audio
+            ]
+        )
+        # Also trigger on text input enter
+        text_input.submit(
+            process_input,
+            inputs=[
+                text_input,
+                audio_input,
+                chatbot,
+                conversation_id,
+                neets_key
+            ],
+            outputs=[
+                chatbot,
+                conversation_id,
+                response_audio
+            ]
+        )
+        # Clear conversation
+        clear_btn.click(
+            conversation_manager.clear_conversation,
+            outputs=[chatbot, conversation_id]
+        )
+        # Clear inputs after submission
+        submit_btn.click(lambda: "", None, text_input)
+        submit_btn.click(lambda: None, None, audio_input)
+    return demo
+if __name__ == "__main__":
+    demo = create_gradio_interface()
+    demo.queue(
+        api_open=False,
+        max_size=20,
+        default_concurrency_limit=4
+    ).launch(
+        show_api=False,
+        allowed_paths=['/home/user/app']
+    )