| """PersonaFlow - Interactive Audio Character Demo for Hugging Face Spaces."""
|
| import logging
|
| import os
|
| from pathlib import Path
|
|
|
| import gradio as gr
|
| import numpy as np
|
|
|
|
|
# Root logging configuration shared by every module logger in this process.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# SPACE_ID is injected by the Hugging Face Spaces runtime; it is absent
# when the app runs locally, so this doubles as an environment detector.
IS_SPACES = "SPACE_ID" in os.environ
|
|
|
|
|
# The `spaces` package (ZeroGPU decorator support) is only installed on
# Hugging Face Spaces, so import it solely in that environment.
if IS_SPACES:
    import spaces
|
|
|
|
|
| from config.characters import get_character, get_all_characters, DEFAULT_CHARACTER_ID
|
|
|
|
|
# Lazily-created singleton AudioPipeline; populated by get_pipeline().
_pipeline = None
|
|
|
|
|
def get_pipeline():
    """Return the process-wide AudioPipeline, constructing it on first use."""
    global _pipeline
    if _pipeline is None:
        # Deferred import: the pipeline pulls in heavy model dependencies,
        # so only pay that cost when audio is actually processed.
        from src.pipeline import AudioPipeline

        # On Spaces a GPU is available; local runs fall back to CPU.
        _pipeline = AudioPipeline(device="cuda" if IS_SPACES else "cpu")
    return _pipeline
|
|
|
|
|
def _process_audio_impl(audio_tuple, character_id, conversation_history):
    """Run one utterance through the STT -> LLM -> TTS pipeline.

    Args:
        audio_tuple: (sample_rate, samples) tuple from the Gradio mic,
            or None when nothing was recorded.
        character_id: id of the selected persona; unknown ids fall back
            to the default character.
        conversation_history: list of {"role", "content"} message dicts.

    Returns:
        Tuple of (audio_out, user_text, response_text, status_or_timing).
        On failure the error text is placed in the response slot.
    """
    if audio_tuple is None:
        return None, "", "", "No audio recorded"

    _sr, samples = audio_tuple
    if len(samples) == 0:
        return None, "", "", "No audio detected"

    # Unknown character ids fall back to the default persona.
    character = get_character(character_id)
    if character is None:
        character = get_character(DEFAULT_CHARACTER_ID)

    logger.info(f"Processing audio for character: {character.name}")

    try:
        pipeline = get_pipeline()
        audio_out, user_text, response_text, timings = pipeline.process(
            audio_tuple=audio_tuple,
            system_prompt=character.system_prompt,
            voice=character.voice,
            conversation_history=conversation_history,
        )
        # Per-stage latencies are reported in seconds; show milliseconds.
        timing_str = f"STT: {timings['stt']*1000:.0f}ms | LLM: {timings['llm']*1000:.0f}ms | TTS: {timings['tts']*1000:.0f}ms | Total: {timings['total']*1000:.0f}ms"
        return audio_out, user_text, response_text, timing_str
    except Exception as exc:
        # UI boundary: surface the error to the user instead of crashing.
        logger.error(f"Error processing audio: {exc}", exc_info=True)
        return None, "", f"Error: {str(exc)}", ""
|
|
|
|
|
|
|
def _process_audio_dispatch(audio_tuple, character_id, conversation_history):
    """Forward to the shared audio-processing implementation."""
    return _process_audio_impl(audio_tuple, character_id, conversation_history)


if IS_SPACES:
    # ZeroGPU: each call gets a GPU attached for at most 30 seconds.
    process_audio_gpu = spaces.GPU(duration=30)(_process_audio_dispatch)
else:
    # Local run: no decorator, plain CPU execution.
    process_audio_gpu = _process_audio_dispatch
|
|
|
|
|
def create_portrait_html(character):
    """Create HTML for the animated portrait.

    Renders a colored circle (using character.portrait_color) holding an
    emoji placeholder and a mouth overlay, followed by a status pill.
    The CSS classes (portrait-idle, mouth-closed, status-idle) are
    presumably animated by static/styles.css — verify against that file.
    """
    # NOTE(review): these literals appear to be mojibake of the original
    # emoji characters (encoding damage) — confirm the intended glyphs.
    emoji = 'π' if character.id == 'visionary' else 'π€' if character.id == 'skeptic' else 'π'
    return f"""
    <div class="portrait-container portrait-idle" style="
        width: 200px;
        height: 200px;
        border-radius: 50%;
        background: {character.portrait_color};
        margin: 0 auto;
        display: flex;
        align-items: center;
        justify-content: center;
        box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
        position: relative;
    ">
        <div class="portrait-placeholder" style="font-size: 80px;">
            {emoji}
        </div>
        <div class="mouth-overlay mouth-closed" style="
            position: absolute;
            bottom: 25%;
            left: 50%;
            transform: translateX(-50%);
            width: 40px;
            height: 8px;
            background: rgba(0, 0, 0, 0.2);
            border-radius: 4px;
        "></div>
    </div>
    <div class="status-indicator status-idle" style="
        display: flex;
        align-items: center;
        justify-content: center;
        gap: 8px;
        padding: 8px 16px;
        border-radius: 20px;
        margin: 15px auto;
        width: fit-content;
        background: #f3f4f6;
    ">
        <div class="status-dot" style="width: 8px; height: 8px; border-radius: 50%; background: #9ca3af;"></div>
        <span class="status-text">Ready to listen</span>
    </div>
    """
|
|
|
|
|
def on_audio_record(audio, character_id, history):
    """Handle a finished microphone recording.

    Args:
        audio: (sample_rate, samples) tuple from the mic, or None.
        character_id: currently selected character id.
        history: list of (user_text, assistant_text) pairs, or None.

    Returns:
        (audio_out, timing string, chatbot pairs, updated state pairs).
    """
    if history is None:
        history = []

    if audio is None:
        return None, "", history, history

    # Flatten the (user, assistant) pairs into role-tagged messages
    # in the order the LLM expects.
    messages = [
        {"role": role, "content": text}
        for user_msg, assistant_msg in history
        for role, text in (("user", user_msg), ("assistant", assistant_msg))
    ]

    audio_out, user_text, response_text, timing = process_audio_gpu(
        audio, character_id, messages
    )

    # Only record the exchange when both sides produced text
    # (errors leave user_text empty).
    updated = list(history)
    if user_text and response_text:
        updated.append((user_text, response_text))

    return audio_out, timing, updated, updated
|
|
|
|
|
def update_character_info(character_id):
    """Refresh the info panel and portrait for a newly selected character.

    Also clears the chatbot display and conversation state, since the
    previous transcript belongs to a different persona.
    """
    char = get_character(character_id)
    if not char:
        return "", "", [], []
    info_md = f"**{char.tagline}**\n\n{char.description}"
    return info_md, create_portrait_html(char), [], []
|
|
|
|
|
def clear_conversation():
    """Reset both the chatbot display and the stored conversation state."""
    empty_chat, empty_state = [], []
    return empty_chat, empty_state
|
|
|
|
|
|
|
# Optional custom stylesheet shipped alongside the app; missing file
# simply means no extra CSS is injected into the Blocks UI.
css_path = Path(__file__).parent / "static" / "styles.css"
custom_css = css_path.read_text() if css_path.exists() else ""
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: three-column layout (character picker | portrait + audio I/O |
# transcript), plus event wiring at the bottom.
# NOTE(review): several UI strings below contain what looks like emoji
# mojibake (encoding damage) — confirm the intended glyphs; left untouched
# here because they are runtime strings.
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="PersonaFlow",
    theme=gr.themes.Soft(),
    css=custom_css,
) as demo:

    # Lets Spaces visitors sign in (e.g. to use their own Pro quota).
    gr.LoginButton(value="Sign in to use your Pro Quota")

    # Per-session list of (user_text, assistant_text) pairs.
    conversation_state = gr.State([])

    gr.Markdown("""
    # π PersonaFlow
    ### Speak with AI characters that have distinct personalities and voices

    Select a character, then click the microphone to start talking!
    """)

    with gr.Row():

        # --- Left column: character selection ---
        with gr.Column(scale=1):
            gr.Markdown("### Choose Your Character")

            character_dropdown = gr.Dropdown(
                choices=[(c.name, c.id) for c in get_all_characters()],
                value=DEFAULT_CHARACTER_ID,
                label="Character",
                interactive=True,
            )

            # Seed the info panel with the default character's blurb.
            default_char = get_character(DEFAULT_CHARACTER_ID)
            character_info = gr.Markdown(
                f"**{default_char.tagline}**\n\n{default_char.description}"
            )

        # --- Middle column: portrait, mic input, synthesized reply ---
        with gr.Column(scale=2):

            portrait_html = gr.HTML(
                value=create_portrait_html(get_character(DEFAULT_CHARACTER_ID)),
            )

            # Microphone input; recordings are capped at 10 seconds.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="numpy",
                label="π€ Click to speak",
                max_length=10,
            )

            # Character's synthesized reply; plays automatically.
            audio_output = gr.Audio(
                label="Character Response",
                type="numpy",
                autoplay=True,
            )

            # Read-only display of per-stage pipeline latencies.
            timing_display = gr.Textbox(
                label="Processing Time",
                interactive=False,
            )

        # --- Right column: conversation transcript ---
        with gr.Column(scale=1):
            gr.Markdown("### Conversation")

            chatbot = gr.Chatbot(
                label="Chat History",
                height=400,
            )

            clear_btn = gr.Button("ποΈ Clear Conversation", variant="secondary")

    # Switching character refreshes info/portrait and wipes the transcript.
    character_dropdown.change(
        fn=update_character_info,
        inputs=[character_dropdown],
        outputs=[character_info, portrait_html, chatbot, conversation_state],
    )

    # Fires when the user stops recording: runs the full STT->LLM->TTS pass.
    audio_input.stop_recording(
        fn=on_audio_record,
        inputs=[audio_input, character_dropdown, conversation_state],
        outputs=[audio_output, timing_display, chatbot, conversation_state],
    )

    clear_btn.click(
        fn=clear_conversation,
        outputs=[chatbot, conversation_state],
    )

if __name__ == "__main__":
    # show_api=False hides the auto-generated API docs page.
    demo.launch(show_api=False)
|
|
|