# voiceCal / streamlit_app.py
# Author: Peter Michael Gits
# feat: Implement WebRTC integration following unmute.sh pattern (commit af83599)
#!/usr/bin/env python3
"""
VoiceCal Streamlit App with WebRTC Integration (unmute.sh pattern)
"""
import streamlit as st
import sys
from datetime import datetime
import os
import asyncio
import json
# Embedded HTML/JS voice interface following the unmute.sh WebRTC pattern.
# Streams 250ms WebM/Opus chunks over a WebSocket to the VoiceCal backend
# and plays back TTS audio responses. Rendered by main() via
# st.components.v1.html.
_WEBRTC_HTML = """
<div id="voice-interface" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; margin: 20px 0;">
    <h3 style="color: white; margin-top: 0;">🎀 Voice Interface (unmute.sh Pattern)</h3>
    <div style="display: flex; gap: 10px; margin: 20px 0;">
        <button id="start-recording" style="background: #ff4757; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
            πŸŽ™οΈ Start Recording
        </button>
        <button id="stop-recording" style="background: #2f3542; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;" disabled>
            ⏹️ Stop Recording
        </button>
        <button id="test-tts" style="background: #5352ed; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
            πŸ”Š Test TTS
        </button>
    </div>
    <div id="status" style="background: rgba(0,0,0,0.2); padding: 10px; border-radius: 5px; color: white; font-family: monospace;">
        Status: Initializing WebRTC connection...
    </div>
    <div id="transcription" style="background: rgba(255,255,255,0.9); padding: 15px; border-radius: 5px; margin-top: 10px; min-height: 50px;">
        <strong>Transcription:</strong> <span id="transcription-text">Ready for voice input...</span>
    </div>
    <div id="audio-controls" style="margin-top: 15px;">
        <audio id="tts-audio" controls style="width: 100%; display: none;"></audio>
    </div>
</div>
<script>
// WebRTC Implementation following unmute.sh pattern
class VoiceCalWebRTC {
    constructor() {
        this.websocket = null;
        this.mediaRecorder = null;
        this.audioChunks = [];
        this.isRecording = false;
        // String.prototype.substr is deprecated; slice(2, 11) yields the
        // same 9-character random suffix.
        this.clientId = 'demo-' + Math.random().toString(36).slice(2, 11);
        this.sttWebSocketUrl = 'wss://pgits-stt-gpu-service.hf.space/ws/stt';
        // Use same host and port with different endpoint path
        const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
        const wsHost = window.location.host; // includes port
        this.voiceCalWebSocketUrl = `${wsProtocol}//${wsHost}/ws/webrtc/${this.clientId}`;
        this.init();
    }
    async init() {
        this.updateStatus('πŸ”Œ Connecting to WebSocket...');
        await this.connectWebSocket();
        this.setupEventListeners();
    }
    async connectWebSocket() {
        try {
            // Follow unmute.sh pattern: Connect to VoiceCal WebRTC handler
            this.websocket = new WebSocket(this.voiceCalWebSocketUrl);
            this.websocket.onopen = () => {
                this.updateStatus('βœ… WebSocket connected - Ready for voice interaction');
                console.log('WebSocket connected successfully');
            };
            this.websocket.onmessage = (event) => {
                const data = JSON.parse(event.data);
                this.handleWebSocketMessage(data);
            };
            this.websocket.onclose = () => {
                this.updateStatus('❌ WebSocket disconnected - Attempting reconnection...');
                setTimeout(() => this.connectWebSocket(), 3000);
            };
            this.websocket.onerror = (error) => {
                console.error('WebSocket error:', error);
                this.updateStatus('❌ WebSocket connection error');
            };
        } catch (error) {
            console.error('WebSocket connection failed:', error);
            this.updateStatus('❌ Failed to connect to WebSocket');
        }
    }
    handleWebSocketMessage(data) {
        console.log('Received:', data);
        switch(data.type) {
            case 'connection_confirmed':
                this.updateStatus('βœ… Connected - Ready for voice commands');
                break;
            case 'transcription':
                this.updateTranscription(data.text);
                this.updateStatus('βœ… Transcription completed');
                break;
            case 'tts_playback':
                this.playTTSAudio(data.audio_data, data.audio_format);
                break;
            case 'recording_started':
                this.updateStatus('πŸŽ™οΈ Recording in progress...');
                break;
            case 'recording_stopped':
                this.updateStatus('⏳ Processing audio (unmute.sh flush trick)...');
                break;
            case 'chunk_buffered':
                this.updateStatus(`πŸ“¦ Buffering audio chunks (${data.buffer_chunks} chunks)`);
                break;
            case 'error':
            case 'transcription_error':
            case 'tts_error':
                this.updateStatus(`❌ Error: ${data.message}`);
                break;
        }
    }
    setupEventListeners() {
        document.getElementById('start-recording').addEventListener('click', () => {
            this.startRecording();
        });
        document.getElementById('stop-recording').addEventListener('click', () => {
            this.stopRecording();
        });
        document.getElementById('test-tts').addEventListener('click', () => {
            this.testTTS();
        });
    }
    async startRecording() {
        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            // unmute.sh pattern: Use MediaRecorder with WebM format
            this.mediaRecorder = new MediaRecorder(stream, {
                mimeType: 'audio/webm;codecs=opus'
            });
            this.audioChunks = [];
            this.mediaRecorder.ondataavailable = (event) => {
                if (event.data.size > 0) {
                    this.audioChunks.push(event.data);
                    // Real-time streaming: Send chunks as they arrive (unmute.sh pattern)
                    const reader = new FileReader();
                    reader.onload = () => {
                        const audioData = btoa(String.fromCharCode(...new Uint8Array(reader.result)));
                        this.sendWebSocketMessage({
                            type: 'audio_chunk',
                            audio_data: audioData,
                            sample_rate: 16000
                        });
                    };
                    reader.readAsArrayBuffer(event.data);
                }
            };
            this.mediaRecorder.onstop = () => {
                // unmute.sh flush trick: Signal end of recording
                this.sendWebSocketMessage({
                    type: 'stop_recording'
                });
                stream.getTracks().forEach(track => track.stop());
            };
            // Start recording with small timeslice for real-time streaming
            this.mediaRecorder.start(250); // 250ms chunks following unmute.sh pattern
            this.isRecording = true;
            // Send start recording message
            this.sendWebSocketMessage({
                type: 'start_recording'
            });
            // Update UI
            document.getElementById('start-recording').disabled = true;
            document.getElementById('stop-recording').disabled = false;
            this.updateStatus('πŸŽ™οΈ Recording started - Speak now...');
        } catch (error) {
            console.error('Recording failed:', error);
            this.updateStatus('❌ Microphone access failed');
        }
    }
    stopRecording() {
        if (this.mediaRecorder && this.isRecording) {
            this.mediaRecorder.stop();
            this.isRecording = false;
            // Update UI
            document.getElementById('start-recording').disabled = false;
            document.getElementById('stop-recording').disabled = true;
            this.updateStatus('⏹️ Recording stopped - Processing...');
        }
    }
    sendWebSocketMessage(message) {
        if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
            this.websocket.send(JSON.stringify(message));
        }
    }
    updateStatus(message) {
        document.getElementById('status').innerHTML = `Status: ${message}`;
    }
    updateTranscription(text) {
        document.getElementById('transcription-text').innerHTML = text;
    }
    playTTSAudio(audioData, format) {
        try {
            const audioElement = document.getElementById('tts-audio');
            const audioBytes = atob(audioData);
            const audioArray = new Uint8Array(audioBytes.length);
            for (let i = 0; i < audioBytes.length; i++) {
                audioArray[i] = audioBytes.charCodeAt(i);
            }
            const audioBlob = new Blob([audioArray], { type: `audio/${format}` });
            // Revoke the previous blob URL (if any) so repeated TTS
            // playbacks don't leak object URLs.
            if (audioElement.src) {
                URL.revokeObjectURL(audioElement.src);
            }
            const audioUrl = URL.createObjectURL(audioBlob);
            audioElement.src = audioUrl;
            audioElement.style.display = 'block';
            audioElement.play();
            this.updateStatus('πŸ”Š Playing TTS audio response');
        } catch (error) {
            console.error('TTS playback failed:', error);
            this.updateStatus('❌ TTS playback failed');
        }
    }
    testTTS() {
        const testText = "Hello! This is a test of the voice synthesis system. VoiceCal is working with WebRTC integration following the unmute.sh pattern.";
        this.sendWebSocketMessage({
            type: 'tts_request',
            text: testText,
            voice_preset: 'v2/en_speaker_6'
        });
        this.updateStatus('πŸ”Š Requesting TTS synthesis...');
    }
}
// Initialize exactly once, whether the script runs before or after the
// DOM finishes loading. (A second unconditional DOMContentLoaded listener
// previously caused the client to be instantiated twice, opening two
// WebSocket connections.)
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        window.voiceCalWebRTC = new VoiceCalWebRTC();
    });
} else {
    window.voiceCalWebRTC = new VoiceCalWebRTC();
}
</script>
"""


def _render_status_dashboard():
    """Render the three-column service-status metric dashboard."""
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("🎀 VoiceCal", "Online", "βœ…")
        st.metric("πŸ“‘ WebRTC", "Ready", "πŸ”„")
    with col2:
        st.metric("🧠 STT Service", "Available", "βœ…")
        st.metric("πŸ”Š TTS Service", "Available", "βœ…")
    with col3:
        st.metric("🌐 WebSocket", "Initializing", "⏳")
        st.metric("πŸ“± Client", "Pending", "πŸ”Œ")


def _render_technical_details():
    """Render the technical-details section: config, feature list, endpoints."""
    st.markdown("---")
    st.header("πŸ”§ Technical Details")
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("πŸ“‘ WebRTC Configuration")
        # The doubled braces render literally as {client_id} in the code box.
        st.code(f"""
WebSocket URL: wss://pgits-voicecal.hf.space/ws/webrtc/{{client_id}}
STT Endpoint: wss://pgits-stt-gpu-service.hf.space/ws/stt
TTS Endpoint: wss://pgits-tts-gpu-service.hf.space/ws/tts
Audio Format: WebM/Opus (16kHz, Mono)
Chunk Size: 250ms (unmute.sh pattern)
""")
    with col2:
        st.subheader("🎯 Features")
        st.write("βœ… Real-time audio streaming")
        st.write("βœ… WebRTC MediaRecorder integration")
        st.write("βœ… unmute.sh pattern implementation")
        st.write("βœ… Automatic chunking & buffering")
        st.write("βœ… Flush trick for end-of-stream")
        st.write("βœ… Bidirectional voice communication")
    # Connection Status
    st.subheader("πŸ”— Service Endpoints")
    st.json({
        "voicecal_websocket": "wss://pgits-voicecal.hf.space/ws/webrtc/demo-xxxx",
        "stt_service": "wss://pgits-stt-gpu-service.hf.space/ws/stt",
        "tts_service": "wss://pgits-tts-gpu-service.hf.space/ws/tts",
        "pattern": "unmute.sh WebRTC implementation",
        "status": "Ready for voice interaction",
    })


def main():
    """Render the VoiceCal Streamlit page.

    Sets the page configuration, shows the service-status dashboard,
    embeds the WebRTC voice interface (HTML/JS, unmute.sh pattern), and
    renders the technical-details section and footer. Streamlit UI only;
    returns None.
    """
    st.set_page_config(
        page_title="VoiceCal - Voice Assistant",
        page_icon="🎀",
        layout="wide",
    )
    st.title("πŸŽ€πŸ“… VoiceCal - Voice-Enabled AI Assistant")
    st.markdown("**WebRTC Voice Integration Following unmute.sh Pattern**")

    # Service status dashboard
    _render_status_dashboard()

    # WebRTC Integration Section
    st.markdown("---")
    st.header("🌐 WebRTC Voice Integration")
    st.components.v1.html(_WEBRTC_HTML, height=600)

    # Technical Information
    _render_technical_details()

    # Footer
    st.markdown("---")
    st.markdown("πŸš€ **VoiceCal WebRTC Integration** - Following unmute.sh pattern for optimal voice processing")
# Script entry point: run the app directly (e.g. `streamlit run streamlit_app.py`).
if __name__ == "__main__":
    main()