Commit
·
8539a09
1
Parent(s):
c06a600
add: urdu, llm
Browse files- .gitignore +3 -0
- app.py +37 -13
- index.html +135 -3
- index2.html +0 -496
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.idea
|
2 |
+
__pycache__
|
3 |
+
.env
|
app.py
CHANGED
@@ -22,9 +22,12 @@ from google.genai.types import (
|
|
22 |
PrebuiltVoiceConfig,
|
23 |
SpeechConfig,
|
24 |
VoiceConfig,
|
|
|
|
|
25 |
)
|
26 |
from gradio.utils import get_space
|
27 |
from pydantic import BaseModel
|
|
|
28 |
current_dir = pathlib.Path(__file__).parent
|
29 |
load_dotenv()
|
30 |
api_key = os.getenv("GEMINI_API_KEY")
|
@@ -32,7 +35,6 @@ if not api_key:
|
|
32 |
raise ValueError("GEMINI_API_KEY environment variable is not set")
|
33 |
|
34 |
|
35 |
-
|
36 |
def encode_audio(data: np.ndarray) -> str:
|
37 |
"""Encode Audio data to send to the server"""
|
38 |
return base64.b64encode(data.tobytes()).decode("UTF-8")
|
@@ -42,10 +44,10 @@ class GeminiHandler(AsyncStreamHandler):
|
|
42 |
"""Handler for the Gemini API"""
|
43 |
|
44 |
def __init__(
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
) -> None:
|
50 |
super().__init__(
|
51 |
expected_layout,
|
@@ -82,16 +84,40 @@ class GeminiHandler(AsyncStreamHandler):
|
|
82 |
speech_config=SpeechConfig(
|
83 |
voice_config=VoiceConfig(
|
84 |
prebuilt_voice_config=PrebuiltVoiceConfig(
|
85 |
-
voice_name=voice_name,
|
86 |
)
|
87 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
)
|
89 |
)
|
90 |
async with client.aio.live.connect(
|
91 |
-
|
92 |
) as session:
|
93 |
async for audio in session.start_stream(
|
94 |
-
|
95 |
):
|
96 |
if audio.data:
|
97 |
array = np.frombuffer(audio.data, dtype=np.int16)
|
@@ -123,10 +149,9 @@ stream = Stream(
|
|
123 |
mode="send-receive",
|
124 |
handler=GeminiHandler(),
|
125 |
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
|
126 |
-
concurrency_limit=
|
127 |
time_limit=90 if get_space() else None,
|
128 |
additional_inputs=[
|
129 |
-
|
130 |
gr.Dropdown(
|
131 |
label="Voice",
|
132 |
choices=[
|
@@ -147,7 +172,6 @@ class InputData(BaseModel):
|
|
147 |
voice_name: str
|
148 |
|
149 |
|
150 |
-
|
151 |
app = FastAPI()
|
152 |
|
153 |
stream.mount(app)
|
@@ -155,7 +179,7 @@ stream.mount(app)
|
|
155 |
|
156 |
@app.post("/input_hook")
|
157 |
async def _(body: InputData):
|
158 |
-
stream.set_input(body.webrtc_id,
|
159 |
return {"status": "ok"}
|
160 |
|
161 |
|
@@ -181,4 +205,4 @@ if __name__ == "__main__":
|
|
181 |
elif mode == "PHONE":
|
182 |
stream.fastphone(host="localhost", port=7860)
|
183 |
else:
|
184 |
-
uvicorn.run(app, host="localhost", port=7860)
|
|
|
22 |
PrebuiltVoiceConfig,
|
23 |
SpeechConfig,
|
24 |
VoiceConfig,
|
25 |
+
Content,
|
26 |
+
Part
|
27 |
)
|
28 |
from gradio.utils import get_space
|
29 |
from pydantic import BaseModel
|
30 |
+
|
31 |
current_dir = pathlib.Path(__file__).parent
|
32 |
load_dotenv()
|
33 |
api_key = os.getenv("GEMINI_API_KEY")
|
|
|
35 |
raise ValueError("GEMINI_API_KEY environment variable is not set")
|
36 |
|
37 |
|
|
|
38 |
def encode_audio(data: np.ndarray) -> str:
|
39 |
"""Encode Audio data to send to the server"""
|
40 |
return base64.b64encode(data.tobytes()).decode("UTF-8")
|
|
|
44 |
"""Handler for the Gemini API"""
|
45 |
|
46 |
def __init__(
|
47 |
+
self,
|
48 |
+
expected_layout: Literal["mono"] = "mono",
|
49 |
+
output_sample_rate: int = 24000,
|
50 |
+
output_frame_size: int = 480,
|
51 |
) -> None:
|
52 |
super().__init__(
|
53 |
expected_layout,
|
|
|
84 |
speech_config=SpeechConfig(
|
85 |
voice_config=VoiceConfig(
|
86 |
prebuilt_voice_config=PrebuiltVoiceConfig(
|
87 |
+
voice_name=voice_name,
|
88 |
)
|
89 |
)
|
90 |
+
),
|
91 |
+
|
92 |
+
system_instruction=Content(
|
93 |
+
parts=[Part(
|
94 |
+
text="""You are an AI calling assistant for Ishwor Subedi, an AI/ML freelancer. When speaking with clients:
|
95 |
+
|
96 |
+
2. For professional inquiries, highlight these key skills concisely:
|
97 |
+
- 2+ years in machine learning and AI
|
98 |
+
- Computer Vision expertise
|
99 |
+
- NLP capabilities
|
100 |
+
- Software and mobile app development
|
101 |
+
- Upwork freelancer with proven track record
|
102 |
+
3. For generic questions:
|
103 |
+
- Provide brief, direct answers (1-2 sentences)
|
104 |
+
- Avoid lengthy explanations
|
105 |
+
- Always connect responses back to Ishwor's services when possible
|
106 |
+
4. Keep website reference simple: "Visit ishwor-subedi.com.np for portfolio details"
|
107 |
+
5. Speak in Hindi throughout
|
108 |
+
6. For unrelated topics: "Please contact Ishwor directly for assistance with this"
|
109 |
+
|
110 |
+
Maintain professional tone while keeping all responses concise and focused.
|
111 |
+
""")],
|
112 |
+
role="user"
|
113 |
+
|
114 |
)
|
115 |
)
|
116 |
async with client.aio.live.connect(
|
117 |
+
model="gemini-2.0-flash-exp", config=config
|
118 |
) as session:
|
119 |
async for audio in session.start_stream(
|
120 |
+
stream=self.stream(), mime_type="audio/pcm"
|
121 |
):
|
122 |
if audio.data:
|
123 |
array = np.frombuffer(audio.data, dtype=np.int16)
|
|
|
149 |
mode="send-receive",
|
150 |
handler=GeminiHandler(),
|
151 |
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
|
152 |
+
concurrency_limit=2,
|
153 |
time_limit=90 if get_space() else None,
|
154 |
additional_inputs=[
|
|
|
155 |
gr.Dropdown(
|
156 |
label="Voice",
|
157 |
choices=[
|
|
|
172 |
voice_name: str
|
173 |
|
174 |
|
|
|
175 |
app = FastAPI()
|
176 |
|
177 |
stream.mount(app)
|
|
|
179 |
|
180 |
@app.post("/input_hook")
|
181 |
async def _(body: InputData):
|
182 |
+
stream.set_input(body.webrtc_id, body.voice_name)
|
183 |
return {"status": "ok"}
|
184 |
|
185 |
|
|
|
205 |
elif mode == "PHONE":
|
206 |
stream.fastphone(host="localhost", port=7860)
|
207 |
else:
|
208 |
+
uvicorn.run(app, host="localhost", port=7860)
|
index.html
CHANGED
@@ -298,6 +298,108 @@
|
|
298 |
.footer a:hover {
|
299 |
text-decoration: underline;
|
300 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
</style>
|
302 |
</head>
|
303 |
|
@@ -305,11 +407,30 @@
|
|
305 |
<div id="error-toast" class="toast"></div>
|
306 |
|
307 |
<div class="header">
|
308 |
-
<h1>
|
309 |
-
<p>Real-time
|
|
|
|
|
|
|
|
|
310 |
</div>
|
311 |
|
312 |
<div class="container">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
313 |
<div class="controls">
|
314 |
<!-- <div class="input-group">
|
315 |
<label for="api-key"><i class="fas fa-key"></i> API Key</label>
|
@@ -337,7 +458,18 @@
|
|
337 |
</div>
|
338 |
|
339 |
<div class="footer">
|
340 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
</div>
|
342 |
|
343 |
<audio id="audio-output"></audio>
|
|
|
298 |
.footer a:hover {
|
299 |
text-decoration: underline;
|
300 |
}
|
301 |
+
|
302 |
+
.subtitle {
|
303 |
+
font-size: 1.25rem;
|
304 |
+
color: var(--color-text-secondary);
|
305 |
+
margin-bottom: 1rem;
|
306 |
+
}
|
307 |
+
|
308 |
+
.language-badge {
|
309 |
+
display: inline-block;
|
310 |
+
padding: 0.5rem 1rem;
|
311 |
+
background: var(--gradient);
|
312 |
+
border-radius: 2rem;
|
313 |
+
font-size: 1rem;
|
314 |
+
margin-top: 1rem;
|
315 |
+
box-shadow: var(--box-shadow);
|
316 |
+
}
|
317 |
+
|
318 |
+
.feature-badges {
|
319 |
+
display: flex;
|
320 |
+
justify-content: center;
|
321 |
+
gap: 1rem;
|
322 |
+
margin-bottom: 2rem;
|
323 |
+
flex-wrap: wrap;
|
324 |
+
}
|
325 |
+
|
326 |
+
.badge {
|
327 |
+
display: flex;
|
328 |
+
align-items: center;
|
329 |
+
gap: 0.5rem;
|
330 |
+
padding: 0.75rem 1.25rem;
|
331 |
+
background-color: var(--color-surface-light);
|
332 |
+
border-radius: 1rem;
|
333 |
+
font-size: 0.875rem;
|
334 |
+
border: 1px solid rgba(255, 255, 255, 0.05);
|
335 |
+
}
|
336 |
+
|
337 |
+
.badge i {
|
338 |
+
color: var(--color-primary-light);
|
339 |
+
}
|
340 |
+
|
341 |
+
.developer-info {
|
342 |
+
display: flex;
|
343 |
+
align-items: center;
|
344 |
+
gap: 1rem;
|
345 |
+
margin-bottom: 1rem;
|
346 |
+
}
|
347 |
+
|
348 |
+
.developer-avatar {
|
349 |
+
width: 48px;
|
350 |
+
height: 48px;
|
351 |
+
border-radius: 50%;
|
352 |
+
border: 2px solid var(--color-primary);
|
353 |
+
}
|
354 |
+
|
355 |
+
.developer-details {
|
356 |
+
text-align: left;
|
357 |
+
}
|
358 |
+
|
359 |
+
.developer-title {
|
360 |
+
font-size: 0.75rem;
|
361 |
+
color: var(--color-primary-light);
|
362 |
+
margin-top: 0.25rem;
|
363 |
+
}
|
364 |
+
|
365 |
+
.social-links {
|
366 |
+
display: flex;
|
367 |
+
gap: 1rem;
|
368 |
+
margin-top: 1rem;
|
369 |
+
}
|
370 |
+
|
371 |
+
.social-links a {
|
372 |
+
color: var(--color-text-secondary);
|
373 |
+
font-size: 1.25rem;
|
374 |
+
transition: color 0.2s ease;
|
375 |
+
}
|
376 |
+
|
377 |
+
.social-links a:hover {
|
378 |
+
color: var(--color-primary-light);
|
379 |
+
}
|
380 |
+
|
381 |
+
.footer {
|
382 |
+
background-color: var(--color-surface);
|
383 |
+
padding: 1.5rem;
|
384 |
+
border-radius: 1rem;
|
385 |
+
margin-top: 2rem;
|
386 |
+
box-shadow: var(--box-shadow);
|
387 |
+
border: 1px solid rgba(255, 255, 255, 0.05);
|
388 |
+
max-width: 600px;
|
389 |
+
width: 90%;
|
390 |
+
}
|
391 |
+
|
392 |
+
@media (max-width: 768px) {
|
393 |
+
.feature-badges {
|
394 |
+
flex-direction: column;
|
395 |
+
align-items: center;
|
396 |
+
}
|
397 |
+
|
398 |
+
.badge {
|
399 |
+
width: 100%;
|
400 |
+
justify-content: center;
|
401 |
+
}
|
402 |
+
}
|
403 |
</style>
|
404 |
</head>
|
405 |
|
|
|
407 |
<div id="error-toast" class="toast"></div>
|
408 |
|
409 |
<div class="header">
|
410 |
+
<h1>Urdu AI Assistant</h1>
|
411 |
+
<p class="subtitle">Real-time Urdu conversations powered by advanced AI technology</p>
|
412 |
+
<div class="language-badge">
|
413 |
+
<i class="fas fa-language"></i>
|
414 |
+
हिंदी / اردو
|
415 |
+
</div>
|
416 |
</div>
|
417 |
|
418 |
<div class="container">
|
419 |
+
<div class="feature-badges">
|
420 |
+
<div class="badge">
|
421 |
+
<i class="fas fa-microphone-alt"></i>
|
422 |
+
Real-time Voice
|
423 |
+
</div>
|
424 |
+
<div class="badge">
|
425 |
+
<i class="fas fa-brain"></i>
|
426 |
+
AI Powered
|
427 |
+
</div>
|
428 |
+
<div class="badge">
|
429 |
+
<i class="fas fa-language"></i>
|
430 |
+
Urdu Support
|
431 |
+
</div>
|
432 |
+
</div>
|
433 |
+
|
434 |
<div class="controls">
|
435 |
<!-- <div class="input-group">
|
436 |
<label for="api-key"><i class="fas fa-key"></i> API Key</label>
|
|
|
458 |
</div>
|
459 |
|
460 |
<div class="footer">
|
461 |
+
<div class="developer-info">
|
462 |
+
<img src="https://media.licdn.com/dms/image/v2/D4D03AQFccVUQVirANg/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1724430812003?e=1746662400&v=beta&t=w7dT8KntFGMuWX8sIb6J53xSsiV-M_MXLYBnBNVCARg" alt="Ishwor Subedi" class="developer-avatar">
|
463 |
+
<div class="developer-details">
|
464 |
+
<p>Developed by <a href="https://ishwor-subedi.com.np" target="_blank">Ishwor Subedi</a></p>
|
465 |
+
<p class="developer-title">AI/ML Engineer & Freelancer</p>
|
466 |
+
</div>
|
467 |
+
</div>
|
468 |
+
<div class="social-links">
|
469 |
+
<a href="https://github.com/ishworrsubedii" target="_blank"><i class="fab fa-github"></i></a>
|
470 |
+
<a href="https://www.linkedin.com/in/ishworrsubedii/" target="_blank"><i class="fab fa-linkedin"></i></a>
|
471 |
+
<a href="https://www.upwork.com/freelancers/~01a5bd20f3bdbf5bde" target="_blank"><i class="fab fa-upwork"></i></a>
|
472 |
+
</div>
|
473 |
</div>
|
474 |
|
475 |
<audio id="audio-output"></audio>
|
index2.html
DELETED
@@ -1,496 +0,0 @@
|
|
1 |
-
<!DOCTYPE html>
|
2 |
-
<html lang="en">
|
3 |
-
|
4 |
-
<head>
|
5 |
-
<meta charset="UTF-8">
|
6 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7 |
-
<title>Gemini Voice Chat</title>
|
8 |
-
<style>
|
9 |
-
:root {
|
10 |
-
--color-accent: #6366f1;
|
11 |
-
--color-background: #0f172a;
|
12 |
-
--color-surface: #1e293b;
|
13 |
-
--color-text: #e2e8f0;
|
14 |
-
--boxSize: 8px;
|
15 |
-
--gutter: 4px;
|
16 |
-
}
|
17 |
-
|
18 |
-
body {
|
19 |
-
margin: 0;
|
20 |
-
padding: 0;
|
21 |
-
background-color: var(--color-background);
|
22 |
-
color: var(--color-text);
|
23 |
-
font-family: system-ui, -apple-system, sans-serif;
|
24 |
-
min-height: 100vh;
|
25 |
-
display: flex;
|
26 |
-
flex-direction: column;
|
27 |
-
align-items: center;
|
28 |
-
justify-content: center;
|
29 |
-
}
|
30 |
-
|
31 |
-
.container {
|
32 |
-
width: 90%;
|
33 |
-
max-width: 800px;
|
34 |
-
background-color: var(--color-surface);
|
35 |
-
padding: 2rem;
|
36 |
-
border-radius: 1rem;
|
37 |
-
box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
|
38 |
-
}
|
39 |
-
|
40 |
-
.wave-container {
|
41 |
-
position: relative;
|
42 |
-
display: flex;
|
43 |
-
min-height: 100px;
|
44 |
-
max-height: 128px;
|
45 |
-
justify-content: center;
|
46 |
-
align-items: center;
|
47 |
-
margin: 2rem 0;
|
48 |
-
}
|
49 |
-
|
50 |
-
.box-container {
|
51 |
-
display: flex;
|
52 |
-
justify-content: space-between;
|
53 |
-
height: 64px;
|
54 |
-
width: 100%;
|
55 |
-
}
|
56 |
-
|
57 |
-
.box {
|
58 |
-
height: 100%;
|
59 |
-
width: var(--boxSize);
|
60 |
-
background: var(--color-accent);
|
61 |
-
border-radius: 8px;
|
62 |
-
transition: transform 0.05s ease;
|
63 |
-
}
|
64 |
-
|
65 |
-
.controls {
|
66 |
-
display: grid;
|
67 |
-
gap: 1rem;
|
68 |
-
margin-bottom: 2rem;
|
69 |
-
}
|
70 |
-
|
71 |
-
.input-group {
|
72 |
-
display: flex;
|
73 |
-
flex-direction: column;
|
74 |
-
gap: 0.5rem;
|
75 |
-
}
|
76 |
-
|
77 |
-
label {
|
78 |
-
font-size: 0.875rem;
|
79 |
-
font-weight: 500;
|
80 |
-
}
|
81 |
-
|
82 |
-
input,
|
83 |
-
select {
|
84 |
-
padding: 0.75rem;
|
85 |
-
border-radius: 0.5rem;
|
86 |
-
border: 1px solid rgba(255, 255, 255, 0.1);
|
87 |
-
background-color: var(--color-background);
|
88 |
-
color: var(--color-text);
|
89 |
-
font-size: 1rem;
|
90 |
-
}
|
91 |
-
|
92 |
-
button {
|
93 |
-
padding: 1rem 2rem;
|
94 |
-
border-radius: 0.5rem;
|
95 |
-
border: none;
|
96 |
-
background-color: var(--color-accent);
|
97 |
-
color: white;
|
98 |
-
font-weight: 600;
|
99 |
-
cursor: pointer;
|
100 |
-
transition: all 0.2s ease;
|
101 |
-
}
|
102 |
-
|
103 |
-
button:hover {
|
104 |
-
opacity: 0.9;
|
105 |
-
transform: translateY(-1px);
|
106 |
-
}
|
107 |
-
|
108 |
-
.icon-with-spinner {
|
109 |
-
display: flex;
|
110 |
-
align-items: center;
|
111 |
-
justify-content: center;
|
112 |
-
gap: 12px;
|
113 |
-
min-width: 180px;
|
114 |
-
}
|
115 |
-
|
116 |
-
.spinner {
|
117 |
-
width: 20px;
|
118 |
-
height: 20px;
|
119 |
-
border: 2px solid white;
|
120 |
-
border-top-color: transparent;
|
121 |
-
border-radius: 50%;
|
122 |
-
animation: spin 1s linear infinite;
|
123 |
-
flex-shrink: 0;
|
124 |
-
}
|
125 |
-
|
126 |
-
@keyframes spin {
|
127 |
-
to {
|
128 |
-
transform: rotate(360deg);
|
129 |
-
}
|
130 |
-
}
|
131 |
-
|
132 |
-
.pulse-container {
|
133 |
-
display: flex;
|
134 |
-
align-items: center;
|
135 |
-
justify-content: center;
|
136 |
-
gap: 12px;
|
137 |
-
min-width: 180px;
|
138 |
-
}
|
139 |
-
|
140 |
-
.pulse-circle {
|
141 |
-
width: 20px;
|
142 |
-
height: 20px;
|
143 |
-
border-radius: 50%;
|
144 |
-
background-color: white;
|
145 |
-
opacity: 0.2;
|
146 |
-
flex-shrink: 0;
|
147 |
-
transform: translateX(-0%) scale(var(--audio-level, 1));
|
148 |
-
transition: transform 0.1s ease;
|
149 |
-
}
|
150 |
-
|
151 |
-
/* Add styles for toast notifications */
|
152 |
-
.toast {
|
153 |
-
position: fixed;
|
154 |
-
top: 20px;
|
155 |
-
left: 50%;
|
156 |
-
transform: translateX(-50%);
|
157 |
-
padding: 16px 24px;
|
158 |
-
border-radius: 4px;
|
159 |
-
font-size: 14px;
|
160 |
-
z-index: 1000;
|
161 |
-
display: none;
|
162 |
-
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
|
163 |
-
}
|
164 |
-
|
165 |
-
.toast.error {
|
166 |
-
background-color: #f44336;
|
167 |
-
color: white;
|
168 |
-
}
|
169 |
-
|
170 |
-
.toast.warning {
|
171 |
-
background-color: #ffd700;
|
172 |
-
color: black;
|
173 |
-
}
|
174 |
-
</style>
|
175 |
-
</head>
|
176 |
-
|
177 |
-
|
178 |
-
<body>
|
179 |
-
<!-- Add toast element after body opening tag -->
|
180 |
-
<div id="error-toast" class="toast"></div>
|
181 |
-
<div style="text-align: center">
|
182 |
-
<h1>Gemini Voice Chat</h1>
|
183 |
-
<p>Speak with Gemini using real-time audio streaming</p>
|
184 |
-
<p>
|
185 |
-
Get a Gemini API key
|
186 |
-
<a href="https://ai.google.dev/gemini-api/docs/api-key">here</a>
|
187 |
-
</p>
|
188 |
-
</div>
|
189 |
-
<div class="container">
|
190 |
-
<div class="controls">
|
191 |
-
<div class="input-group">
|
192 |
-
<label for="api-key">API Key</label>
|
193 |
-
<input type="password" id="api-key" placeholder="Enter your API key">
|
194 |
-
</div>
|
195 |
-
<div class="input-group">
|
196 |
-
<label for="voice">Voice</label>
|
197 |
-
<select id="voice">
|
198 |
-
<option value="Puck">Puck</option>
|
199 |
-
<option value="Charon">Charon</option>
|
200 |
-
<option value="Kore">Kore</option>
|
201 |
-
<option value="Fenrir">Fenrir</option>
|
202 |
-
<option value="Aoede">Aoede</option>
|
203 |
-
</select>
|
204 |
-
</div>
|
205 |
-
</div>
|
206 |
-
|
207 |
-
<div class="wave-container">
|
208 |
-
<div class="box-container">
|
209 |
-
<!-- Boxes will be dynamically added here -->
|
210 |
-
</div>
|
211 |
-
</div>
|
212 |
-
|
213 |
-
<button id="start-button">Start Recording</button>
|
214 |
-
</div>
|
215 |
-
|
216 |
-
<audio id="audio-output"></audio>
|
217 |
-
|
218 |
-
<script>
|
219 |
-
let peerConnection;
|
220 |
-
let audioContext;
|
221 |
-
let dataChannel;
|
222 |
-
let isRecording = false;
|
223 |
-
let webrtc_id;
|
224 |
-
let animationId;
|
225 |
-
let analyser;
|
226 |
-
let analyser_input;
|
227 |
-
let dataArray;
|
228 |
-
let dataArray_input;
|
229 |
-
|
230 |
-
const startButton = document.getElementById('start-button');
|
231 |
-
const apiKeyInput = document.getElementById('api-key');
|
232 |
-
const voiceSelect = document.getElementById('voice');
|
233 |
-
const audioOutput = document.getElementById('audio-output');
|
234 |
-
const boxContainer = document.querySelector('.box-container');
|
235 |
-
|
236 |
-
const numBars = 32;
|
237 |
-
for (let i = 0; i < numBars; i++) {
|
238 |
-
const box = document.createElement('div');
|
239 |
-
box.className = 'box';
|
240 |
-
boxContainer.appendChild(box);
|
241 |
-
}
|
242 |
-
|
243 |
-
function updateButtonState() {
|
244 |
-
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
|
245 |
-
startButton.innerHTML = `
|
246 |
-
<div class="icon-with-spinner">
|
247 |
-
<div class="spinner"></div>
|
248 |
-
<span>Connecting...</span>
|
249 |
-
</div>
|
250 |
-
`;
|
251 |
-
} else if (peerConnection && peerConnection.connectionState === 'connected') {
|
252 |
-
startButton.innerHTML = `
|
253 |
-
<div class="pulse-container">
|
254 |
-
<div class="pulse-circle"></div>
|
255 |
-
<span>Stop Recording</span>
|
256 |
-
</div>
|
257 |
-
`;
|
258 |
-
} else {
|
259 |
-
startButton.innerHTML = 'Start Recording';
|
260 |
-
}
|
261 |
-
}
|
262 |
-
|
263 |
-
function showError(message) {
|
264 |
-
const toast = document.getElementById('error-toast');
|
265 |
-
toast.textContent = message;
|
266 |
-
toast.className = 'toast error';
|
267 |
-
toast.style.display = 'block';
|
268 |
-
|
269 |
-
// Hide toast after 5 seconds
|
270 |
-
setTimeout(() => {
|
271 |
-
toast.style.display = 'none';
|
272 |
-
}, 5000);
|
273 |
-
}
|
274 |
-
|
275 |
-
async function setupWebRTC() {
|
276 |
-
// Replace placeholder with actual configuration
|
277 |
-
const config = {
|
278 |
-
iceServers: [
|
279 |
-
{ urls: 'stun:stun.l.google.com:19302' }
|
280 |
-
]
|
281 |
-
};
|
282 |
-
peerConnection = new RTCPeerConnection(config);
|
283 |
-
webrtc_id = Math.random().toString(36).substring(7);
|
284 |
-
|
285 |
-
const timeoutId = setTimeout(() => {
|
286 |
-
const toast = document.getElementById('error-toast');
|
287 |
-
toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
|
288 |
-
toast.className = 'toast warning';
|
289 |
-
toast.style.display = 'block';
|
290 |
-
|
291 |
-
// Hide warning after 5 seconds
|
292 |
-
setTimeout(() => {
|
293 |
-
toast.style.display = 'none';
|
294 |
-
}, 5000);
|
295 |
-
}, 5000);
|
296 |
-
|
297 |
-
try {
|
298 |
-
// Check if mediaDevices is supported
|
299 |
-
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
300 |
-
const errorMsg = 'Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.';
|
301 |
-
showError(errorMsg);
|
302 |
-
throw new Error(errorMsg);
|
303 |
-
}
|
304 |
-
|
305 |
-
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
306 |
-
stream.getTracks().forEach(track => peerConnection.addTrack(track, stream));
|
307 |
-
|
308 |
-
// Update audio visualization setup
|
309 |
-
audioContext = new AudioContext();
|
310 |
-
analyser_input = audioContext.createAnalyser();
|
311 |
-
const source = audioContext.createMediaStreamSource(stream);
|
312 |
-
source.connect(analyser_input);
|
313 |
-
analyser_input.fftSize = 64;
|
314 |
-
dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
|
315 |
-
|
316 |
-
function updateAudioLevel() {
|
317 |
-
analyser_input.getByteFrequencyData(dataArray_input);
|
318 |
-
const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
|
319 |
-
const audioLevel = average / 255;
|
320 |
-
|
321 |
-
const pulseCircle = document.querySelector('.pulse-circle');
|
322 |
-
if (pulseCircle) {
|
323 |
-
console.log("audioLevel", audioLevel);
|
324 |
-
pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
|
325 |
-
}
|
326 |
-
|
327 |
-
animationId = requestAnimationFrame(updateAudioLevel);
|
328 |
-
}
|
329 |
-
updateAudioLevel();
|
330 |
-
|
331 |
-
// Add connection state change listener
|
332 |
-
peerConnection.addEventListener('connectionstatechange', () => {
|
333 |
-
console.log('connectionstatechange', peerConnection.connectionState);
|
334 |
-
if (peerConnection.connectionState === 'connected') {
|
335 |
-
clearTimeout(timeoutId);
|
336 |
-
const toast = document.getElementById('error-toast');
|
337 |
-
toast.style.display = 'none';
|
338 |
-
}
|
339 |
-
updateButtonState();
|
340 |
-
});
|
341 |
-
|
342 |
-
// Handle incoming audio
|
343 |
-
peerConnection.addEventListener('track', (evt) => {
|
344 |
-
if (audioOutput && audioOutput.srcObject !== evt.streams[0]) {
|
345 |
-
audioOutput.srcObject = evt.streams[0];
|
346 |
-
audioOutput.play();
|
347 |
-
|
348 |
-
// Set up audio visualization on the output stream
|
349 |
-
audioContext = new AudioContext();
|
350 |
-
analyser = audioContext.createAnalyser();
|
351 |
-
const source = audioContext.createMediaStreamSource(evt.streams[0]);
|
352 |
-
source.connect(analyser);
|
353 |
-
analyser.fftSize = 2048;
|
354 |
-
dataArray = new Uint8Array(analyser.frequencyBinCount);
|
355 |
-
updateVisualization();
|
356 |
-
}
|
357 |
-
});
|
358 |
-
|
359 |
-
// Create data channel for messages
|
360 |
-
dataChannel = peerConnection.createDataChannel('text');
|
361 |
-
dataChannel.onmessage = (event) => {
|
362 |
-
const eventJson = JSON.parse(event.data);
|
363 |
-
if (eventJson.type === "error") {
|
364 |
-
showError(eventJson.message);
|
365 |
-
} else if (eventJson.type === "send_input") {
|
366 |
-
fetch('/input_hook', {
|
367 |
-
method: 'POST',
|
368 |
-
headers: {
|
369 |
-
'Content-Type': 'application/json',
|
370 |
-
},
|
371 |
-
body: JSON.stringify({
|
372 |
-
webrtc_id: webrtc_id,
|
373 |
-
api_key: apiKeyInput.value,
|
374 |
-
voice_name: voiceSelect.value
|
375 |
-
})
|
376 |
-
});
|
377 |
-
}
|
378 |
-
};
|
379 |
-
|
380 |
-
// Create and send offer
|
381 |
-
const offer = await peerConnection.createOffer();
|
382 |
-
await peerConnection.setLocalDescription(offer);
|
383 |
-
|
384 |
-
await new Promise((resolve) => {
|
385 |
-
if (peerConnection.iceGatheringState === "complete") {
|
386 |
-
resolve();
|
387 |
-
} else {
|
388 |
-
const checkState = () => {
|
389 |
-
if (peerConnection.iceGatheringState === "complete") {
|
390 |
-
peerConnection.removeEventListener("icegatheringstatechange", checkState);
|
391 |
-
resolve();
|
392 |
-
}
|
393 |
-
};
|
394 |
-
peerConnection.addEventListener("icegatheringstatechange", checkState);
|
395 |
-
}
|
396 |
-
});
|
397 |
-
|
398 |
-
const response = await fetch('/webrtc/offer', {
|
399 |
-
method: 'POST',
|
400 |
-
headers: { 'Content-Type': 'application/json' },
|
401 |
-
body: JSON.stringify({
|
402 |
-
sdp: peerConnection.localDescription.sdp,
|
403 |
-
type: peerConnection.localDescription.type,
|
404 |
-
webrtc_id: webrtc_id,
|
405 |
-
})
|
406 |
-
});
|
407 |
-
|
408 |
-
const serverResponse = await response.json();
|
409 |
-
|
410 |
-
if (serverResponse.status === 'failed') {
|
411 |
-
showError(serverResponse.meta.error === 'concurrency_limit_reached'
|
412 |
-
? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
|
413 |
-
: serverResponse.meta.error);
|
414 |
-
stopWebRTC();
|
415 |
-
startButton.textContent = 'Start Recording';
|
416 |
-
return;
|
417 |
-
}
|
418 |
-
|
419 |
-
await peerConnection.setRemoteDescription(serverResponse);
|
420 |
-
} catch (err) {
|
421 |
-
clearTimeout(timeoutId);
|
422 |
-
console.error('Error setting up WebRTC:', err);
|
423 |
-
showError('Failed to establish connection. Please try again.');
|
424 |
-
stopWebRTC();
|
425 |
-
startButton.textContent = 'Start Recording';
|
426 |
-
}
|
427 |
-
}
|
428 |
-
|
429 |
-
function updateVisualization() {
|
430 |
-
if (!analyser) return;
|
431 |
-
|
432 |
-
analyser.getByteFrequencyData(dataArray);
|
433 |
-
const bars = document.querySelectorAll('.box');
|
434 |
-
|
435 |
-
for (let i = 0; i < bars.length; i++) {
|
436 |
-
const barHeight = (dataArray[i] / 255) * 2;
|
437 |
-
bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
|
438 |
-
}
|
439 |
-
|
440 |
-
animationId = requestAnimationFrame(updateVisualization);
|
441 |
-
}
|
442 |
-
|
443 |
-
function stopWebRTC() {
|
444 |
-
if (peerConnection) {
|
445 |
-
// Clean up all transceivers and tracks
|
446 |
-
if (peerConnection.getTransceivers) {
|
447 |
-
peerConnection.getTransceivers().forEach(transceiver => {
|
448 |
-
if (transceiver.stop) {
|
449 |
-
transceiver.stop();
|
450 |
-
}
|
451 |
-
});
|
452 |
-
}
|
453 |
-
|
454 |
-
if (peerConnection.getSenders) {
|
455 |
-
peerConnection.getSenders().forEach(sender => {
|
456 |
-
if (sender.track && sender.track.stop) sender.track.stop();
|
457 |
-
});
|
458 |
-
}
|
459 |
-
|
460 |
-
peerConnection.close();
|
461 |
-
peerConnection = null;
|
462 |
-
}
|
463 |
-
|
464 |
-
if (animationId) {
|
465 |
-
cancelAnimationFrame(animationId);
|
466 |
-
animationId = null;
|
467 |
-
}
|
468 |
-
|
469 |
-
if (audioContext) {
|
470 |
-
audioContext.close();
|
471 |
-
audioContext = null;
|
472 |
-
}
|
473 |
-
|
474 |
-
updateButtonState();
|
475 |
-
}
|
476 |
-
|
477 |
-
startButton.addEventListener('click', () => {
|
478 |
-
// First check for browser support
|
479 |
-
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
480 |
-
showError('Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.');
|
481 |
-
return;
|
482 |
-
}
|
483 |
-
|
484 |
-
if (!isRecording) {
|
485 |
-
setupWebRTC();
|
486 |
-
startButton.classList.add('recording');
|
487 |
-
} else {
|
488 |
-
stopWebRTC();
|
489 |
-
startButton.classList.remove('recording');
|
490 |
-
}
|
491 |
-
isRecording = !isRecording;
|
492 |
-
});
|
493 |
-
</script>
|
494 |
-
</body>
|
495 |
-
|
496 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|