ishworrsubedii committed on
Commit
8539a09
·
1 Parent(s): c06a600

add: urdu, llm

Browse files
Files changed (4) hide show
  1. .gitignore +3 -0
  2. app.py +37 -13
  3. index.html +135 -3
  4. index2.html +0 -496
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .idea
2
+ __pycache__
3
+ .env
app.py CHANGED
@@ -22,9 +22,12 @@ from google.genai.types import (
22
  PrebuiltVoiceConfig,
23
  SpeechConfig,
24
  VoiceConfig,
 
 
25
  )
26
  from gradio.utils import get_space
27
  from pydantic import BaseModel
 
28
  current_dir = pathlib.Path(__file__).parent
29
  load_dotenv()
30
  api_key = os.getenv("GEMINI_API_KEY")
@@ -32,7 +35,6 @@ if not api_key:
32
  raise ValueError("GEMINI_API_KEY environment variable is not set")
33
 
34
 
35
-
36
  def encode_audio(data: np.ndarray) -> str:
37
  """Encode Audio data to send to the server"""
38
  return base64.b64encode(data.tobytes()).decode("UTF-8")
@@ -42,10 +44,10 @@ class GeminiHandler(AsyncStreamHandler):
42
  """Handler for the Gemini API"""
43
 
44
  def __init__(
45
- self,
46
- expected_layout: Literal["mono"] = "mono",
47
- output_sample_rate: int = 24000,
48
- output_frame_size: int = 480,
49
  ) -> None:
50
  super().__init__(
51
  expected_layout,
@@ -82,16 +84,40 @@ class GeminiHandler(AsyncStreamHandler):
82
  speech_config=SpeechConfig(
83
  voice_config=VoiceConfig(
84
  prebuilt_voice_config=PrebuiltVoiceConfig(
85
- voice_name=voice_name, # Now voice_name is a string
86
  )
87
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  )
89
  )
90
  async with client.aio.live.connect(
91
- model="gemini-2.0-flash-exp", config=config
92
  ) as session:
93
  async for audio in session.start_stream(
94
- stream=self.stream(), mime_type="audio/pcm"
95
  ):
96
  if audio.data:
97
  array = np.frombuffer(audio.data, dtype=np.int16)
@@ -123,10 +149,9 @@ stream = Stream(
123
  mode="send-receive",
124
  handler=GeminiHandler(),
125
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
126
- concurrency_limit=5 if get_space() else None,
127
  time_limit=90 if get_space() else None,
128
  additional_inputs=[
129
-
130
  gr.Dropdown(
131
  label="Voice",
132
  choices=[
@@ -147,7 +172,6 @@ class InputData(BaseModel):
147
  voice_name: str
148
 
149
 
150
-
151
  app = FastAPI()
152
 
153
  stream.mount(app)
@@ -155,7 +179,7 @@ stream.mount(app)
155
 
156
  @app.post("/input_hook")
157
  async def _(body: InputData):
158
- stream.set_input(body.webrtc_id, body.voice_name)
159
  return {"status": "ok"}
160
 
161
 
@@ -181,4 +205,4 @@ if __name__ == "__main__":
181
  elif mode == "PHONE":
182
  stream.fastphone(host="localhost", port=7860)
183
  else:
184
- uvicorn.run(app, host="localhost", port=7860)
 
22
  PrebuiltVoiceConfig,
23
  SpeechConfig,
24
  VoiceConfig,
25
+ Content,
26
+ Part
27
  )
28
  from gradio.utils import get_space
29
  from pydantic import BaseModel
30
+
31
  current_dir = pathlib.Path(__file__).parent
32
  load_dotenv()
33
  api_key = os.getenv("GEMINI_API_KEY")
 
35
  raise ValueError("GEMINI_API_KEY environment variable is not set")
36
 
37
 
 
38
  def encode_audio(data: np.ndarray) -> str:
39
  """Encode Audio data to send to the server"""
40
  return base64.b64encode(data.tobytes()).decode("UTF-8")
 
44
  """Handler for the Gemini API"""
45
 
46
  def __init__(
47
+ self,
48
+ expected_layout: Literal["mono"] = "mono",
49
+ output_sample_rate: int = 24000,
50
+ output_frame_size: int = 480,
51
  ) -> None:
52
  super().__init__(
53
  expected_layout,
 
84
  speech_config=SpeechConfig(
85
  voice_config=VoiceConfig(
86
  prebuilt_voice_config=PrebuiltVoiceConfig(
87
+ voice_name=voice_name,
88
  )
89
  )
90
+ ),
91
+
92
+ system_instruction=Content(
93
+ parts=[Part(
94
+ text="""You are an AI calling assistant for Ishwor Subedi, an AI/ML freelancer. When speaking with clients:
95
+
96
+ 2. For professional inquiries, highlight these key skills concisely:
97
+ - 2+ years in machine learning and AI
98
+ - Computer Vision expertise
99
+ - NLP capabilities
100
+ - Software and mobile app development
101
+ - Upwork freelancer with proven track record
102
+ 3. For generic questions:
103
+ - Provide brief, direct answers (1-2 sentences)
104
+ - Avoid lengthy explanations
105
+ - Always connect responses back to Ishwor's services when possible
106
+ 4. Keep website reference simple: "Visit ishwor-subedi.com.np for portfolio details"
107
+ 5. Speak in Hindi throughout
108
+ 6. For unrelated topics: "Please contact Ishwor directly for assistance with this"
109
+
110
+ Maintain professional tone while keeping all responses concise and focused.
111
+ """)],
112
+ role="user"
113
+
114
  )
115
  )
116
  async with client.aio.live.connect(
117
+ model="gemini-2.0-flash-exp", config=config
118
  ) as session:
119
  async for audio in session.start_stream(
120
+ stream=self.stream(), mime_type="audio/pcm"
121
  ):
122
  if audio.data:
123
  array = np.frombuffer(audio.data, dtype=np.int16)
 
149
  mode="send-receive",
150
  handler=GeminiHandler(),
151
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
152
+ concurrency_limit=2,
153
  time_limit=90 if get_space() else None,
154
  additional_inputs=[
 
155
  gr.Dropdown(
156
  label="Voice",
157
  choices=[
 
172
  voice_name: str
173
 
174
 
 
175
  app = FastAPI()
176
 
177
  stream.mount(app)
 
179
 
180
  @app.post("/input_hook")
181
  async def _(body: InputData):
182
+ stream.set_input(body.webrtc_id, body.voice_name)
183
  return {"status": "ok"}
184
 
185
 
 
205
  elif mode == "PHONE":
206
  stream.fastphone(host="localhost", port=7860)
207
  else:
208
+ uvicorn.run(app, host="localhost", port=7860)
index.html CHANGED
@@ -298,6 +298,108 @@
298
  .footer a:hover {
299
  text-decoration: underline;
300
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  </style>
302
  </head>
303
 
@@ -305,11 +407,30 @@
305
  <div id="error-toast" class="toast"></div>
306
 
307
  <div class="header">
308
- <h1>Voice Assistant</h1>
309
- <p>Real-time voice conversations with advanced AI technology</p>
 
 
 
 
310
  </div>
311
 
312
  <div class="container">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  <div class="controls">
314
  <!-- <div class="input-group">
315
  <label for="api-key"><i class="fas fa-key"></i> API Key</label>
@@ -337,7 +458,18 @@
337
  </div>
338
 
339
  <div class="footer">
340
- <p>Powered by advanced AI technology</p>
 
 
 
 
 
 
 
 
 
 
 
341
  </div>
342
 
343
  <audio id="audio-output"></audio>
 
298
  .footer a:hover {
299
  text-decoration: underline;
300
  }
301
+
302
+ .subtitle {
303
+ font-size: 1.25rem;
304
+ color: var(--color-text-secondary);
305
+ margin-bottom: 1rem;
306
+ }
307
+
308
+ .language-badge {
309
+ display: inline-block;
310
+ padding: 0.5rem 1rem;
311
+ background: var(--gradient);
312
+ border-radius: 2rem;
313
+ font-size: 1rem;
314
+ margin-top: 1rem;
315
+ box-shadow: var(--box-shadow);
316
+ }
317
+
318
+ .feature-badges {
319
+ display: flex;
320
+ justify-content: center;
321
+ gap: 1rem;
322
+ margin-bottom: 2rem;
323
+ flex-wrap: wrap;
324
+ }
325
+
326
+ .badge {
327
+ display: flex;
328
+ align-items: center;
329
+ gap: 0.5rem;
330
+ padding: 0.75rem 1.25rem;
331
+ background-color: var(--color-surface-light);
332
+ border-radius: 1rem;
333
+ font-size: 0.875rem;
334
+ border: 1px solid rgba(255, 255, 255, 0.05);
335
+ }
336
+
337
+ .badge i {
338
+ color: var(--color-primary-light);
339
+ }
340
+
341
+ .developer-info {
342
+ display: flex;
343
+ align-items: center;
344
+ gap: 1rem;
345
+ margin-bottom: 1rem;
346
+ }
347
+
348
+ .developer-avatar {
349
+ width: 48px;
350
+ height: 48px;
351
+ border-radius: 50%;
352
+ border: 2px solid var(--color-primary);
353
+ }
354
+
355
+ .developer-details {
356
+ text-align: left;
357
+ }
358
+
359
+ .developer-title {
360
+ font-size: 0.75rem;
361
+ color: var(--color-primary-light);
362
+ margin-top: 0.25rem;
363
+ }
364
+
365
+ .social-links {
366
+ display: flex;
367
+ gap: 1rem;
368
+ margin-top: 1rem;
369
+ }
370
+
371
+ .social-links a {
372
+ color: var(--color-text-secondary);
373
+ font-size: 1.25rem;
374
+ transition: color 0.2s ease;
375
+ }
376
+
377
+ .social-links a:hover {
378
+ color: var(--color-primary-light);
379
+ }
380
+
381
+ .footer {
382
+ background-color: var(--color-surface);
383
+ padding: 1.5rem;
384
+ border-radius: 1rem;
385
+ margin-top: 2rem;
386
+ box-shadow: var(--box-shadow);
387
+ border: 1px solid rgba(255, 255, 255, 0.05);
388
+ max-width: 600px;
389
+ width: 90%;
390
+ }
391
+
392
+ @media (max-width: 768px) {
393
+ .feature-badges {
394
+ flex-direction: column;
395
+ align-items: center;
396
+ }
397
+
398
+ .badge {
399
+ width: 100%;
400
+ justify-content: center;
401
+ }
402
+ }
403
  </style>
404
  </head>
405
 
 
407
  <div id="error-toast" class="toast"></div>
408
 
409
  <div class="header">
410
+ <h1>Urdu AI Assistant</h1>
411
+ <p class="subtitle">Real-time Urdu conversations powered by advanced AI technology</p>
412
+ <div class="language-badge">
413
+ <i class="fas fa-language"></i>
414
+ हिंदी / اردو
415
+ </div>
416
  </div>
417
 
418
  <div class="container">
419
+ <div class="feature-badges">
420
+ <div class="badge">
421
+ <i class="fas fa-microphone-alt"></i>
422
+ Real-time Voice
423
+ </div>
424
+ <div class="badge">
425
+ <i class="fas fa-brain"></i>
426
+ AI Powered
427
+ </div>
428
+ <div class="badge">
429
+ <i class="fas fa-language"></i>
430
+ Urdu Support
431
+ </div>
432
+ </div>
433
+
434
  <div class="controls">
435
  <!-- <div class="input-group">
436
  <label for="api-key"><i class="fas fa-key"></i> API Key</label>
 
458
  </div>
459
 
460
  <div class="footer">
461
+ <div class="developer-info">
462
+ <img src="https://media.licdn.com/dms/image/v2/D4D03AQFccVUQVirANg/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1724430812003?e=1746662400&v=beta&t=w7dT8KntFGMuWX8sIb6J53xSsiV-M_MXLYBnBNVCARg" alt="Ishwor Subedi" class="developer-avatar">
463
+ <div class="developer-details">
464
+ <p>Developed by <a href="https://ishwor-subedi.com.np" target="_blank">Ishwor Subedi</a></p>
465
+ <p class="developer-title">AI/ML Engineer & Freelancer</p>
466
+ </div>
467
+ </div>
468
+ <div class="social-links">
469
+ <a href="https://github.com/ishworrsubedii" target="_blank"><i class="fab fa-github"></i></a>
470
+ <a href="https://www.linkedin.com/in/ishworrsubedii/" target="_blank"><i class="fab fa-linkedin"></i></a>
471
+ <a href="https://www.upwork.com/freelancers/~01a5bd20f3bdbf5bde" target="_blank"><i class="fab fa-upwork"></i></a>
472
+ </div>
473
  </div>
474
 
475
  <audio id="audio-output"></audio>
index2.html DELETED
@@ -1,496 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
-
4
- <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>Gemini Voice Chat</title>
8
- <style>
9
- :root {
10
- --color-accent: #6366f1;
11
- --color-background: #0f172a;
12
- --color-surface: #1e293b;
13
- --color-text: #e2e8f0;
14
- --boxSize: 8px;
15
- --gutter: 4px;
16
- }
17
-
18
- body {
19
- margin: 0;
20
- padding: 0;
21
- background-color: var(--color-background);
22
- color: var(--color-text);
23
- font-family: system-ui, -apple-system, sans-serif;
24
- min-height: 100vh;
25
- display: flex;
26
- flex-direction: column;
27
- align-items: center;
28
- justify-content: center;
29
- }
30
-
31
- .container {
32
- width: 90%;
33
- max-width: 800px;
34
- background-color: var(--color-surface);
35
- padding: 2rem;
36
- border-radius: 1rem;
37
- box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
38
- }
39
-
40
- .wave-container {
41
- position: relative;
42
- display: flex;
43
- min-height: 100px;
44
- max-height: 128px;
45
- justify-content: center;
46
- align-items: center;
47
- margin: 2rem 0;
48
- }
49
-
50
- .box-container {
51
- display: flex;
52
- justify-content: space-between;
53
- height: 64px;
54
- width: 100%;
55
- }
56
-
57
- .box {
58
- height: 100%;
59
- width: var(--boxSize);
60
- background: var(--color-accent);
61
- border-radius: 8px;
62
- transition: transform 0.05s ease;
63
- }
64
-
65
- .controls {
66
- display: grid;
67
- gap: 1rem;
68
- margin-bottom: 2rem;
69
- }
70
-
71
- .input-group {
72
- display: flex;
73
- flex-direction: column;
74
- gap: 0.5rem;
75
- }
76
-
77
- label {
78
- font-size: 0.875rem;
79
- font-weight: 500;
80
- }
81
-
82
- input,
83
- select {
84
- padding: 0.75rem;
85
- border-radius: 0.5rem;
86
- border: 1px solid rgba(255, 255, 255, 0.1);
87
- background-color: var(--color-background);
88
- color: var(--color-text);
89
- font-size: 1rem;
90
- }
91
-
92
- button {
93
- padding: 1rem 2rem;
94
- border-radius: 0.5rem;
95
- border: none;
96
- background-color: var(--color-accent);
97
- color: white;
98
- font-weight: 600;
99
- cursor: pointer;
100
- transition: all 0.2s ease;
101
- }
102
-
103
- button:hover {
104
- opacity: 0.9;
105
- transform: translateY(-1px);
106
- }
107
-
108
- .icon-with-spinner {
109
- display: flex;
110
- align-items: center;
111
- justify-content: center;
112
- gap: 12px;
113
- min-width: 180px;
114
- }
115
-
116
- .spinner {
117
- width: 20px;
118
- height: 20px;
119
- border: 2px solid white;
120
- border-top-color: transparent;
121
- border-radius: 50%;
122
- animation: spin 1s linear infinite;
123
- flex-shrink: 0;
124
- }
125
-
126
- @keyframes spin {
127
- to {
128
- transform: rotate(360deg);
129
- }
130
- }
131
-
132
- .pulse-container {
133
- display: flex;
134
- align-items: center;
135
- justify-content: center;
136
- gap: 12px;
137
- min-width: 180px;
138
- }
139
-
140
- .pulse-circle {
141
- width: 20px;
142
- height: 20px;
143
- border-radius: 50%;
144
- background-color: white;
145
- opacity: 0.2;
146
- flex-shrink: 0;
147
- transform: translateX(-0%) scale(var(--audio-level, 1));
148
- transition: transform 0.1s ease;
149
- }
150
-
151
- /* Add styles for toast notifications */
152
- .toast {
153
- position: fixed;
154
- top: 20px;
155
- left: 50%;
156
- transform: translateX(-50%);
157
- padding: 16px 24px;
158
- border-radius: 4px;
159
- font-size: 14px;
160
- z-index: 1000;
161
- display: none;
162
- box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
163
- }
164
-
165
- .toast.error {
166
- background-color: #f44336;
167
- color: white;
168
- }
169
-
170
- .toast.warning {
171
- background-color: #ffd700;
172
- color: black;
173
- }
174
- </style>
175
- </head>
176
-
177
-
178
- <body>
179
- <!-- Add toast element after body opening tag -->
180
- <div id="error-toast" class="toast"></div>
181
- <div style="text-align: center">
182
- <h1>Gemini Voice Chat</h1>
183
- <p>Speak with Gemini using real-time audio streaming</p>
184
- <p>
185
- Get a Gemini API key
186
- <a href="https://ai.google.dev/gemini-api/docs/api-key">here</a>
187
- </p>
188
- </div>
189
- <div class="container">
190
- <div class="controls">
191
- <div class="input-group">
192
- <label for="api-key">API Key</label>
193
- <input type="password" id="api-key" placeholder="Enter your API key">
194
- </div>
195
- <div class="input-group">
196
- <label for="voice">Voice</label>
197
- <select id="voice">
198
- <option value="Puck">Puck</option>
199
- <option value="Charon">Charon</option>
200
- <option value="Kore">Kore</option>
201
- <option value="Fenrir">Fenrir</option>
202
- <option value="Aoede">Aoede</option>
203
- </select>
204
- </div>
205
- </div>
206
-
207
- <div class="wave-container">
208
- <div class="box-container">
209
- <!-- Boxes will be dynamically added here -->
210
- </div>
211
- </div>
212
-
213
- <button id="start-button">Start Recording</button>
214
- </div>
215
-
216
- <audio id="audio-output"></audio>
217
-
218
- <script>
219
- let peerConnection;
220
- let audioContext;
221
- let dataChannel;
222
- let isRecording = false;
223
- let webrtc_id;
224
- let animationId;
225
- let analyser;
226
- let analyser_input;
227
- let dataArray;
228
- let dataArray_input;
229
-
230
- const startButton = document.getElementById('start-button');
231
- const apiKeyInput = document.getElementById('api-key');
232
- const voiceSelect = document.getElementById('voice');
233
- const audioOutput = document.getElementById('audio-output');
234
- const boxContainer = document.querySelector('.box-container');
235
-
236
- const numBars = 32;
237
- for (let i = 0; i < numBars; i++) {
238
- const box = document.createElement('div');
239
- box.className = 'box';
240
- boxContainer.appendChild(box);
241
- }
242
-
243
- function updateButtonState() {
244
- if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
245
- startButton.innerHTML = `
246
- <div class="icon-with-spinner">
247
- <div class="spinner"></div>
248
- <span>Connecting...</span>
249
- </div>
250
- `;
251
- } else if (peerConnection && peerConnection.connectionState === 'connected') {
252
- startButton.innerHTML = `
253
- <div class="pulse-container">
254
- <div class="pulse-circle"></div>
255
- <span>Stop Recording</span>
256
- </div>
257
- `;
258
- } else {
259
- startButton.innerHTML = 'Start Recording';
260
- }
261
- }
262
-
263
- function showError(message) {
264
- const toast = document.getElementById('error-toast');
265
- toast.textContent = message;
266
- toast.className = 'toast error';
267
- toast.style.display = 'block';
268
-
269
- // Hide toast after 5 seconds
270
- setTimeout(() => {
271
- toast.style.display = 'none';
272
- }, 5000);
273
- }
274
-
275
- async function setupWebRTC() {
276
- // Replace placeholder with actual configuration
277
- const config = {
278
- iceServers: [
279
- { urls: 'stun:stun.l.google.com:19302' }
280
- ]
281
- };
282
- peerConnection = new RTCPeerConnection(config);
283
- webrtc_id = Math.random().toString(36).substring(7);
284
-
285
- const timeoutId = setTimeout(() => {
286
- const toast = document.getElementById('error-toast');
287
- toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
288
- toast.className = 'toast warning';
289
- toast.style.display = 'block';
290
-
291
- // Hide warning after 5 seconds
292
- setTimeout(() => {
293
- toast.style.display = 'none';
294
- }, 5000);
295
- }, 5000);
296
-
297
- try {
298
- // Check if mediaDevices is supported
299
- if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
300
- const errorMsg = 'Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.';
301
- showError(errorMsg);
302
- throw new Error(errorMsg);
303
- }
304
-
305
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
306
- stream.getTracks().forEach(track => peerConnection.addTrack(track, stream));
307
-
308
- // Update audio visualization setup
309
- audioContext = new AudioContext();
310
- analyser_input = audioContext.createAnalyser();
311
- const source = audioContext.createMediaStreamSource(stream);
312
- source.connect(analyser_input);
313
- analyser_input.fftSize = 64;
314
- dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
315
-
316
- function updateAudioLevel() {
317
- analyser_input.getByteFrequencyData(dataArray_input);
318
- const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
319
- const audioLevel = average / 255;
320
-
321
- const pulseCircle = document.querySelector('.pulse-circle');
322
- if (pulseCircle) {
323
- console.log("audioLevel", audioLevel);
324
- pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
325
- }
326
-
327
- animationId = requestAnimationFrame(updateAudioLevel);
328
- }
329
- updateAudioLevel();
330
-
331
- // Add connection state change listener
332
- peerConnection.addEventListener('connectionstatechange', () => {
333
- console.log('connectionstatechange', peerConnection.connectionState);
334
- if (peerConnection.connectionState === 'connected') {
335
- clearTimeout(timeoutId);
336
- const toast = document.getElementById('error-toast');
337
- toast.style.display = 'none';
338
- }
339
- updateButtonState();
340
- });
341
-
342
- // Handle incoming audio
343
- peerConnection.addEventListener('track', (evt) => {
344
- if (audioOutput && audioOutput.srcObject !== evt.streams[0]) {
345
- audioOutput.srcObject = evt.streams[0];
346
- audioOutput.play();
347
-
348
- // Set up audio visualization on the output stream
349
- audioContext = new AudioContext();
350
- analyser = audioContext.createAnalyser();
351
- const source = audioContext.createMediaStreamSource(evt.streams[0]);
352
- source.connect(analyser);
353
- analyser.fftSize = 2048;
354
- dataArray = new Uint8Array(analyser.frequencyBinCount);
355
- updateVisualization();
356
- }
357
- });
358
-
359
- // Create data channel for messages
360
- dataChannel = peerConnection.createDataChannel('text');
361
- dataChannel.onmessage = (event) => {
362
- const eventJson = JSON.parse(event.data);
363
- if (eventJson.type === "error") {
364
- showError(eventJson.message);
365
- } else if (eventJson.type === "send_input") {
366
- fetch('/input_hook', {
367
- method: 'POST',
368
- headers: {
369
- 'Content-Type': 'application/json',
370
- },
371
- body: JSON.stringify({
372
- webrtc_id: webrtc_id,
373
- api_key: apiKeyInput.value,
374
- voice_name: voiceSelect.value
375
- })
376
- });
377
- }
378
- };
379
-
380
- // Create and send offer
381
- const offer = await peerConnection.createOffer();
382
- await peerConnection.setLocalDescription(offer);
383
-
384
- await new Promise((resolve) => {
385
- if (peerConnection.iceGatheringState === "complete") {
386
- resolve();
387
- } else {
388
- const checkState = () => {
389
- if (peerConnection.iceGatheringState === "complete") {
390
- peerConnection.removeEventListener("icegatheringstatechange", checkState);
391
- resolve();
392
- }
393
- };
394
- peerConnection.addEventListener("icegatheringstatechange", checkState);
395
- }
396
- });
397
-
398
- const response = await fetch('/webrtc/offer', {
399
- method: 'POST',
400
- headers: { 'Content-Type': 'application/json' },
401
- body: JSON.stringify({
402
- sdp: peerConnection.localDescription.sdp,
403
- type: peerConnection.localDescription.type,
404
- webrtc_id: webrtc_id,
405
- })
406
- });
407
-
408
- const serverResponse = await response.json();
409
-
410
- if (serverResponse.status === 'failed') {
411
- showError(serverResponse.meta.error === 'concurrency_limit_reached'
412
- ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
413
- : serverResponse.meta.error);
414
- stopWebRTC();
415
- startButton.textContent = 'Start Recording';
416
- return;
417
- }
418
-
419
- await peerConnection.setRemoteDescription(serverResponse);
420
- } catch (err) {
421
- clearTimeout(timeoutId);
422
- console.error('Error setting up WebRTC:', err);
423
- showError('Failed to establish connection. Please try again.');
424
- stopWebRTC();
425
- startButton.textContent = 'Start Recording';
426
- }
427
- }
428
-
429
- function updateVisualization() {
430
- if (!analyser) return;
431
-
432
- analyser.getByteFrequencyData(dataArray);
433
- const bars = document.querySelectorAll('.box');
434
-
435
- for (let i = 0; i < bars.length; i++) {
436
- const barHeight = (dataArray[i] / 255) * 2;
437
- bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
438
- }
439
-
440
- animationId = requestAnimationFrame(updateVisualization);
441
- }
442
-
443
- function stopWebRTC() {
444
- if (peerConnection) {
445
- // Clean up all transceivers and tracks
446
- if (peerConnection.getTransceivers) {
447
- peerConnection.getTransceivers().forEach(transceiver => {
448
- if (transceiver.stop) {
449
- transceiver.stop();
450
- }
451
- });
452
- }
453
-
454
- if (peerConnection.getSenders) {
455
- peerConnection.getSenders().forEach(sender => {
456
- if (sender.track && sender.track.stop) sender.track.stop();
457
- });
458
- }
459
-
460
- peerConnection.close();
461
- peerConnection = null;
462
- }
463
-
464
- if (animationId) {
465
- cancelAnimationFrame(animationId);
466
- animationId = null;
467
- }
468
-
469
- if (audioContext) {
470
- audioContext.close();
471
- audioContext = null;
472
- }
473
-
474
- updateButtonState();
475
- }
476
-
477
- startButton.addEventListener('click', () => {
478
- // First check for browser support
479
- if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
480
- showError('Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.');
481
- return;
482
- }
483
-
484
- if (!isRecording) {
485
- setupWebRTC();
486
- startButton.classList.add('recording');
487
- } else {
488
- stopWebRTC();
489
- startButton.classList.remove('recording');
490
- }
491
- isRecording = !isRecording;
492
- });
493
- </script>
494
- </body>
495
-
496
- </html>