Richard committed on
Commit
09ed935
·
0 Parent(s):

Initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # System
2
+ .DS_Store
3
+
4
+ # Python
5
+ __pycache__
6
+
7
+ # VS Code
8
+ *.code-workspace
9
+ .pytest_cache
10
+
11
+ # Dyad
12
+ .dyad
13
+
14
+ # App
15
+ .env
16
+ data
README.md ADDED
File without changes
css.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import mesop as me
2
+
3
+ from state import State
4
+
5
+ COLOR_BLUE = "blue"
6
+ COLOR_YELLOW = "#f0cd6e"
7
+ COLOR_RED = "#cc153c"
8
+ COLOR_DISABLED = "#e4e4e4"
9
+ COLOR_DISABLED_BUTTON_BG = "#ccc"
10
+
11
+
12
+ MAIN_COL_GRID = me.Style(
13
+ background="#ececec",
14
+ display="grid",
15
+ grid_template_columns="70% 30%",
16
+ height="100vh",
17
+ )
18
+
19
+ SIDEBAR = me.Style(
20
+ color="#111",
21
+ overflow_y="scroll",
22
+ padding=me.Padding.all(20),
23
+ )
24
+
25
+ SIDEBAR_SECTION = me.Style(margin=me.Margin(bottom=15))
26
+
27
+ TOOLBAR_SECTION = me.Style(
28
+ margin=me.Margin(bottom=15),
29
+ padding=me.Padding.all(5),
30
+ background=me.theme_var("surface-container-highest"),
31
+ justify_content="space-evenly",
32
+ display="flex",
33
+ flex_direction="row",
34
+ )
35
+
36
+ TEXT_INPUT = me.Style(width="100%")
37
+
38
+
39
+ def sidebar_header() -> me.Style:
40
+ state = me.state(State)
41
+ return me.Style(color="#000" if state.gemini_live_api_enabled else "#aaa")
42
+
43
+
44
+ def game_button() -> me.Style:
45
+ state = me.state(State)
46
+ if not state.api_key:
47
+ return me.Style()
48
+ if state.gemini_live_api_enabled:
49
+ return me.Style(background=me.theme_var("error"), color=me.theme_var("on-error"))
50
+ return me.Style(background=me.theme_var("primary"), color=me.theme_var("on-primary"))
51
+
52
+
53
+ def audio_button() -> me.Style:
54
+ state = me.state(State)
55
+ if state.audio_player_enabled:
56
+ return me.Style(background=me.theme_var("tertiary"), color=me.theme_var("on-tertiary"))
57
+ return me.Style()
58
+
59
+
60
+ def mic_button() -> me.Style:
61
+ state = me.state(State)
62
+ if state.audio_recorder_state == "recording":
63
+ return me.Style(background=me.theme_var("tertiary"), color=me.theme_var("on-tertiary"))
64
+ if state.gemini_live_api_enabled:
65
+ return me.Style(background=me.theme_var("error"), color=me.theme_var("on-error"))
66
+ return me.Style()
67
+
68
+
69
def score_box() -> me.Style:
  """Style for the score display box in the sidebar."""
  state = me.state(State)
  return me.Style(
    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
    # NOTE(review): when the game is disabled, the text color equals the
    # background (both COLOR_DISABLED), hiding the score text — presumably
    # intentional to blank the box until a game starts; confirm.
    color="white" if state.gemini_live_api_enabled else COLOR_DISABLED,
    font_weight="bold",
    font_size="2.2vw",
    padding=me.Padding.all(15),
    text_align="center",
  )
79
+
80
+
81
+ def current_clue_box() -> me.Style:
82
+ state = me.state(State)
83
+ return me.Style(
84
+ background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
85
+ color=COLOR_YELLOW if state.gemini_live_api_enabled else COLOR_DISABLED,
86
+ font_size="1em",
87
+ font_weight="bold",
88
+ padding=me.Padding.all(15),
89
+ )
90
+
91
+
92
+ def board_col_grid() -> me.Style:
93
+ state = me.state(State)
94
+ return me.Style(
95
+ background="#000" if state.gemini_live_api_enabled else "#ddd",
96
+ display="grid",
97
+ gap="5px",
98
+ grid_template_columns="repeat(6, 1fr)",
99
+ )
100
+
101
+
102
+ def category_box() -> me.Style:
103
+ state = me.state(State)
104
+ return me.Style(
105
+ background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
106
+ color="white",
107
+ font_weight="bold",
108
+ font_size="1em",
109
+ padding=me.Padding.all(15),
110
+ text_align="center",
111
+ )
112
+
113
+
114
+ def clue_box(is_selectable: bool) -> me.Style:
115
+ """Style for clue box
116
+
117
+ Args:
118
+ is_selectable: Visual signify if the clue is selectable.
119
+ """
120
+ state = me.state(State)
121
+ return me.Style(
122
+ background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
123
+ color=COLOR_YELLOW,
124
+ cursor="pointer" if is_selectable else "default",
125
+ font_size="1em",
126
+ font_weight="bold",
127
+ padding=me.Padding.all(15),
128
+ text_align="center",
129
+ )
130
+
131
+
132
+ def response_button(disabled: bool) -> me.Style:
133
+ """Styles for response submit button.
134
+
135
+ Args:
136
+ disabled: Since we're overriding the style, we need to handle disabled state
137
+ """
138
+ if disabled:
139
+ return me.Style(background=COLOR_DISABLED_BUTTON_BG, color="#eee")
140
+ return me.Style(background=COLOR_BLUE, color="white")
141
+
142
+
143
+ def score_text(score: int) -> me.Style:
144
+ """In Jeopardy when the score is negative, it is red instead of white."""
145
+ state = me.state(State)
146
+ if not state.gemini_live_api_enabled:
147
+ return me.Style(color=COLOR_DISABLED)
148
+
149
+ if score < 0:
150
+ return me.Style(color=COLOR_RED)
151
+
152
+ return me.Style(color="white")
main.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+
4
+ import css
5
+ import trebek_bot
6
+ from models import Clue
7
+ import mesop as me
8
+ import mesop.labs as mel
9
+ from web_components.gemini_live_connection import gemini_live_connection
10
+ from web_components.audio_recorder import audio_recorder
11
+ from web_components.audio_player import audio_player
12
+ from state import State
13
+
14
+
15
+ def on_load(e: me.LoadEvent):
16
+ """Update system instructions with the randomly selected game categories."""
17
+ state = me.state(State)
18
+ categories = [question_set[0].category for question_set in state.board.clues]
19
+ state.gemini_live_api_config = trebek_bot.make_gemini_live_api_config(
20
+ system_instructions=trebek_bot.make_system_instruction(categories)
21
+ )
22
+
23
+
24
+ @me.page(
25
+ path="/",
26
+ title="Mesop Jeopardy Live",
27
+ security_policy=me.SecurityPolicy(
28
+ allowed_connect_srcs=["wss://generativelanguage.googleapis.com"],
29
+ allowed_iframe_parents=["https://huggingface.co"],
30
+ allowed_script_srcs=[
31
+ "https://cdn.jsdelivr.net",
32
+ ],
33
+ ),
34
+ on_load=on_load,
35
+ )
36
+ def app():
37
+ state = me.state(State)
38
+
39
+ with me.box(style=css.MAIN_COL_GRID):
40
+ with me.box(style=css.board_col_grid()):
41
+ for col_index in range(len(state.board.clues[0])):
42
+ # Render Jeopardy categories
43
+ if col_index == 0:
44
+ for row_index in range(len(state.board.clues)):
45
+ cell = state.board.clues[row_index][col_index]
46
+ with me.box(style=css.category_box()):
47
+ if state.gemini_live_api_enabled:
48
+ me.text(cell.category)
49
+ else:
50
+ me.text("")
51
+
52
+ # Render Jeopardy questions
53
+ for row_index in range(len(state.board.clues)):
54
+ cell = state.board.clues[row_index][col_index]
55
+ key = f"clue-{row_index}-{col_index}"
56
+ is_selectable = not (key in state.answered_questions or state.selected_question_key)
57
+ with me.box(
58
+ style=css.clue_box(state.gemini_live_api_enabled and is_selectable),
59
+ key=key,
60
+ on_click=on_click_cell,
61
+ ):
62
+ if not state.gemini_live_api_enabled:
63
+ me.text("")
64
+ elif key in state.answered_questions:
65
+ me.text("")
66
+ elif key == state.selected_question_key:
67
+ me.text(cell.question, style=me.Style(text_align="left"))
68
+ else:
69
+ me.text(f"${cell.normalized_value}", style=me.Style(font_size="2.2vw"))
70
+
71
+ # Sidebar
72
+ with me.box(style=css.SIDEBAR):
73
+ me.input(
74
+ label="Google API Key",
75
+ on_input=on_input_api_key,
76
+ readonly=state.gemini_live_api_enabled,
77
+ style=css.TEXT_INPUT,
78
+ type="password",
79
+ value=state.api_key,
80
+ )
81
+
82
+ with me.box(style=css.TOOLBAR_SECTION):
83
+ gemini_live_button()
84
+ audio_player_button()
85
+ audio_recorder_button()
86
+
87
+ # Score
88
+ with me.box(style=css.SIDEBAR_SECTION):
89
+ me.text("Score", type="headline-5", style=css.sidebar_header())
90
+ with me.box(style=css.score_box()):
91
+ me.text(format_dollars(state.score), style=css.score_text(state.score))
92
+
93
+ # Clue
94
+ with me.box(style=css.SIDEBAR_SECTION):
95
+ me.text("Clue", type="headline-5", style=css.sidebar_header())
96
+ with me.box(style=css.current_clue_box()):
97
+ if state.selected_question_key:
98
+ selected_question = get_selected_question(state.board, state.selected_question_key)
99
+ me.text(selected_question.question)
100
+ else:
101
+ me.text("No clue selected. Please select one.", style=me.Style(font_style="italic"))
102
+
103
+ # Response
104
+ with me.box(style=css.SIDEBAR_SECTION):
105
+ me.text("Response", type="headline-5", style=css.sidebar_header())
106
+ me.textarea(
107
+ disabled=not bool(state.selected_question_key),
108
+ label="Enter your response",
109
+ on_blur=on_input_response,
110
+ style=css.TEXT_INPUT,
111
+ value=state.response_value,
112
+ )
113
+
114
+ disabled = not bool(state.selected_question_key)
115
+ me.button(
116
+ disabled=disabled,
117
+ label="Submit your response",
118
+ on_click=on_click_submit,
119
+ style=css.response_button(disabled),
120
+ type="flat",
121
+ )
122
+
123
+
124
+ @me.component
125
+ def gemini_live_button():
126
+ state = me.state(State)
127
+ with gemini_live_connection(
128
+ api_config=state.gemini_live_api_config,
129
+ api_key=state.api_key,
130
+ enabled=state.gemini_live_api_enabled,
131
+ on_start=on_gemini_live_api_started,
132
+ on_stop=on_gemini_live_api_stopped,
133
+ on_tool_call=handle_tool_calls,
134
+ text_input=state.text_input,
135
+ tool_call_responses=state.tool_call_responses,
136
+ ):
137
+ with me.tooltip(message=get_gemini_live_tooltip()):
138
+ with me.content_button(
139
+ disabled=not state.api_key,
140
+ style=css.game_button(),
141
+ type="icon",
142
+ ):
143
+ if state.gemini_live_api_enabled:
144
+ me.icon(icon="stop")
145
+ else:
146
+ me.icon(icon="play_arrow")
147
+
148
+
149
+ @me.component
150
+ def audio_player_button():
151
+ state = me.state(State)
152
+ with audio_player(
153
+ enabled=state.audio_player_enabled, on_play=on_audio_play, on_stop=on_audio_stop
154
+ ):
155
+ with me.tooltip(message=get_audio_player_tooltip()):
156
+ with me.content_button(
157
+ disabled=True,
158
+ style=css.audio_button(),
159
+ type="icon",
160
+ ):
161
+ if state.audio_player_enabled:
162
+ me.icon(icon="volume_up")
163
+ else:
164
+ me.icon(icon="volume_mute")
165
+
166
+
167
+ @me.component
168
+ def audio_recorder_button():
169
+ state = me.state(State)
170
+ with audio_recorder(
171
+ state=state.audio_recorder_state, on_state_change=on_audio_recorder_state_change
172
+ ):
173
+ with me.tooltip(message=get_audio_recorder_tooltip()):
174
+ with me.content_button(
175
+ disabled=not state.gemini_live_api_enabled,
176
+ style=css.mic_button(),
177
+ type="icon",
178
+ ):
179
+ if state.audio_recorder_state == "initializing":
180
+ me.icon(icon="pending")
181
+ else:
182
+ me.icon(icon="mic")
183
+
184
+
185
def on_click_cell(e: me.ClickEvent):
  """Selects the given clue by prompting Gemini Live API.

  Args:
    e: Click event whose key encodes the clue position (clue-{row}-{col}).
  """
  state = me.state(State)
  clue = get_selected_question(state.board, e.key)
  # Reuse the state object fetched above instead of calling me.state() a
  # second time.
  state.text_input = f"I'd like to select {clue.category}, for ${clue.normalized_value}."
190
+
191
+
192
+ def on_input_response(e: me.InputBlurEvent):
193
+ """Stores user input into state, so we can process their response."""
194
+ state = me.state(State)
195
+ state.response = e.value
196
+
197
+
198
+ def on_click_submit(e: me.ClickEvent):
199
+ """Submit user response to clue to check if they are correct using Gemini Live API."""
200
+ state = me.state(State)
201
+ if not state.response.strip():
202
+ return
203
+
204
+ state.text_input = state.response
205
+
206
+ # Hack to reset text input. Update the initial response value to current response
207
+ # first, which will trigger a diff when we set the initial response back to empty
208
+ # string.
209
+ #
210
+ # A small delay is also needed because some times the yield happens too fast, which
211
+ # does not allow the UI on the client to update properly.
212
+ state.response_value = state.response
213
+ yield
214
+ time.sleep(0.5)
215
+ state.response_value = ""
216
+ yield
217
+
218
+
219
def get_selected_question(board, selected_question_key) -> Clue:
  """Looks up a clue on the board from its key.

  The key has the format "clue-{row}-{col}".
  """
  _, row_part, col_part = selected_question_key.split("-")
  row, col = int(row_part), int(col_part)
  return board.clues[row][col]
223
+
224
+
225
def format_dollars(value: int) -> str:
  """Formats an integer value in US dollars format."""
  sign = "-" if value < 0 else ""
  return f"{sign}${abs(value):,}"
230
+
231
+
232
+ def get_gemini_live_tooltip() -> str:
233
+ """Tooltip messages for Gemini Live API web component button."""
234
+ state = me.state(State)
235
+ if state.gemini_live_api_enabled:
236
+ return "Stop game"
237
+ if state.api_key:
238
+ return "Start game"
239
+ return "Game disabled. Enter API Key."
240
+
241
+
242
+ def get_audio_player_tooltip() -> str:
243
+ """Tooltip messages for Audio player web component button."""
244
+ state = me.state(State)
245
+ if state.audio_player_enabled:
246
+ return "Audio playing"
247
+ if state.gemini_live_api_enabled:
248
+ return "Audio not playing"
249
+ return "Audio disabled"
250
+
251
+
252
def get_audio_recorder_tooltip() -> str:
  """Tooltip messages for Audio recorder web component button."""
  state = me.state(State)
  if state.audio_recorder_state == "initializing":
    # Bug fix: this branch was a bare expression statement with no `return`,
    # so the "initializing" message was discarded and a later branch's text
    # was shown instead.
    return "Microphone initializing"
  if state.audio_recorder_state == "recording":
    return "Microphone on"
  if state.gemini_live_api_enabled:
    return "Microphone muted"
  return "Microphone disabled"
262
+
263
+
264
+ def on_input_api_key(e: me.InputEvent):
265
+ """Captures Google API key input"""
266
+ state = me.state(State)
267
+ state.api_key = e.value
268
+
269
+
270
+ def on_audio_play(e: mel.WebEvent):
271
+ """Event for when audio player play button was clicked."""
272
+ me.state(State).audio_player_enabled = True
273
+
274
+
275
+ def on_audio_stop(e: mel.WebEvent):
276
+ """Event for when audio player stop button was clicked."""
277
+ me.state(State).audio_player_enabled = False
278
+
279
+
280
+ def on_audio_recorder_state_change(e: mel.WebEvent):
281
+ """Event for when audio recorder state changes."""
282
+ me.state(State).audio_recorder_state = e.value
283
+
284
+
285
+ def on_gemini_live_api_started(e: mel.WebEvent):
286
+ """Event for when Gemin Live API start button was clicked."""
287
+ me.state(State).gemini_live_api_enabled = True
288
+
289
+
290
+ def on_gemini_live_api_stopped(e: mel.WebEvent):
291
+ """Event for when Gemin Live API stop button was clicked."""
292
+ state = me.state(State)
293
+ state.gemini_live_api_enabled = False
294
+ state.selected_question_key = ""
295
+ state.response_value = ""
296
+
297
+
298
def handle_tool_calls(e: mel.WebEvent):
  """Processes tool calls from Gemini Live API.

  Supported tool calls:

  - get_clue
  - update_score

  Unrecognized tool names are still acknowledged with a `None` result so the
  API receives a response for every call it made.
  """
  state = me.state(State)
  tool_calls = json.loads(e.value["toolCalls"])
  responses = []
  for tool_call in tool_calls:
    result = None
    if tool_call["name"] == "get_clue":
      result = tool_call_get_clue(
        tool_call["args"]["category_index"], tool_call["args"]["dollar_index"]
      )
    elif tool_call["name"] == "update_score":
      result = tool_call_update_score(tool_call["args"]["is_correct"])

    responses.append(
      {
        "id": tool_call["id"],
        "name": tool_call["name"],
        "response": {
          "result": result,
        },
      }
    )

  if responses:
    # Removed leftover debug print() of the raw responses.
    state.tool_call_responses = json.dumps(responses)
331
+
332
+
333
+ def tool_call_update_score(is_correct: bool) -> str:
334
+ """Updates the user's score
335
+
336
+ Gemini will determine if the user is correct and then call this tool which will
337
+ allow the game state to be updated appropriately.
338
+ """
339
+ state = me.state(State)
340
+ selected_question = get_selected_question(state.board, state.selected_question_key)
341
+ if is_correct:
342
+ state.score += selected_question.normalized_value
343
+ else:
344
+ state.score -= selected_question.normalized_value
345
+
346
+ # Clear question so another can be picked.
347
+ state.answered_questions.add(state.selected_question_key)
348
+ state.selected_question_key = ""
349
+
350
+ return f"The user's score is {state.score}"
351
+
352
+
353
+ def tool_call_get_clue(category_index, dollar_index) -> str:
354
+ """Gets the selected clue.
355
+
356
+ Gemini will parse the user request and make a tool call with the row/col indexes.
357
+
358
+ Example: "Category X for $400".
359
+ """
360
+ cell_key = f"clue-{category_index}-{dollar_index}"
361
+ response = handle_select_clue(cell_key)
362
+
363
+ if isinstance(response, str):
364
+ return "There was an error. " + response
365
+
366
+ return f"The clue is {response.question}\n\n The answer to the clue is {response.answer}\n\n Please read the clue to the user."
367
+
368
+
369
+ def handle_select_clue(clue_key: str) -> Clue | str:
370
+ """Handles logic for clicking on a clue.
371
+
372
+ If it returns a string, it will be an error message.
373
+ If it returns a clue, that means a valid clue was selected.
374
+ """
375
+ state = me.state(State)
376
+ if state.selected_question_key:
377
+ return "A clue has already been selected."
378
+ if clue_key in state.answered_questions:
379
+ return "That clue has already been selected"
380
+ state.selected_question_key = clue_key
381
+ return get_selected_question(state.board, state.selected_question_key)
models.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
+
4
class Clue(BaseModel):
  """A single Jeopardy clue as loaded from the raw dataset."""

  # Fields mirroring the raw JSON records (see question_bank._load_raw_data).
  air_date: str
  category: str
  question: str
  # Dollar string such as "$200"; None when the dataset has no value
  # (e.g. unanswered Daily Doubles — see question_bank._convert_dollar_amount).
  value: str | None
  answer: str
  round: str
  show_number: str
  # Integer parsed from `value` (0 when value is None); set by question_bank.
  raw_value: int = 0
  # Dollar amount normalized by board position; set by question_bank.
  normalized_value: int = 0
14
+
15
+
16
class Board(BaseModel):
  """A Jeopardy board: a list of categories, each a list of clues."""

  clues: list[list[Clue]]
question_bank.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from collections import defaultdict
4
+
5
+ from models import Clue
6
+
7
+
8
+ QuestionSet = list[Clue]
9
+
10
+ _JEOPARDY_DATA = "data/jeopardy.json"
11
+ _NUM_QUESTIONS_PER_CATEGORY = 5
12
+
13
+
14
+ def load() -> list[QuestionSet]:
15
+ """Loads a cleaned up data set to use in Mesop Jeopardy game."""
16
+ data = _load_raw_data()
17
+ data = _add_raw_value(data)
18
+ data = _clean_questions(data)
19
+ question_sets = _group_into_question_sets(data)
20
+ question_sets = _sort_question_sets(question_sets)
21
+ question_sets = _normalize_values(question_sets)
22
+ return _filter_out_incomplete_question_sets(question_sets)
23
+
24
+
25
+ def _load_raw_data() -> QuestionSet:
26
+ """Load the raw data set.
27
+
28
+ Format of each question/clue looks like this:
29
+
30
+ {
31
+ "category": "HISTORY",
32
+ "air_date": "2004-12-31",
33
+ "question": "'For the last 8 years of his life, Galileo was...",
34
+ "value": "$200",
35
+ "answer": "Copernicus",
36
+ "round": "Jeopardy!",
37
+ "show_number": "4680"
38
+ }
39
+ """
40
+ with open(_JEOPARDY_DATA, "r") as f:
41
+ return [Clue(**row) for row in json.load(f)]
42
+
43
+
44
+ def _add_raw_value(data: QuestionSet) -> QuestionSet:
45
+ """Add raw value since the value is formatted as a dollar string that isn't as easy
46
+ to sort"""
47
+ for row in data:
48
+ row.raw_value = _convert_dollar_amount(row.value)
49
+ return data
50
+
51
+
52
+ def _clean_questions(data: QuestionSet) -> QuestionSet:
53
+ """Clean up questions
54
+
55
+ - Strip single quotes around each question
56
+ - Replace escaped single quotes
57
+ - Strip HTML tags
58
+ """
59
+ for row in data:
60
+ row.question = re.sub("<[^<]+?>", "", row.question.strip("'").replace("\\'", "'"))
61
+ return data
62
+
63
+
64
+ def _convert_dollar_amount(value: str | None) -> int:
65
+ """Coverts raw value into an integer.
66
+
67
+ The raw value is string formatted as a dollar amount, such as $1,000. In this
68
+ dataset the dollar amount is not given for Daily Doubles that were not answered, so
69
+ we'll set those cases to a value of 0 for now.
70
+
71
+ In addition, answered daily doubles will have odd dollar amounts.
72
+
73
+ These values won't be used in the actually game. Only for roughly sorting the
74
+ question difficulty.
75
+ """
76
+ if value:
77
+ return int(value.replace("$", "").replace(",", ""))
78
+ else:
79
+ return 0
80
+
81
+
82
+ def _group_into_question_sets(data: QuestionSet) -> list[QuestionSet]:
83
+ """Groups the questions by category for that air date.
84
+
85
+ We want to mix and match questions across games, but we want to keep the questions
86
+ within a category together.
87
+ """
88
+ question_sets = defaultdict(lambda: [])
89
+ for row in data:
90
+ question_sets[(row.category, row.air_date)].append(row)
91
+ return list(question_sets.values())
92
+
93
+
94
+ def _sort_question_sets(question_sets: list[QuestionSet]) -> list[QuestionSet]:
95
+ return [_sort_question_set(question_set) for question_set in question_sets]
96
+
97
+
98
+ def _sort_question_set(question_set: QuestionSet) -> QuestionSet:
99
+ """Sort the question sets so they are ordered roughly in order difficulty.
100
+
101
+ This will not always be true due to Daily Doubles skewing the order. The data set
102
+ did not store the Daily Double values separately from the normal game value.
103
+ """
104
+ return sorted(question_set, key=lambda q: q.raw_value)
105
+
106
+
107
+ def _normalize_values(question_sets: list[QuestionSet]) -> list[QuestionSet]:
108
+ """Normalizes question dollar amounts based on order of appearance.
109
+
110
+ Since we picking random categories across different rounds and years, the dollar
111
+ values will differ. So we will normalize them here.
112
+ """
113
+ for question_set in question_sets:
114
+ for index, question in enumerate(question_set):
115
+ question.normalized_value = (index + 1) * 200
116
+ return question_sets
117
+
118
+
119
def _filter_out_incomplete_question_sets(question_sets: list[QuestionSet]) -> list[QuestionSet]:
  """Filters out question sets that are incomplete (do not contain five questions).

  Final Jeopardy categories only have one question so we want to ignore those.
  We also want to avoid anomalies in the data set.

  In addition there are cases where not all questions were answered for a
  category. This means that we will be missing a question on the board.
  """
  return list(
    filter(lambda qs: len(qs) == _NUM_QUESTIONS_PER_CATEGORY, question_sets)
  )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Flask==3.1.0
2
+ google-genai==0.6.0
3
+ gunicorn==23.0.0
4
+ mesop==0.14.1
5
+ pydantic==2.10.5
6
+ websockets==14.2
ruff.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ line-length = 100
2
+ indent-width = 2
state.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal
2
+ from dataclasses import field
3
+ import random
4
+ import os
5
+
6
+ import question_bank
7
+ import mesop as me
8
+ from models import Board
9
+
10
+
11
+ _NUM_CATEGORIES = 6
12
+ _QUESTION_SETS = question_bank.load()
13
+
14
+
15
+ @me.stateclass
16
+ class State:
17
+ selected_clue: str
18
+ board: Board = field(default_factory=lambda: make_default_board(_QUESTION_SETS))
19
+ # Used for clearing the text input.
20
+ response_value: str
21
+ response: str
22
+ score: int
23
+ # Key format: click-{row_index}-{col_index}
24
+ selected_question_key: str
25
+ # Set is not JSON serializable
26
+ # Key format: click-{row_index}-{col_index}
27
+ answered_questions: set[str] = field(default_factory=set)
28
+ # Gemini Live API
29
+ api_key: str = os.getenv("GOOGLE_API_KEY", "")
30
+ gemini_live_api_enabled: bool = False
31
+ gemini_live_api_config: str
32
+ audio_player_enabled: bool = False
33
+ audio_recorder_state: Literal["disabled", "initializing", "recording"] = "disabled"
34
+ tool_call_responses: str = ""
35
+ text_input: str = ""
36
+
37
+
38
def make_default_board(jeopardy_questions) -> Board:
  """Creates a board with some random jeopardy questions.

  Args:
    jeopardy_questions: Pool of question sets to draw categories from.

  Returns:
    A Board built from _NUM_CATEGORIES randomly chosen question sets.
  """
  # Shuffle a copy: the previous code shuffled the caller's list in place,
  # mutating the shared module-level _QUESTION_SETS pool as a side effect.
  shuffled = list(jeopardy_questions)
  random.shuffle(shuffled)
  return Board(clues=shuffled[:_NUM_CATEGORIES])
trebek_bot.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal
2
+ import json
3
+
4
+
5
+ type VoiceName = Literal["Aoede", "Charon", "Fenrir", "Kore", "Puck"]
6
+ type GeminiModel = Literal["gemini-2.0-flash-exp"]
7
+
8
+
9
+ _TOOL_DEFINITIONS = {
10
+ "functionDeclarations": [
11
+ {
12
+ "name": "get_clue",
13
+ "description": "Gets the clue from the board which returns the clue and answer",
14
+ "parameters": {
15
+ "type": "object",
16
+ "properties": {
17
+ "category_index": {"type": "integer", "description": "Index of selected category."},
18
+ "dollar_index": {"type": "integer", "description": "Index of selected dollar amount."},
19
+ },
20
+ "required": ["category_index", "dollar_index"],
21
+ },
22
+ },
23
+ {
24
+ "name": "update_score",
25
+ "description": "Updates whether user got the question correct or not.",
26
+ "parameters": {
27
+ "type": "object",
28
+ "properties": {
29
+ "is_correct": {"type": "boolean", "description": "True if correct. False is incorrect."},
30
+ },
31
+ "required": ["is_correct"],
32
+ },
33
+ },
34
+ ]
35
+ }
36
+
37
+ _SYSTEM_INSTRUCTIONS = """
38
+ You are the host of Jeopardy. Make sure users follow the rules of the game.
39
+
40
+ You have access to the following tools:
41
+ - get_clue: Gets the clue selected by the user. Always use this for picking clues. Do not make up your own clues.
42
+ - update_score: Updates the users score depending on if they answered the clue correctly.
43
+
44
+ The categories are [[categories]]. Each category has 5 questions, with the following dollar
45
+ amounts: $200, $400, $600, $800, $1000.
46
+
47
+ When the user asks for a clue, they will specify the category and dollar amount. Use the
48
+ `get_clue` tool by passing in the corresponding indexes for the category and dollar
49
+ amount.
50
+
51
+ For example if the categories are Witches, Gold Rush, American History, Desserts, Wet & Wild,
52
+ and the user says "American History for $800", the index will be 2 for the category and 3
53
+ for the dollar amount.
54
+
55
+ The `get_clue` tool will return the clue and answer if it is valid. If it is invalid it
56
+ will return an error message.
57
+
58
+ Wait for the `get_clue` tool response before responding.
59
+
60
+ When you get the response to the `get_clue` tool, read the clue to the user.
61
+
62
+ Briefly explain to the user why their answer is correct or wrong.
63
+
64
+ Use the `update_score` tool to update their score. Pass in true if they were correct.
65
+ Pass in false if they were not correct. This tool will return the user's current score.
66
+ """.strip()
67
+
68
+
69
+ def make_system_instruction(categories: list[str]):
70
+ return _SYSTEM_INSTRUCTIONS.replace("[[categories]]", ", ".join(categories))
71
+
72
+
73
+ def make_gemini_live_api_config(
74
+ model: GeminiModel = "gemini-2.0-flash-exp",
75
+ system_instructions: str = "",
76
+ voice_name: VoiceName = "Puck",
77
+ ):
78
+ return json.dumps(
79
+ {
80
+ "setup": {
81
+ "model": f"models/{model}",
82
+ "system_instruction": {"role": "user", "parts": [{"text": system_instructions}]},
83
+ "tools": _TOOL_DEFINITIONS,
84
+ "generation_config": {
85
+ "temperature": 0.3,
86
+ "response_modalities": ["audio"],
87
+ "speech_config": {"voice_config": {"prebuilt_voice_config": {"voice_name": voice_name}}},
88
+ },
89
+ }
90
+ }
91
+ )
web_components/audio_player.js ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ LitElement,
3
+ html,
4
+ } from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
5
+
6
+ class AudioPlayer extends LitElement {
7
+ static properties = {
8
+ playEvent: { type: String },
9
+ stopEvent: { type: String },
10
+ enabled: { type: Boolean },
11
+ data: { type: String },
12
+ };
13
+
14
+ constructor() {
15
+ super();
16
+ this.enabled = false;
17
+ this.audioContext = null; // Initialize audio context
18
+ this.sampleRate = 24000; // Gemini Live API sends data in 24000hz
19
+ this.channels = 1;
20
+ this.queue = [];
21
+ this.isPlaying = false;
22
+
23
+ this.onGeminiLiveStarted = (e) => {
24
+ if (!this.enabled) {
25
+ this.playAudio();
26
+ }
27
+ };
28
+
29
+ this.onGeminiLiveStopped = (e) => {
30
+ this.dispatchEvent(new MesopEvent(this.stopEvent, {}));
31
+ };
32
+
33
+ this.onAudioOutputReceived = (e) => {
34
+ this.addToQueue(e.detail.data);
35
+ };
36
+ }
37
+
38
+ connectedCallback() {
39
+ super.connectedCallback();
40
+ window.addEventListener(
41
+ "audio-output-received",
42
+ this.onAudioOutputReceived
43
+ );
44
+ window.addEventListener(
45
+ "gemini-live-api-started",
46
+ this.onGeminiLiveStarted
47
+ );
48
+ window.addEventListener(
49
+ "gemini-live-api-stopped",
50
+ this.onGeminiLiveStopped
51
+ );
52
+ }
53
+
54
+ disconnectedCallback() {
55
+ super.disconnectedCallback();
56
+ if (this.audioContext) {
57
+ this.audioContext.close();
58
+ }
59
+ window.removeEventListener(
60
+ "audio-output-received",
61
+ this.onAudioInputReceived
62
+ );
63
+ window.removeEventListener(
64
+ "gemini-live-api-started",
65
+ this.onGeminiLiveStarted
66
+ );
67
+ window.removeEventListener(
68
+ "gemini-live-api-stopped",
69
+ this.onGeminiLiveStopped
70
+ );
71
+ }
72
+
73
+ firstUpdated() {
74
+ if (this.enabled) {
75
+ this.playAudio();
76
+ }
77
+ }
78
+
79
+ updated(changedProperties) {
80
+ // Add audio chunks to queue to play.
81
+ if (changedProperties.has("data") && this.data.length > 0) {
82
+ this.addToQueue(this.data);
83
+ }
84
+
85
+ // Clear the queue if the audio player is disabled.
86
+ if (changedProperties.has("enabled") && !this.enabled) {
87
+ this.queue = [];
88
+ }
89
+ }
90
+
91
+ addToQueue(base64Data) {
92
+ if (!this.enabled) {
93
+ return;
94
+ }
95
+ this.queue.push(base64Data);
96
+ if (!this.isPlaying) {
97
+ this.playNext();
98
+ }
99
+ }
100
+
101
+ playAudio() {
102
+ if (!this.enabled) {
103
+ this.dispatchEvent(new MesopEvent(this.playEvent, {}));
104
+ }
105
+ if (!this.audioContext) {
106
+ this.audioContext = new AudioContext();
107
+ }
108
+ this.playNext();
109
+ }
110
+
111
+ playNext() {
112
+ if (!this.enabled || !this.audioContext || this.queue.length === 0) {
113
+ this.isPlaying = false;
114
+ return;
115
+ }
116
+
117
+ this.isPlaying = true;
118
+ const data = this.queue.shift();
119
+ const source = this.playPCM(data);
120
+
121
+ source.onended = () => {
122
+ this.playNext();
123
+ };
124
+ }
125
+
126
+ playPCM(data) {
127
+ // Convert base64 to binary.
128
+ const binaryAudio = atob(data);
129
+
130
+ // Convert binary string to ArrayBuffer.
131
+ const audioBuffer = new ArrayBuffer(binaryAudio.length);
132
+ const bufferView = new Uint8Array(audioBuffer);
133
+ for (let i = 0; i < binaryAudio.length; i++) {
134
+ bufferView[i] = binaryAudio.charCodeAt(i);
135
+ }
136
+
137
+ // Convert to 16-bit PCM data.
138
+ const pcmData = new Int16Array(audioBuffer);
139
+
140
+ // Create audio buffer.
141
+ const frameCount = pcmData.length;
142
+ const audioBufferData = this.audioContext.createBuffer(
143
+ this.channels,
144
+ frameCount,
145
+ this.sampleRate
146
+ );
147
+
148
+ // Get channel data and convert PCM to float32.
149
+ const channelData = audioBufferData.getChannelData(0);
150
+ for (let i = 0; i < frameCount; i++) {
151
+ // Convert 16-bit PCM (-32768 to 32767) to float32 (-1.0 to 1.0)
152
+ channelData[i] = pcmData[i] / 32768.0;
153
+ }
154
+
155
+ // Create and play the source.
156
+ const source = this.audioContext.createBufferSource();
157
+ source.buffer = audioBufferData;
158
+ source.connect(this.audioContext.destination);
159
+ source.start();
160
+
161
+ return source;
162
+ }
163
+
164
+ render() {
165
+ if (this.enabled) {
166
+ return html`<span><slot></slot></span>`;
167
+ }
168
+ return html`<span @click="${this.playAudio}"><slot></slot></span>`;
169
+ }
170
+ }
171
+
172
+ customElements.define("audio-player", AudioPlayer);
web_components/audio_player.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Callable
2
+ import base64
3
+
4
+ import mesop.labs as mel
5
+
6
+
7
@mel.web_component(path="./audio_player.js")
def audio_player(
    *,
    enabled: bool = False,
    data: bytes = b"",
    on_play: Callable[[mel.WebEvent], Any] | None = None,
    on_stop: Callable[[mel.WebEvent], Any] | None = None,
):
    """Streams server-provided audio to the browser for playback.

    Note that the web component does not persist the chunks it receives: each
    chunk sits in a transient queue and is discarded as soon as it has been
    played.

    This is a barebones configuration with the sample rate fixed at 24000hz,
    since that is what Gemini returns, and the payload is expected to be raw
    PCM data.
    """
    registered_events = {
        "playEvent": on_play,
        "stopEvent": on_stop,
    }
    return mel.insert_web_component(
        name="audio-player",
        events=_filter_events(registered_events),
        properties={
            "enabled": enabled,
            # The component expects base64 text, not raw bytes.
            "data": base64.b64encode(data).decode("utf-8"),
        },
    )
37
+
38
+
39
def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    """Drop entries with no callback so only bound events get registered."""
    filtered: dict[str, Callable[[mel.WebEvent], Any]] = {}
    for name, handler in events.items():
        if handler:
            filtered[name] = handler
    return filtered
web_components/audio_recorder.js ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ LitElement,
3
+ html,
4
+ } from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
5
+
6
+ class AudioRecorder extends LitElement {
7
+ static properties = {
8
+ dataEvent: { type: String },
9
+ stateChangeEvent: { type: String },
10
+ state: { type: String },
11
+ isRecording: { type: Boolean },
12
+ debugBuffer: { state: true },
13
+ debug: { type: Boolean },
14
+ voiceDetectionEnabled: { type: Boolean },
15
+ voiceThreshold: { type: Number },
16
+ voiceHoldTime: { type: Number },
17
+ };
18
+
19
+ constructor() {
20
+ super();
21
+ this.debug = false;
22
+ this.mediaStream = null;
23
+ this.audioContext = null;
24
+ this.processor = null;
25
+ this.isStreaming = false;
26
+ this.isRecording = false;
27
+ this.isInitializing = false;
28
+ this.sequenceNumber = 0;
29
+ this.debugBuffer = [];
30
+ this.debugBufferSize = 50;
31
+ this.targetSampleRate = 16000;
32
+
33
+ // Voice detection parameters
34
+ this.voiceDetectionEnabled = true; // Enable by default
35
+ this.voiceThreshold = 0.01; // RMS threshold for voice detection
36
+ this.voiceHoldTime = 500; // Time to hold voice detection state in ms
37
+ this.lastVoiceDetectedTime = 0; // Last time voice was detected
38
+ this.isVoiceDetected = false; // Current voice detection state
39
+ this.consecutiveSilentFrames = 0; // Counter for silent frames
40
+ this.silenceThreshold = 10; // Number of silent frames before cutting off
41
+
42
+ this.onGeminiLiveStarted = (e) => {
43
+ if (this.isRecording) {
44
+ this.startStreaming();
45
+ }
46
+ };
47
+ this.onGeminiLiveStopped = (e) => {
48
+ this.stop();
49
+ };
50
+ }
51
+
52
+ connectedCallback() {
53
+ super.connectedCallback();
54
+ window.addEventListener(
55
+ "gemini-live-api-started",
56
+ this.onGeminiLiveStarted
57
+ );
58
+ window.addEventListener(
59
+ "gemini-live-api-stopped",
60
+ this.onGeminiLiveStopped
61
+ );
62
+ }
63
+
64
+ disconnectedCallback() {
65
+ super.disconnectedCallback();
66
+ this.stop();
67
+ window.removeEventListener(
68
+ "gemini-live-api-started",
69
+ this.onAudioInputReceived
70
+ );
71
+ window.removeEventListener(
72
+ "gemini-live-api-stopped",
73
+ this.onGeminiLiveStopped
74
+ );
75
+ }
76
+
77
+ firstUpdated() {
78
+ if (this.state !== "disabled") {
79
+ this.startStreaming();
80
+ }
81
+ }
82
+
83
+ log(...args) {
84
+ if (this.debug) {
85
+ console.log(...args);
86
+ }
87
+ }
88
+
89
+ warn(...args) {
90
+ if (this.debug) {
91
+ console.warn(...args);
92
+ }
93
+ }
94
+
95
+ error(...args) {
96
+ if (this.debug) {
97
+ console.error(...args);
98
+ }
99
+ }
100
+
101
+ isVoiceFrame(audioData) {
102
+ // Calculate RMS of the audio frame
103
+ let sumSquares = 0;
104
+ for (let i = 0; i < audioData.length; i++) {
105
+ sumSquares += audioData[i] * audioData[i];
106
+ }
107
+ const rms = Math.sqrt(sumSquares / audioData.length);
108
+
109
+ const now = Date.now();
110
+
111
+ // Check if we detect voice in this frame
112
+ if (rms > this.voiceThreshold) {
113
+ this.lastVoiceDetectedTime = now;
114
+ this.consecutiveSilentFrames = 0;
115
+ this.isVoiceDetected = true;
116
+ return true;
117
+ }
118
+
119
+ // Check if we're still within the hold time
120
+ if (now - this.lastVoiceDetectedTime < this.voiceHoldTime) {
121
+ return true;
122
+ }
123
+
124
+ // Increment silent frames counter
125
+ this.consecutiveSilentFrames++;
126
+
127
+ // If we've seen enough silent frames, mark as silent
128
+ if (this.consecutiveSilentFrames > this.silenceThreshold) {
129
+ this.isVoiceDetected = false;
130
+ }
131
+
132
+ return this.isVoiceDetected;
133
+ }
134
+
135
+ async startStreaming() {
136
+ if (this.state === "disabled") {
137
+ this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "initializing"));
138
+ }
139
+ this.isInitializing = true;
140
+ const initialized = await this.initialize();
141
+ this.isInitializing = false;
142
+ if (initialized) {
143
+ this.isRecording = true;
144
+ this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "recording"));
145
+ this.start();
146
+ }
147
+ }
148
+
149
+ async initialize() {
150
+ try {
151
+ // First check what sample rates are supported with echo cancellation
152
+ const testStream = await navigator.mediaDevices.getUserMedia({
153
+ audio: {
154
+ echoCancellation: true,
155
+ noiseSuppression: true,
156
+ autoGainControl: true,
157
+ },
158
+ video: false,
159
+ });
160
+
161
+ // Get the actual sample rate from the system
162
+ const systemTrack = testStream.getAudioTracks()[0];
163
+ const settings = systemTrack.getSettings();
164
+ this.log("System audio settings:", settings);
165
+
166
+ // Clean up the test stream
167
+ testStream.getTracks().forEach((track) => track.stop());
168
+
169
+ // Now create the real stream using the system's capabilities
170
+ this.mediaStream = await navigator.mediaDevices.getUserMedia({
171
+ audio: {
172
+ channelCount: 1,
173
+ sampleRate: settings.sampleRate,
174
+ echoCancellation: true,
175
+ noiseSuppression: true,
176
+ autoGainControl: true,
177
+ echoCancellationType: "system",
178
+ latency: 0,
179
+ },
180
+ video: false,
181
+ });
182
+
183
+ // Log the actual constraints that were applied
184
+ const audioTrack = this.mediaStream.getAudioTracks()[0];
185
+ const actualConstraints = audioTrack.getSettings();
186
+ this.log("Applied audio constraints:", actualConstraints);
187
+
188
+ // Set up audio context matching the system rate
189
+ this.audioContext = new AudioContext({
190
+ sampleRate: settings.sampleRate,
191
+ });
192
+ this.log(
193
+ "AudioContext created with sample rate:",
194
+ this.audioContext.sampleRate
195
+ );
196
+
197
+ const micSource = this.audioContext.createMediaStreamSource(
198
+ this.mediaStream
199
+ );
200
+
201
+ this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
202
+
203
+ // Connect the audio nodes
204
+ micSource.connect(this.processor);
205
+ this.processor.connect(this.audioContext.destination);
206
+
207
+ return true;
208
+ } catch (error) {
209
+ this.error("Error initializing audio streamer:", error);
210
+ return false;
211
+ }
212
+ }
213
+
214
+ downsampleBuffer(buffer, originalSampleRate) {
215
+ if (originalSampleRate === this.targetSampleRate) {
216
+ return buffer;
217
+ }
218
+
219
+ const ratio = originalSampleRate / this.targetSampleRate;
220
+ const newLength = Math.floor(buffer.length / ratio);
221
+ const result = new Float32Array(newLength);
222
+
223
+ for (let i = 0; i < newLength; i++) {
224
+ const startIndex = Math.floor(i * ratio);
225
+ const endIndex = Math.floor((i + 1) * ratio);
226
+ let sum = 0;
227
+ let count = 0;
228
+
229
+ for (let j = startIndex; j < endIndex && j < buffer.length; j++) {
230
+ sum += buffer[j];
231
+ count++;
232
+ }
233
+
234
+ result[i] = count > 0 ? sum / count : 0;
235
+ }
236
+
237
+ this.log("Downsampling details:", {
238
+ originalRate: originalSampleRate,
239
+ targetRate: this.targetSampleRate,
240
+ originalLength: buffer.length,
241
+ newLength: result.length,
242
+ actualRatio: buffer.length / result.length,
243
+ });
244
+
245
+ return result;
246
+ }
247
+
248
+ addAudioDebugger(sourceNode, label) {
249
+ if (!this.debug) return;
250
+
251
+ const analyser = this.audioContext.createAnalyser();
252
+ analyser.fftSize = 2048;
253
+ sourceNode.connect(analyser);
254
+
255
+ const bufferLength = analyser.frequencyBinCount;
256
+ const dataArray = new Float32Array(bufferLength);
257
+
258
+ this.debugInterval = setInterval(() => {
259
+ if (!this.isStreaming) return;
260
+
261
+ analyser.getFloatTimeDomainData(dataArray);
262
+ let rms = 0;
263
+ for (let i = 0; i < bufferLength; i++) {
264
+ rms += dataArray[i] * dataArray[i];
265
+ }
266
+ rms = Math.sqrt(rms / bufferLength);
267
+ this.log(`${label} RMS Level: ${rms.toFixed(6)}`);
268
+ }, 1000);
269
+ }
270
+
271
+ start() {
272
+ this.isStreaming = true;
273
+ this.debugBuffer = [];
274
+ this.lastVoiceDetectedTime = 0;
275
+ this.isVoiceDetected = false;
276
+ this.consecutiveSilentFrames = 0;
277
+
278
+ this.processor.onaudioprocess = (event) => {
279
+ if (!this.isStreaming) return;
280
+
281
+ const inputData = event.inputBuffer.getChannelData(0);
282
+ const originalSampleRate = event.inputBuffer.sampleRate;
283
+
284
+ // Log initial processing details if needed
285
+ if (this.sequenceNumber === 0) {
286
+ this.log("Audio Processing Details:", {
287
+ bufferSize: this.processor.bufferSize,
288
+ inputChannels: this.processor.numberOfInputs,
289
+ outputChannels: this.processor.numberOfOutputs,
290
+ originalSampleRate: originalSampleRate,
291
+ targetSampleRate: this.targetSampleRate,
292
+ length: inputData.length,
293
+ timestamp: event.timeStamp,
294
+ });
295
+ }
296
+
297
+ // Check for voice activity if enabled
298
+ if (this.voiceDetectionEnabled && !this.isVoiceFrame(inputData)) {
299
+ // Skip this frame if no voice is detected
300
+ this.sequenceNumber++; // Still increment to maintain sequence
301
+ return;
302
+ }
303
+
304
+ const downsampledData = this.downsampleBuffer(
305
+ inputData,
306
+ originalSampleRate
307
+ );
308
+
309
+ const processedData = new Float32Array(downsampledData.length);
310
+ const gain = 5.0;
311
+ for (let i = 0; i < downsampledData.length; i++) {
312
+ processedData[i] = downsampledData[i] * gain;
313
+ }
314
+
315
+ // Debug logging
316
+ if (this.sequenceNumber % 50 === 0 && this.debug) {
317
+ const stats = {
318
+ originalLength: inputData.length,
319
+ downsampledLength: downsampledData.length,
320
+ maxValue: Math.max(...processedData),
321
+ minValue: Math.min(...processedData),
322
+ originalSampleRate,
323
+ targetSampleRate: this.targetSampleRate,
324
+ isVoiceDetected: this.isVoiceDetected,
325
+ };
326
+ this.log("Audio buffer stats:", stats);
327
+ }
328
+
329
+ // Store in debug buffer
330
+ this.debugBuffer.push(processedData);
331
+ if (this.debugBuffer.length > this.debugBufferSize) {
332
+ this.debugBuffer.shift();
333
+ }
334
+
335
+ // Audio level monitoring
336
+ let rms = 0;
337
+ for (let i = 0; i < processedData.length; i++) {
338
+ rms += processedData[i] * processedData[i];
339
+ }
340
+ rms = Math.sqrt(rms / processedData.length);
341
+
342
+ if (this.sequenceNumber % 10 === 0 && this.debug) {
343
+ this.log(
344
+ `Audio Level (RMS): ${rms.toFixed(4)}, Voice Detected: ${
345
+ this.isVoiceDetected
346
+ }`
347
+ );
348
+ if (rms < 0.0001) {
349
+ this.warn(
350
+ "Warning: Very low audio level detected. Check if microphone is working."
351
+ );
352
+ }
353
+ }
354
+
355
+ // Convert to Int16Array for transmission
356
+ const intData = new Int16Array(processedData.length);
357
+ for (let i = 0; i < processedData.length; i++) {
358
+ intData[i] = Math.max(
359
+ -32768,
360
+ Math.min(32767, processedData[i] * 32768)
361
+ );
362
+
363
+ if (this.sequenceNumber % 100 === 0 && i < 10 && this.debug) {
364
+ this.log(
365
+ `Sample ${i}: Float=${processedData[i].toFixed(4)}, Int16=${
366
+ intData[i]
367
+ }`
368
+ );
369
+ }
370
+ }
371
+
372
+ // Convert to base64 and dispatch
373
+ const bytes = new Uint8Array(intData.buffer);
374
+ const base64Data = btoa(
375
+ Array.from(bytes)
376
+ .map((byte) => String.fromCharCode(byte))
377
+ .join("")
378
+ );
379
+
380
+ this.dispatchEvent(
381
+ new MesopEvent(this.dataEvent, {
382
+ sequence: this.sequenceNumber++,
383
+ sampleRate: this.targetSampleRate,
384
+ data: base64Data,
385
+ isVoice: this.isVoiceDetected,
386
+ })
387
+ );
388
+
389
+ this.dispatchEvent(
390
+ new CustomEvent("audio-input-received", {
391
+ detail: { data: base64Data },
392
+ // Allow event to cross shadow DOM boundaries (both need to be true)
393
+ bubbles: true,
394
+ composed: true,
395
+ })
396
+ );
397
+ };
398
+
399
+ return true;
400
+ }
401
+
402
+ stop() {
403
+ this.isStreaming = false;
404
+ this.isRecording = false;
405
+
406
+ this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "disabled"));
407
+
408
+ if (this.debugInterval) {
409
+ clearInterval(this.debugInterval);
410
+ }
411
+
412
+ if (this.processor) {
413
+ this.processor.onaudioprocess = null;
414
+ }
415
+
416
+ if (this.mediaStream) {
417
+ this.mediaStream.getTracks().forEach((track) => track.stop());
418
+ }
419
+
420
+ if (this.audioContext) {
421
+ this.audioContext.close();
422
+ }
423
+ }
424
+
425
+ async playbackDebug() {
426
+ if (!this.debugBuffer.length) {
427
+ this.log("No audio data available for playback");
428
+ return;
429
+ }
430
+
431
+ const playbackContext = new AudioContext();
432
+ const systemSampleRate = playbackContext.sampleRate;
433
+
434
+ const totalSamples16k =
435
+ this.debugBuffer.length * this.debugBuffer[0].length;
436
+
437
+ const upsampledLength = Math.round(
438
+ totalSamples16k * (systemSampleRate / this.targetSampleRate)
439
+ );
440
+
441
+ const audioBuffer = playbackContext.createBuffer(
442
+ 1,
443
+ upsampledLength,
444
+ systemSampleRate
445
+ );
446
+
447
+ const channelData = audioBuffer.getChannelData(0);
448
+
449
+ const combined16kBuffer = new Float32Array(totalSamples16k);
450
+ let offset = 0;
451
+ for (let i = 0; i < this.debugBuffer.length; i++) {
452
+ combined16kBuffer.set(this.debugBuffer[i], offset);
453
+ offset += this.debugBuffer[i].length;
454
+ }
455
+
456
+ const ratio = this.targetSampleRate / systemSampleRate;
457
+ for (let i = 0; i < upsampledLength; i++) {
458
+ const position = i * ratio;
459
+ const index = Math.floor(position);
460
+ const decimal = position - index;
461
+
462
+ const sample1 = combined16kBuffer[index] || 0;
463
+ const sample2 = combined16kBuffer[index + 1] || sample1;
464
+ channelData[i] = sample1 + decimal * (sample2 - sample1);
465
+ }
466
+
467
+ const source = playbackContext.createBufferSource();
468
+ source.buffer = audioBuffer;
469
+ source.connect(playbackContext.destination);
470
+ source.start();
471
+ this.log("Playing debug audio at system rate...", {
472
+ systemSampleRate,
473
+ originalLength: totalSamples16k,
474
+ upsampledLength,
475
+ });
476
+
477
+ source.onended = () => {
478
+ this.log("Debug playback finished");
479
+ playbackContext.close();
480
+ };
481
+ }
482
+
483
+ render() {
484
+ if (this.isInitializing) {
485
+ return html`<span><slot></slot></span>`;
486
+ }
487
+
488
+ if (this.isRecording) {
489
+ return html`<span @click="${this.stop}"><slot></slot></span> `;
490
+ }
491
+
492
+ return html`<span @click="${this.startStreaming}"><slot></slot></span>`;
493
+ }
494
+ }
495
+
496
+ customElements.define("audio-recorder", AudioRecorder);
web_components/audio_recorder.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Callable, Literal
2
+
3
+ import mesop.labs as mel
4
+
5
+
6
@mel.web_component(path="./audio_recorder.js")
def audio_recorder(
    *,
    state: Literal["disabled", "initializing", "recording"] = "disabled",
    on_data: Callable[[mel.WebEvent], Any] | None = None,
    on_state_change: Callable[[mel.WebEvent], Any] | None = None,
):
    """Captures microphone audio and streams it to the Mesop server.

    Designed for use with `MESOP_WEBSOCKETS_ENABLED=true`.

    `on_data` fires continuously with base64-encoded PCM chunks at a 16000hz
    sampling rate. For some reason the Gemini Live API only accepts PCM data at
    16000hz — at 48000hz nothing is returned. There may be a setting to override
    the expected sampling rate when sent to the Gemini Live API; unfortunately
    the docs are very sparse right now.

    The data event payload looks like:

        {
            "data": <base64-encoded-string>
        }
    """
    registered_events = {
        "dataEvent": on_data,
        "stateChangeEvent": on_state_change,
    }
    return mel.insert_web_component(
        name="audio-recorder",
        events=_filter_events(registered_events),
        properties={"state": state},
    )
41
+
42
+
43
def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    """Drop entries with no callback so only bound events get registered."""
    filtered: dict[str, Callable[[mel.WebEvent], Any]] = {}
    for name, handler in events.items():
        if handler:
            filtered[name] = handler
    return filtered
web_components/gemini_live_connection.js ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ LitElement,
3
+ html,
4
+ } from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
5
+
6
+ class GeminiLiveConnection extends LitElement {
7
+ static properties = {
8
+ api_config: { type: String },
9
+ enabled: { type: Boolean },
10
+ endpoint: { type: String },
11
+ startEvent: { type: String },
12
+ stopEvent: { type: String },
13
+ text_input: { type: String },
14
+ toolCallEvent: { type: String },
15
+ tool_call_responses: { type: String },
16
+ };
17
+
18
+ constructor() {
19
+ super();
20
+ this.onSetupComplete = () => {
21
+ console.log("Setup complete...");
22
+ };
23
+ this.onAudioData = (base64Data) => {
24
+ this.dispatchEvent(
25
+ new CustomEvent("audio-output-received", {
26
+ detail: { data: base64Data },
27
+ // Allow event to cross shadow DOM boundaries (both need to be true)
28
+ bubbles: true,
29
+ composed: true,
30
+ })
31
+ );
32
+ };
33
+ this.onInterrupted = () => {};
34
+ this.onTurnComplete = () => {};
35
+ this.onError = () => {};
36
+ this.onClose = () => {
37
+ console.log("Web socket closed...");
38
+ };
39
+ this.onToolCall = (toolCalls) => {
40
+ this.dispatchEvent(
41
+ new MesopEvent(this.toolCallEvent, {
42
+ toolCalls: JSON.stringify(toolCalls.functionCalls),
43
+ })
44
+ );
45
+ };
46
+ this.pendingSetupMessage = null;
47
+
48
+ this.onAudioInputReceived = (e) => {
49
+ this.sendAudioChunk(e.detail.data);
50
+ };
51
+ }
52
+
53
+ connectedCallback() {
54
+ super.connectedCallback();
55
+ // Start listening for events when component is connected
56
+ window.addEventListener("audio-input-received", this.onAudioInputReceived);
57
+ }
58
+
59
+ disconnectedCallback() {
60
+ super.disconnectedCallback();
61
+ window.removeEventListener(
62
+ "audio-input-received",
63
+ this.onAudioInputReceived
64
+ );
65
+ if (this.ws) {
66
+ this.ws.close();
67
+ }
68
+ }
69
+
70
+ firstUpdated() {
71
+ if (this.enabled) {
72
+ this.setupWebSocket();
73
+ }
74
+ }
75
+
76
+ updated(changedProperties) {
77
+ if (
78
+ changedProperties.has("tool_call_responses") &&
79
+ this.tool_call_responses.length > 0
80
+ ) {
81
+ this.sendToolResponse(JSON.parse(this.tool_call_responses));
82
+ }
83
+ if (changedProperties.has("text_input") && this.text_input.length > 0) {
84
+ this.sendTextMessage(this.text_input);
85
+ }
86
+ }
87
+
88
+ start() {
89
+ if (!this.enabled) {
90
+ this.dispatchEvent(new MesopEvent(this.startEvent, {}));
91
+ this.dispatchEvent(
92
+ new CustomEvent("gemini-live-api-started", {
93
+ detail: {},
94
+ // Allow event to cross shadow DOM boundaries (both need to be true)
95
+ bubbles: true,
96
+ composed: true,
97
+ })
98
+ );
99
+ }
100
+ this.setupWebSocket();
101
+ }
102
+
103
+ stop() {
104
+ this.dispatchEvent(new MesopEvent(this.stopEvent, {}));
105
+ this.dispatchEvent(
106
+ new CustomEvent("gemini-live-api-stopped", {
107
+ detail: {},
108
+ // Allow event to cross shadow DOM boundaries (both need to be true)
109
+ bubbles: true,
110
+ composed: true,
111
+ })
112
+ );
113
+ if (this.ws) {
114
+ this.ws.close();
115
+ }
116
+ }
117
+
118
+ setupWebSocket() {
119
+ this.ws = new WebSocket(this.endpoint);
120
+ this.ws.onopen = () => {
121
+ console.log("WebSocket connection is opening...");
122
+ this.sendSetupMessage();
123
+ };
124
+
125
+ this.ws.onmessage = async (event) => {
126
+ try {
127
+ let wsResponse;
128
+ if (event.data instanceof Blob) {
129
+ const responseText = await event.data.text();
130
+ wsResponse = JSON.parse(responseText);
131
+ } else {
132
+ wsResponse = JSON.parse(event.data);
133
+ }
134
+
135
+ if (wsResponse.setupComplete) {
136
+ this.onSetupComplete();
137
+ } else if (wsResponse.toolCall) {
138
+ this.onToolCall(wsResponse.toolCall);
139
+ } else if (wsResponse.serverContent) {
140
+ if (wsResponse.serverContent.interrupted) {
141
+ this.onInterrupted();
142
+ return;
143
+ }
144
+
145
+ if (wsResponse.serverContent.modelTurn?.parts?.[0]?.inlineData) {
146
+ const audioData =
147
+ wsResponse.serverContent.modelTurn.parts[0].inlineData.data;
148
+ this.onAudioData(audioData);
149
+
150
+ if (!wsResponse.serverContent.turnComplete) {
151
+ this.sendContinueSignal();
152
+ }
153
+ }
154
+
155
+ if (wsResponse.serverContent.turnComplete) {
156
+ this.onTurnComplete();
157
+ }
158
+ }
159
+ } catch (error) {
160
+ console.error("Error parsing response:", error);
161
+ this.onError("Error parsing response: " + error.message);
162
+ }
163
+ };
164
+
165
+ this.ws.onerror = (error) => {
166
+ console.error("WebSocket Error:", error);
167
+ this.onError("WebSocket Error: " + error.message);
168
+ };
169
+
170
+ this.ws.onclose = (event) => {
171
+ console.log("Connection closed:", event);
172
+ this.onClose(event);
173
+ };
174
+ }
175
+
176
+ sendMessage(message) {
177
+ if (this.ws.readyState === WebSocket.OPEN) {
178
+ this.ws.send(JSON.stringify(message));
179
+ } else {
180
+ console.error(
181
+ "WebSocket is not open. Current state:",
182
+ this.ws.readyState
183
+ );
184
+ this.onError("WebSocket is not ready. Please try again.");
185
+ }
186
+ }
187
+
188
+ sendSetupMessage() {
189
+ if (this.ws.readyState === WebSocket.OPEN) {
190
+ this.ws.send(this.api_config);
191
+ } else {
192
+ console.error("Connection not ready.");
193
+ }
194
+ }
195
+
196
+ sendAudioChunk(base64Audio) {
197
+ const message = {
198
+ realtime_input: {
199
+ media_chunks: [
200
+ {
201
+ mime_type: "audio/pcm",
202
+ data: base64Audio,
203
+ },
204
+ ],
205
+ },
206
+ };
207
+ this.sendMessage(message);
208
+ }
209
+
210
+ sendEndMessage() {
211
+ const message = {
212
+ client_content: {
213
+ turns: [
214
+ {
215
+ role: "user",
216
+ parts: [],
217
+ },
218
+ ],
219
+ turn_complete: true,
220
+ },
221
+ };
222
+ this.sendMessage(message);
223
+ }
224
+
225
+ sendContinueSignal() {
226
+ const message = {
227
+ client_content: {
228
+ turns: [
229
+ {
230
+ role: "user",
231
+ parts: [],
232
+ },
233
+ ],
234
+ turn_complete: false,
235
+ },
236
+ };
237
+ this.sendMessage(message);
238
+ }
239
+
240
+ sendTextMessage(text) {
241
+ this.sendMessage({
242
+ client_content: {
243
+ turn_complete: true,
244
+ turns: [{ role: "user", parts: [{ text: text }] }],
245
+ },
246
+ });
247
+ }
248
+
249
+ sendToolResponse(functionResponses) {
250
+ const toolResponse = {
251
+ tool_response: {
252
+ function_responses: functionResponses,
253
+ },
254
+ };
255
+ this.sendMessage(toolResponse);
256
+ }
257
+
258
+ async ensureConnected() {
259
+ if (this.ws.readyState === WebSocket.OPEN) {
260
+ return;
261
+ }
262
+
263
+ return new Promise((resolve, reject) => {
264
+ const timeout = setTimeout(() => {
265
+ reject(new Error("Connection timeout"));
266
+ }, 5000);
267
+
268
+ const onOpen = () => {
269
+ clearTimeout(timeout);
270
+ this.ws.removeEventListener("open", onOpen);
271
+ this.ws.removeEventListener("error", onError);
272
+ resolve();
273
+ };
274
+
275
+ const onError = (error) => {
276
+ clearTimeout(timeout);
277
+ this.ws.removeEventListener("open", onOpen);
278
+ this.ws.removeEventListener("error", onError);
279
+ reject(error);
280
+ };
281
+
282
+ this.ws.addEventListener("open", onOpen);
283
+ this.ws.addEventListener("error", onError);
284
+ });
285
+ }
286
+
287
+ render() {
288
+ if (this.enabled) {
289
+ return html`<span @click="${this.stop}"><slot></slot></span>`;
290
+ } else {
291
+ return html`<span @click="${this.start}"><slot></slot></span>`;
292
+ }
293
+ }
294
+ }
295
+
296
+ customElements.define("gemini-live-connection", GeminiLiveConnection);
web_components/gemini_live_connection.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Callable
2
+
3
+ import mesop.labs as mel
4
+
5
+
6
+ _HOST = "generativelanguage.googleapis.com"
7
+
8
+ _GEMINI_BIDI_WEBSOCKET_URI = "wss://{host}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key={api_key}"
9
+
10
+
11
@mel.web_component(path="./gemini_live_connection.js")
def gemini_live_connection(
    *,
    enabled: bool = False,
    api_key: str = "",
    api_config: str = "",
    on_start: Callable[[mel.WebEvent], Any] | None = None,
    on_stop: Callable[[mel.WebEvent], Any] | None = None,
    on_tool_call: Callable[[mel.WebEvent], Any] | None = None,
    tool_call_responses: str = "",
    text_input: str = "",
):
    """Manages the browser-side websocket session with the Gemini Live API.

    The websocket endpoint is derived from `api_key`; `api_config` is the raw
    setup message sent once the connection opens. `tool_call_responses` and
    `text_input` are forwarded to the API whenever they change.
    """
    registered_events = {
        "startEvent": on_start,
        "stopEvent": on_stop,
        "toolCallEvent": on_tool_call,
    }
    endpoint = _GEMINI_BIDI_WEBSOCKET_URI.format(host=_HOST, api_key=api_key)
    return mel.insert_web_component(
        name="gemini-live-connection",
        events=_filter_events(registered_events),
        properties={
            "api_config": api_config,
            "enabled": enabled,
            "endpoint": endpoint,
            "tool_call_responses": tool_call_responses,
            "text_input": text_input,
        },
    )
40
+
41
+
42
def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    """Drop entries with no callback so only bound events get registered."""
    filtered: dict[str, Callable[[mel.WebEvent], Any]] = {}
    for name, handler in events.items():
        if handler:
            filtered[name] = handler
    return filtered
web_components/video_recorder.js ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ LitElement,
3
+ html,
4
+ css,
5
+ } from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
6
+
7
+ class VideoRecorder extends LitElement {
8
+ static styles = css`
9
+ :host {
10
+ display: block;
11
+ }
12
+
13
+ .video-container {
14
+ position: relative;
15
+ width: 100%;
16
+ max-width: 640px;
17
+ margin: 0 auto;
18
+ }
19
+
20
+ video {
21
+ width: 100%;
22
+ height: auto;
23
+ background: #000;
24
+ }
25
+
26
+ .controls {
27
+ margin-top: 10px;
28
+ text-align: center;
29
+ }
30
+
31
+ button {
32
+ padding: 8px 16px;
33
+ font-size: 16px;
34
+ cursor: pointer;
35
+ }
36
+ `;
37
+
38
+ static properties = {
39
+ dataEvent: { type: String },
40
+ recordEvent: { type: String },
41
+ isRecording: { type: Boolean },
42
+ enabled: { type: Boolean },
43
+ quality: { type: Number },
44
+ fps: { type: Number },
45
+ showPreview: { type: Boolean },
46
+ };
47
+
48
+ constructor() {
49
+ super();
50
+ this.debug = false;
51
+ this.mediaStream = null;
52
+ this.isStreaming = false;
53
+ this.isRecording = false;
54
+ this.isInitializing = false;
55
+ this.enabled = false;
56
+ this.quality = 0.8; // JPEG quality
57
+ this.fps = 2; // Frames per second
58
+ this.showPreview = true; // Enable preview by default
59
+
60
+ // Setup canvas and video elements
61
+ this.video = document.createElement("video");
62
+ this.video.setAttribute("playsinline", ""); // Better mobile support
63
+ this.video.setAttribute("autoplay", "");
64
+ this.video.setAttribute("muted", "");
65
+ this.canvas = document.createElement("canvas");
66
+ this.ctx = this.canvas.getContext("2d");
67
+ this.captureInterval = null;
68
+ }
69
+
70
+ disconnectedCallback() {
71
+ this.stop();
72
+ super.disconnectedCallback();
73
+ }
74
+
75
+ firstUpdated() {
76
+ if (this.enabled) {
77
+ this.startStreaming();
78
+ }
79
+ }
80
+
81
+ log(...args) {
82
+ if (this.debug) {
83
+ console.log(...args);
84
+ }
85
+ }
86
+
87
+ warn(...args) {
88
+ if (this.debug) {
89
+ console.warn(...args);
90
+ }
91
+ }
92
+
93
+ error(...args) {
94
+ if (this.debug) {
95
+ console.error(...args);
96
+ }
97
+ }
98
+
99
+ async startStreaming() {
100
+ if (!this.enabled) {
101
+ // this.dispatchEvent(new MesopEvent(this.recordEvent, {}));
102
+ }
103
+ this.isInitializing = true;
104
+ const initialized = await this.initialize();
105
+ this.isInitializing = false;
106
+ if (initialized) {
107
+ this.isRecording = true;
108
+ this.start();
109
+ }
110
+ }
111
+
112
+ async initialize() {
113
+ try {
114
+ this.mediaStream = await navigator.mediaDevices.getUserMedia({
115
+ video: {
116
+ width: { ideal: 1280 },
117
+ height: { ideal: 720 },
118
+ },
119
+ });
120
+
121
+ this.video.srcObject = this.mediaStream;
122
+ await this.video.play();
123
+
124
+ // Wait for video to be ready
125
+ await new Promise((resolve) => {
126
+ this.video.onloadedmetadata = () => {
127
+ this.canvas.width = this.video.videoWidth;
128
+ this.canvas.height = this.video.videoHeight;
129
+ resolve();
130
+ };
131
+ });
132
+
133
+ // Request a redraw to show the video preview
134
+ this.requestUpdate();
135
+ return true;
136
+ } catch (error) {
137
+ this.error("Error accessing webcam:", error);
138
+ return false;
139
+ }
140
+ }
141
+
142
+ captureFrame() {
143
+ if (!this.mediaStream) {
144
+ this.error("Webcam not started");
145
+ return null;
146
+ }
147
+
148
+ // Draw current video frame to canvas
149
+ this.ctx.drawImage(this.video, 0, 0);
150
+
151
+ // Convert to JPEG and base64 encode
152
+ const base64Data = this.canvas.toDataURL("image/jpeg", this.quality);
153
+
154
+ // Remove the data URL prefix to get just the base64 data
155
+ return base64Data.replace("data:image/jpeg;base64,", "");
156
+ }
157
+
158
+ start() {
159
+ this.isStreaming = true;
160
+
161
+ // Start capturing frames at specified FPS
162
+ const intervalMs = 1000 / this.fps;
163
+ this.captureInterval = setInterval(() => {
164
+ const base64Frame = this.captureFrame();
165
+ if (base64Frame) {
166
+ this.dispatchEvent(
167
+ new MesopEvent(this.dataEvent, {
168
+ data: base64Frame,
169
+ })
170
+ );
171
+ }
172
+ }, intervalMs);
173
+
174
+ return true;
175
+ }
176
+
177
+ stop() {
178
+ this.isStreaming = false;
179
+ this.isRecording = false;
180
+
181
+ if (this.captureInterval) {
182
+ clearInterval(this.captureInterval);
183
+ this.captureInterval = null;
184
+ }
185
+
186
+ if (this.mediaStream) {
187
+ this.mediaStream.getTracks().forEach((track) => track.stop());
188
+ this.mediaStream = null;
189
+ }
190
+
191
+ // Clear video source
192
+ if (this.video.srcObject) {
193
+ this.video.srcObject = null;
194
+ }
195
+ }
196
+
197
+ render() {
198
+ return html`
199
+ <div class="video-container">
200
+ ${this.showPreview && (this.isRecording || this.isInitializing)
201
+ ? html`<video
202
+ .srcObject="${this.mediaStream}"
203
+ playsinline
204
+ autoplay
205
+ muted
206
+ ></video>`
207
+ : null}
208
+
209
+ <div class="controls">
210
+ ${this.isInitializing
211
+ ? html`<div>Initializing video recorder...</div>`
212
+ : this.isRecording
213
+ ? html`<button @click="${this.stop}">Stop Recording</button>`
214
+ : html`<button @click="${this.startStreaming}">
215
+ Start Recording
216
+ </button>`}
217
+ </div>
218
+ </div>
219
+ `;
220
+ }
221
+ }
222
+
223
+ customElements.define("video-recorder", VideoRecorder);
web_components/video_recorder.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Callable
2
+
3
+ import mesop.labs as mel
4
+
5
+
6
@mel.web_component(path="./video_recorder.js")
def video_recorder(
    *,
    enabled: bool = False,
    on_data: Callable[[mel.WebEvent], Any] | None = None,
    on_record: Callable[[mel.WebEvent], Any] | None = None,
):
    """Records video and streams frames to the Mesop server.

    This web component is designed to work with `MESOP_WEBSOCKETS_ENABLED=true`.

    The data event looks like:

        {
            "data": <base64-encoded-string>
        }

    CONSISTENCY FIX: callbacks are now optional with `None` defaults and unset
    events are filtered out before registration, matching the sibling
    audio_player/audio_recorder/gemini_live_connection components. Previously
    both callbacks were required and a `None` value would have been registered
    as a dead event handler. Existing callers that pass both callbacks are
    unaffected.
    """
    return mel.insert_web_component(
        name="video-recorder",
        events=_filter_events(
            {
                "dataEvent": on_data,
                "recordEvent": on_record,
            }
        ),
        properties={
            "enabled": enabled,
        },
    )


def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    """Drop entries with no callback; mirrors the helper in sibling modules."""
    return {event: callback for event, callback in events.items() if callback}