Commit 09ed935
Richard committed

Initial commit

Parent(s): (none)

Browse files:
- .gitignore +16 -0
- README.md +0 -0
- css.py +152 -0
- main.py +381 -0
- models.py +17 -0
- question_bank.py +132 -0
- requirements.txt +6 -0
- ruff.toml +2 -0
- state.py +41 -0
- trebek_bot.py +91 -0
- web_components/audio_player.js +172 -0
- web_components/audio_player.py +40 -0
- web_components/audio_recorder.js +496 -0
- web_components/audio_recorder.py +44 -0
- web_components/gemini_live_connection.js +296 -0
- web_components/gemini_live_connection.py +43 -0
- web_components/video_recorder.js +223 -0
- web_components/video_recorder.py +32 -0
.gitignore
ADDED
@@ -0,0 +1,16 @@
+# System
+.DS_Store
+
+# Python
+__pycache__
+
+# VS Code
+*.code-workspace
+.pytest_cache
+
+# Dyad
+.dyad
+
+# App
+.env
+data
README.md
ADDED
File without changes
css.py
ADDED
@@ -0,0 +1,152 @@
+import mesop as me
+
+from state import State
+
+COLOR_BLUE = "blue"
+COLOR_YELLOW = "#f0cd6e"
+COLOR_RED = "#cc153c"
+COLOR_DISABLED = "#e4e4e4"
+COLOR_DISABLED_BUTTON_BG = "#ccc"
+
+
+MAIN_COL_GRID = me.Style(
+  background="#ececec",
+  display="grid",
+  grid_template_columns="70% 30%",
+  height="100vh",
+)
+
+SIDEBAR = me.Style(
+  color="#111",
+  overflow_y="scroll",
+  padding=me.Padding.all(20),
+)
+
+SIDEBAR_SECTION = me.Style(margin=me.Margin(bottom=15))
+
+TOOLBAR_SECTION = me.Style(
+  margin=me.Margin(bottom=15),
+  padding=me.Padding.all(5),
+  background=me.theme_var("surface-container-highest"),
+  justify_content="space-evenly",
+  display="flex",
+  flex_direction="row",
+)
+
+TEXT_INPUT = me.Style(width="100%")
+
+
+def sidebar_header() -> me.Style:
+  state = me.state(State)
+  return me.Style(color="#000" if state.gemini_live_api_enabled else "#aaa")
+
+
+def game_button() -> me.Style:
+  state = me.state(State)
+  if not state.api_key:
+    return me.Style()
+  if state.gemini_live_api_enabled:
+    return me.Style(background=me.theme_var("error"), color=me.theme_var("on-error"))
+  return me.Style(background=me.theme_var("primary"), color=me.theme_var("on-primary"))
+
+
+def audio_button() -> me.Style:
+  state = me.state(State)
+  if state.audio_player_enabled:
+    return me.Style(background=me.theme_var("tertiary"), color=me.theme_var("on-tertiary"))
+  return me.Style()
+
+
+def mic_button() -> me.Style:
+  state = me.state(State)
+  if state.audio_recorder_state == "recording":
+    return me.Style(background=me.theme_var("tertiary"), color=me.theme_var("on-tertiary"))
+  if state.gemini_live_api_enabled:
+    return me.Style(background=me.theme_var("error"), color=me.theme_var("on-error"))
+  return me.Style()
+
+
+def score_box() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color="white" if state.gemini_live_api_enabled else COLOR_DISABLED,
+    font_weight="bold",
+    font_size="2.2vw",
+    padding=me.Padding.all(15),
+    text_align="center",
+  )
+
+
+def current_clue_box() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color=COLOR_YELLOW if state.gemini_live_api_enabled else COLOR_DISABLED,
+    font_size="1em",
+    font_weight="bold",
+    padding=me.Padding.all(15),
+  )
+
+
+def board_col_grid() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background="#000" if state.gemini_live_api_enabled else "#ddd",
+    display="grid",
+    gap="5px",
+    grid_template_columns="repeat(6, 1fr)",
+  )
+
+
+def category_box() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color="white",
+    font_weight="bold",
+    font_size="1em",
+    padding=me.Padding.all(15),
+    text_align="center",
+  )
+
+
+def clue_box(is_selectable: bool) -> me.Style:
+  """Style for clue box.
+
+  Args:
+    is_selectable: Visually signifies that the clue is selectable.
+  """
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color=COLOR_YELLOW,
+    cursor="pointer" if is_selectable else "default",
+    font_size="1em",
+    font_weight="bold",
+    padding=me.Padding.all(15),
+    text_align="center",
+  )
+
+
+def response_button(disabled: bool) -> me.Style:
+  """Styles for response submit button.
+
+  Args:
+    disabled: Since we're overriding the style, we need to handle the disabled state.
+  """
+  if disabled:
+    return me.Style(background=COLOR_DISABLED_BUTTON_BG, color="#eee")
+  return me.Style(background=COLOR_BLUE, color="white")
+
+
+def score_text(score: int) -> me.Style:
+  """In Jeopardy, when the score is negative, it is shown in red instead of white."""
+  state = me.state(State)
+  if not state.gemini_live_api_enabled:
+    return me.Style(color=COLOR_DISABLED)
+
+  if score < 0:
+    return me.Style(color=COLOR_RED)
+
+  return me.Style(color="white")
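A note on the pattern above: the uppercase names are constant me.Style values, while the lowercase functions re-read State through me.state(State) on every render, so the returned style tracks the live game state. A minimal sketch of the same pattern with a hypothetical accent_text style:

import mesop as me

from state import State


def accent_text() -> me.Style:
  # Hypothetical example of the dynamic-style pattern used throughout css.py:
  # recomputed on each render, so the color follows gemini_live_api_enabled.
  state = me.state(State)
  return me.Style(color="white" if state.gemini_live_api_enabled else "#aaa")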
main.py
ADDED
@@ -0,0 +1,381 @@
+import json
+import time
+
+import css
+import trebek_bot
+from models import Clue
+import mesop as me
+import mesop.labs as mel
+from web_components.gemini_live_connection import gemini_live_connection
+from web_components.audio_recorder import audio_recorder
+from web_components.audio_player import audio_player
+from state import State
+
+
+def on_load(e: me.LoadEvent):
+  """Updates the system instructions with the randomly selected game categories."""
+  state = me.state(State)
+  categories = [question_set[0].category for question_set in state.board.clues]
+  state.gemini_live_api_config = trebek_bot.make_gemini_live_api_config(
+    system_instructions=trebek_bot.make_system_instruction(categories)
+  )
+
+
+@me.page(
+  path="/",
+  title="Mesop Jeopardy Live",
+  security_policy=me.SecurityPolicy(
+    allowed_connect_srcs=["wss://generativelanguage.googleapis.com"],
+    allowed_iframe_parents=["https://huggingface.co"],
+    allowed_script_srcs=[
+      "https://cdn.jsdelivr.net",
+    ],
+  ),
+  on_load=on_load,
+)
+def app():
+  state = me.state(State)
+
+  with me.box(style=css.MAIN_COL_GRID):
+    with me.box(style=css.board_col_grid()):
+      for col_index in range(len(state.board.clues[0])):
+        # Render Jeopardy categories
+        if col_index == 0:
+          for row_index in range(len(state.board.clues)):
+            cell = state.board.clues[row_index][col_index]
+            with me.box(style=css.category_box()):
+              if state.gemini_live_api_enabled:
+                me.text(cell.category)
+              else:
+                me.text("")
+
+        # Render Jeopardy questions
+        for row_index in range(len(state.board.clues)):
+          cell = state.board.clues[row_index][col_index]
+          key = f"clue-{row_index}-{col_index}"
+          is_selectable = not (key in state.answered_questions or state.selected_question_key)
+          with me.box(
+            style=css.clue_box(state.gemini_live_api_enabled and is_selectable),
+            key=key,
+            on_click=on_click_cell,
+          ):
+            if not state.gemini_live_api_enabled:
+              me.text("")
+            elif key in state.answered_questions:
+              me.text("")
+            elif key == state.selected_question_key:
+              me.text(cell.question, style=me.Style(text_align="left"))
+            else:
+              me.text(f"${cell.normalized_value}", style=me.Style(font_size="2.2vw"))
+
+    # Sidebar
+    with me.box(style=css.SIDEBAR):
+      me.input(
+        label="Google API Key",
+        on_input=on_input_api_key,
+        readonly=state.gemini_live_api_enabled,
+        style=css.TEXT_INPUT,
+        type="password",
+        value=state.api_key,
+      )
+
+      with me.box(style=css.TOOLBAR_SECTION):
+        gemini_live_button()
+        audio_player_button()
+        audio_recorder_button()
+
+      # Score
+      with me.box(style=css.SIDEBAR_SECTION):
+        me.text("Score", type="headline-5", style=css.sidebar_header())
+        with me.box(style=css.score_box()):
+          me.text(format_dollars(state.score), style=css.score_text(state.score))
+
+      # Clue
+      with me.box(style=css.SIDEBAR_SECTION):
+        me.text("Clue", type="headline-5", style=css.sidebar_header())
+        with me.box(style=css.current_clue_box()):
+          if state.selected_question_key:
+            selected_question = get_selected_question(state.board, state.selected_question_key)
+            me.text(selected_question.question)
+          else:
+            me.text("No clue selected. Please select one.", style=me.Style(font_style="italic"))
+
+      # Response
+      with me.box(style=css.SIDEBAR_SECTION):
+        me.text("Response", type="headline-5", style=css.sidebar_header())
+        me.textarea(
+          disabled=not bool(state.selected_question_key),
+          label="Enter your response",
+          on_blur=on_input_response,
+          style=css.TEXT_INPUT,
+          value=state.response_value,
+        )
+
+        disabled = not bool(state.selected_question_key)
+        me.button(
+          disabled=disabled,
+          label="Submit your response",
+          on_click=on_click_submit,
+          style=css.response_button(disabled),
+          type="flat",
+        )
+
+
+@me.component
+def gemini_live_button():
+  state = me.state(State)
+  with gemini_live_connection(
+    api_config=state.gemini_live_api_config,
+    api_key=state.api_key,
+    enabled=state.gemini_live_api_enabled,
+    on_start=on_gemini_live_api_started,
+    on_stop=on_gemini_live_api_stopped,
+    on_tool_call=handle_tool_calls,
+    text_input=state.text_input,
+    tool_call_responses=state.tool_call_responses,
+  ):
+    with me.tooltip(message=get_gemini_live_tooltip()):
+      with me.content_button(
+        disabled=not state.api_key,
+        style=css.game_button(),
+        type="icon",
+      ):
+        if state.gemini_live_api_enabled:
+          me.icon(icon="stop")
+        else:
+          me.icon(icon="play_arrow")
+
+
+@me.component
+def audio_player_button():
+  state = me.state(State)
+  with audio_player(
+    enabled=state.audio_player_enabled, on_play=on_audio_play, on_stop=on_audio_stop
+  ):
+    with me.tooltip(message=get_audio_player_tooltip()):
+      with me.content_button(
+        disabled=True,
+        style=css.audio_button(),
+        type="icon",
+      ):
+        if state.audio_player_enabled:
+          me.icon(icon="volume_up")
+        else:
+          me.icon(icon="volume_mute")
+
+
+@me.component
+def audio_recorder_button():
+  state = me.state(State)
+  with audio_recorder(
+    state=state.audio_recorder_state, on_state_change=on_audio_recorder_state_change
+  ):
+    with me.tooltip(message=get_audio_recorder_tooltip()):
+      with me.content_button(
+        disabled=not state.gemini_live_api_enabled,
+        style=css.mic_button(),
+        type="icon",
+      ):
+        if state.audio_recorder_state == "initializing":
+          me.icon(icon="pending")
+        else:
+          me.icon(icon="mic")
+
+
+def on_click_cell(e: me.ClickEvent):
+  """Selects the given clue by prompting the Gemini Live API."""
+  state = me.state(State)
+  clue = get_selected_question(state.board, e.key)
+  state.text_input = f"I'd like to select {clue.category}, for ${clue.normalized_value}."
+
+
+def on_input_response(e: me.InputBlurEvent):
+  """Stores user input into state, so we can process their response."""
+  state = me.state(State)
+  state.response = e.value
+
+
+def on_click_submit(e: me.ClickEvent):
+  """Submits the user's response to the clue so the Gemini Live API can check whether it is correct."""
+  state = me.state(State)
+  if not state.response.strip():
+    return
+
+  state.text_input = state.response
+
+  # Hack to reset the text input. Update the initial response value to the current
+  # response first, which will trigger a diff when we set the initial response back
+  # to an empty string.
+  #
+  # A small delay is also needed because sometimes the yield happens too fast, which
+  # does not allow the UI on the client to update properly.
+  state.response_value = state.response
+  yield
+  time.sleep(0.5)
+  state.response_value = ""
+  yield
+
+
+def get_selected_question(board, selected_question_key) -> Clue:
+  """Gets the selected question from the key."""
+  _, row, col = selected_question_key.split("-")
+  return board.clues[int(row)][int(col)]
+
+
+def format_dollars(value: int) -> str:
+  """Formats an integer value in US dollar format."""
+  if value < 0:
+    return f"-${value * -1:,}"
+  return f"${value:,}"
+
+
+def get_gemini_live_tooltip() -> str:
+  """Tooltip messages for the Gemini Live API web component button."""
+  state = me.state(State)
+  if state.gemini_live_api_enabled:
+    return "Stop game"
+  if state.api_key:
+    return "Start game"
+  return "Game disabled. Enter API Key."
+
+
+def get_audio_player_tooltip() -> str:
+  """Tooltip messages for the audio player web component button."""
+  state = me.state(State)
+  if state.audio_player_enabled:
+    return "Audio playing"
+  if state.gemini_live_api_enabled:
+    return "Audio not playing"
+  return "Audio disabled"
+
+
+def get_audio_recorder_tooltip() -> str:
+  """Tooltip messages for the audio recorder web component button."""
+  state = me.state(State)
+  if state.audio_recorder_state == "initializing":
+    return "Microphone initializing"
+  if state.audio_recorder_state == "recording":
+    return "Microphone on"
+  if state.gemini_live_api_enabled:
+    return "Microphone muted"
+  return "Microphone disabled"
+
+
+def on_input_api_key(e: me.InputEvent):
+  """Captures Google API key input."""
+  state = me.state(State)
+  state.api_key = e.value
+
+
+def on_audio_play(e: mel.WebEvent):
+  """Event for when the audio player play button was clicked."""
+  me.state(State).audio_player_enabled = True
+
+
+def on_audio_stop(e: mel.WebEvent):
+  """Event for when the audio player stop button was clicked."""
+  me.state(State).audio_player_enabled = False
+
+
+def on_audio_recorder_state_change(e: mel.WebEvent):
+  """Event for when the audio recorder state changes."""
+  me.state(State).audio_recorder_state = e.value
+
+
+def on_gemini_live_api_started(e: mel.WebEvent):
+  """Event for when the Gemini Live API start button was clicked."""
+  me.state(State).gemini_live_api_enabled = True
+
+
+def on_gemini_live_api_stopped(e: mel.WebEvent):
+  """Event for when the Gemini Live API stop button was clicked."""
+  state = me.state(State)
+  state.gemini_live_api_enabled = False
+  state.selected_question_key = ""
+  state.response_value = ""
+
+
+def handle_tool_calls(e: mel.WebEvent):
+  """Processes tool calls from the Gemini Live API.
+
+  Supported tool calls:
+
+  - get_clue
+  - update_score
+  """
+  state = me.state(State)
+  tool_calls = json.loads(e.value["toolCalls"])
+  responses = []
+  for tool_call in tool_calls:
+    result = None
+    if tool_call["name"] == "get_clue":
+      result = tool_call_get_clue(
+        tool_call["args"]["category_index"], tool_call["args"]["dollar_index"]
+      )
+    elif tool_call["name"] == "update_score":
+      result = tool_call_update_score(tool_call["args"]["is_correct"])
+
+    responses.append(
+      {
+        "id": tool_call["id"],
+        "name": tool_call["name"],
+        "response": {
+          "result": result,
+        },
+      }
+    )
+
+  if responses:
+    print(responses)
+    state.tool_call_responses = json.dumps(responses)
+
+
+def tool_call_update_score(is_correct: bool) -> str:
+  """Updates the user's score.
+
+  Gemini will determine if the user is correct and then call this tool, which will
+  allow the game state to be updated appropriately.
+  """
+  state = me.state(State)
+  selected_question = get_selected_question(state.board, state.selected_question_key)
+  if is_correct:
+    state.score += selected_question.normalized_value
+  else:
+    state.score -= selected_question.normalized_value
+
+  # Clear question so another can be picked.
+  state.answered_questions.add(state.selected_question_key)
+  state.selected_question_key = ""
+
+  return f"The user's score is {state.score}"
+
+
+def tool_call_get_clue(category_index, dollar_index) -> str:
+  """Gets the selected clue.
+
+  Gemini will parse the user request and make a tool call with the row/col indexes.
+
+  Example: "Category X for $400".
+  """
+  cell_key = f"clue-{category_index}-{dollar_index}"
+  response = handle_select_clue(cell_key)
+
+  if isinstance(response, str):
+    return "There was an error. " + response
+
+  return f"The clue is {response.question}\n\n The answer to the clue is {response.answer}\n\n Please read the clue to the user."
+
+
+def handle_select_clue(clue_key: str) -> Clue | str:
+  """Handles logic for clicking on a clue.
+
+  If it returns a string, it will be an error message.
+  If it returns a clue, that means a valid clue was selected.
+  """
+  state = me.state(State)
+  if state.selected_question_key:
+    return "A clue has already been selected."
+  if clue_key in state.answered_questions:
+    return "That clue has already been selected."
+  state.selected_question_key = clue_key
+  return get_selected_question(state.board, state.selected_question_key)
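For reference, handle_tool_calls above expects e.value["toolCalls"] to be a JSON-encoded list and stores a JSON-encoded reply with one entry per call. A minimal sketch of the round trip, with hypothetical values (the exact wire format is whatever the gemini_live_connection web component sends):

import json

# Hypothetical payload as it would arrive from the web component.
incoming = json.dumps(
  [{"id": "call-1", "name": "get_clue", "args": {"category_index": 2, "dollar_index": 3}}]
)

# Shape of the reply that handle_tool_calls writes to state.tool_call_responses.
outgoing = json.dumps(
  [{"id": "call-1", "name": "get_clue", "response": {"result": "The clue is ..."}}]
)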
models.py
ADDED
@@ -0,0 +1,17 @@
+from pydantic import BaseModel
+
+
+class Clue(BaseModel):
+  air_date: str
+  category: str
+  question: str
+  value: str | None
+  answer: str
+  round: str
+  show_number: str
+  raw_value: int = 0
+  normalized_value: int = 0
+
+
+class Board(BaseModel):
+  clues: list[list[Clue]]
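Since Clue is a pydantic model, rows from the raw data set validate directly, and the two fields the data set lacks (raw_value, normalized_value) fall back to their defaults until question_bank.py fills them in. A small sketch using the sample row from question_bank.py's docstring:

from models import Clue

clue = Clue(
  category="HISTORY",
  air_date="2004-12-31",
  question="'For the last 8 years of his life, Galileo was...",
  value="$200",
  answer="Copernicus",
  round="Jeopardy!",
  show_number="4680",
)
print(clue.raw_value, clue.normalized_value)  # 0 0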
question_bank.py
ADDED
@@ -0,0 +1,132 @@
+import json
+import re
+from collections import defaultdict
+
+from models import Clue
+
+
+QuestionSet = list[Clue]
+
+_JEOPARDY_DATA = "data/jeopardy.json"
+_NUM_QUESTIONS_PER_CATEGORY = 5
+
+
+def load() -> list[QuestionSet]:
+  """Loads a cleaned up data set to use in the Mesop Jeopardy game."""
+  data = _load_raw_data()
+  data = _add_raw_value(data)
+  data = _clean_questions(data)
+  question_sets = _group_into_question_sets(data)
+  question_sets = _sort_question_sets(question_sets)
+  question_sets = _normalize_values(question_sets)
+  return _filter_out_incomplete_question_sets(question_sets)
+
+
+def _load_raw_data() -> QuestionSet:
+  """Loads the raw data set.
+
+  The format of each question/clue looks like this:
+
+  {
+    "category": "HISTORY",
+    "air_date": "2004-12-31",
+    "question": "'For the last 8 years of his life, Galileo was...",
+    "value": "$200",
+    "answer": "Copernicus",
+    "round": "Jeopardy!",
+    "show_number": "4680"
+  }
+  """
+  with open(_JEOPARDY_DATA, "r") as f:
+    return [Clue(**row) for row in json.load(f)]
+
+
+def _add_raw_value(data: QuestionSet) -> QuestionSet:
+  """Adds a raw value, since the value is formatted as a dollar string that isn't as
+  easy to sort."""
+  for row in data:
+    row.raw_value = _convert_dollar_amount(row.value)
+  return data
+
+
+def _clean_questions(data: QuestionSet) -> QuestionSet:
+  """Cleans up questions.
+
+  - Strip single quotes around each question
+  - Replace escaped single quotes
+  - Strip HTML tags
+  """
+  for row in data:
+    row.question = re.sub("<[^<]+?>", "", row.question.strip("'").replace("\\'", "'"))
+  return data
+
+
+def _convert_dollar_amount(value: str | None) -> int:
+  """Converts the raw value into an integer.
+
+  The raw value is a string formatted as a dollar amount, such as $1,000. In this
+  data set the dollar amount is not given for Daily Doubles that were not answered, so
+  we'll set those cases to a value of 0 for now.
+
+  In addition, answered Daily Doubles will have odd dollar amounts.
+
+  These values won't be used in the actual game, only for roughly sorting the
+  questions by difficulty.
+  """
+  if value:
+    return int(value.replace("$", "").replace(",", ""))
+  else:
+    return 0
+
+
+def _group_into_question_sets(data: QuestionSet) -> list[QuestionSet]:
+  """Groups the questions by category for that air date.
+
+  We want to mix and match questions across games, but we want to keep the questions
+  within a category together.
+  """
+  question_sets = defaultdict(lambda: [])
+  for row in data:
+    question_sets[(row.category, row.air_date)].append(row)
+  return list(question_sets.values())
+
+
+def _sort_question_sets(question_sets: list[QuestionSet]) -> list[QuestionSet]:
+  return [_sort_question_set(question_set) for question_set in question_sets]
+
+
+def _sort_question_set(question_set: QuestionSet) -> QuestionSet:
+  """Sorts the question set so the questions are roughly ordered by difficulty.
+
+  This will not always be accurate due to Daily Doubles skewing the order. The data
+  set did not store the Daily Double values separately from the normal game value.
+  """
+  return sorted(question_set, key=lambda q: q.raw_value)
+
+
+def _normalize_values(question_sets: list[QuestionSet]) -> list[QuestionSet]:
+  """Normalizes question dollar amounts based on order of appearance.
+
+  Since we're picking random categories across different rounds and years, the dollar
+  values will differ. So we normalize them here.
+  """
+  for question_set in question_sets:
+    for index, question in enumerate(question_set):
+      question.normalized_value = (index + 1) * 200
+  return question_sets
+
+
+def _filter_out_incomplete_question_sets(question_sets: list[QuestionSet]) -> list[QuestionSet]:
+  """Filters out question sets that are incomplete (do not contain five questions).
+
+  Final Jeopardy categories only have one question, so we want to ignore those.
+  We also want to avoid anomalies in the data set.
+
+  In addition, there are cases where not all questions were answered for a category,
+  which means the board would be missing a question.
+  """
+  return [
+    question_set
+    for question_set in question_sets
+    if len(question_set) == _NUM_QUESTIONS_PER_CATEGORY
+  ]
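Putting the pipeline together: after _sort_question_set orders a category by raw_value, _normalize_values assigns (index + 1) * 200, so every surviving five-question category lands on the standard $200 to $1000 board values no matter which round it came from. For example:

# A Double Jeopardy! category's raw values...
raw_values = [400, 800, 1200, 1600, 2000]
# ...normalize by position, not by amount.
normalized = [(index + 1) * 200 for index in range(len(raw_values))]
print(normalized)  # [200, 400, 600, 800, 1000]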
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+Flask==3.1.0
+google-genai==0.6.0
+gunicorn==23.0.0
+mesop==0.14.1
+pydantic==2.10.5
+websockets==14.2
ruff.toml
ADDED
@@ -0,0 +1,2 @@
+line-length = 100
+indent-width = 2
state.py
ADDED
@@ -0,0 +1,41 @@
+from typing import Literal
+from dataclasses import field
+import random
+import os
+
+import question_bank
+import mesop as me
+from models import Board
+
+
+_NUM_CATEGORIES = 6
+_QUESTION_SETS = question_bank.load()
+
+
+@me.stateclass
+class State:
+  selected_clue: str
+  board: Board = field(default_factory=lambda: make_default_board(_QUESTION_SETS))
+  # Used for clearing the text input.
+  response_value: str
+  response: str
+  score: int
+  # Key format: clue-{row_index}-{col_index}
+  selected_question_key: str
+  # Set is not JSON serializable
+  # Key format: clue-{row_index}-{col_index}
+  answered_questions: set[str] = field(default_factory=set)
+  # Gemini Live API
+  api_key: str = os.getenv("GOOGLE_API_KEY", "")
+  gemini_live_api_enabled: bool = False
+  gemini_live_api_config: str
+  audio_player_enabled: bool = False
+  audio_recorder_state: Literal["disabled", "initializing", "recording"] = "disabled"
+  tool_call_responses: str = ""
+  text_input: str = ""
+
+
+def make_default_board(jeopardy_questions) -> Board:
+  """Creates a board with some random Jeopardy questions."""
+  random.shuffle(jeopardy_questions)
+  return Board(clues=jeopardy_questions[:_NUM_CATEGORIES])
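One design note: make_default_board shuffles the module-level _QUESTION_SETS list in place and takes the first six entries, so each new board draws from a freshly reshuffled shared list. A non-mutating variant (an assumption for illustration, not what this commit does) would use random.sample:

import random

def make_default_board_sampled(jeopardy_questions) -> Board:
  # Hypothetical alternative: picks six random categories without
  # reordering the shared question list.
  return Board(clues=random.sample(jeopardy_questions, _NUM_CATEGORIES))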
trebek_bot.py
ADDED
@@ -0,0 +1,91 @@
+from typing import Literal
+import json
+
+
+type VoiceName = Literal["Aoede", "Charon", "Fenrir", "Kore", "Puck"]
+type GeminiModel = Literal["gemini-2.0-flash-exp"]
+
+
+_TOOL_DEFINITIONS = {
+  "functionDeclarations": [
+    {
+      "name": "get_clue",
+      "description": "Gets the clue from the board which returns the clue and answer",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "category_index": {"type": "integer", "description": "Index of selected category."},
+          "dollar_index": {"type": "integer", "description": "Index of selected dollar amount."},
+        },
+        "required": ["category_index", "dollar_index"],
+      },
+    },
+    {
+      "name": "update_score",
+      "description": "Updates whether the user got the question correct or not.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "is_correct": {"type": "boolean", "description": "True if correct. False if incorrect."},
+        },
+        "required": ["is_correct"],
+      },
+    },
+  ]
+}
+
+_SYSTEM_INSTRUCTIONS = """
+You are the host of Jeopardy. Make sure users follow the rules of the game.
+
+You have access to the following tools:
+- get_clue: Gets the clue selected by the user. Always use this for picking clues. Do not make up your own clues.
+- update_score: Updates the user's score depending on whether they answered the clue correctly.
+
+The categories are [[categories]]. Each category has 5 questions, with the following dollar
+amounts: $200, $400, $600, $800, $1000.
+
+When the user asks for a clue, they will specify the category and dollar amount. Use the
+`get_clue` tool by passing in the corresponding indexes for the category and dollar
+amount.
+
+For example, if the categories are Witches, Gold Rush, American History, Desserts, Wet & Wild,
+and the user says "American History for $800", the index will be 2 for the category and 3
+for the dollar amount.
+
+The `get_clue` tool will return the clue and answer if it is valid. If it is invalid, it
+will return an error message.
+
+Wait for the `get_clue` tool response before responding.
+
+When you get the response to the `get_clue` tool, read the clue to the user.
+
+Briefly explain to the user why their answer is correct or wrong.
+
+Use the `update_score` tool to update their score. Pass in true if they were correct.
+Pass in false if they were not correct. This tool will return the user's current score.
+""".strip()
+
+
+def make_system_instruction(categories: list[str]):
+  return _SYSTEM_INSTRUCTIONS.replace("[[categories]]", ", ".join(categories))
+
+
+def make_gemini_live_api_config(
+  model: GeminiModel = "gemini-2.0-flash-exp",
+  system_instructions: str = "",
+  voice_name: VoiceName = "Puck",
+):
+  return json.dumps(
+    {
+      "setup": {
+        "model": f"models/{model}",
+        "system_instruction": {"role": "user", "parts": [{"text": system_instructions}]},
+        "tools": _TOOL_DEFINITIONS,
+        "generation_config": {
+          "temperature": 0.3,
+          "response_modalities": ["audio"],
+          "speech_config": {"voice_config": {"prebuilt_voice_config": {"voice_name": voice_name}}},
+        },
+      }
+    }
+  )
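For reference, calling make_gemini_live_api_config with its defaults produces a JSON setup message along these lines (a sketch of the output, abbreviated):

import json
import trebek_bot

config = json.loads(
  trebek_bot.make_gemini_live_api_config(
    system_instructions=trebek_bot.make_system_instruction(["Witches", "Gold Rush"])
  )
)
print(config["setup"]["model"])  # models/gemini-2.0-flash-exp
print(config["setup"]["generation_config"]["response_modalities"])  # ['audio']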
web_components/audio_player.js
ADDED
@@ -0,0 +1,172 @@
+import {
+  LitElement,
+  html,
+} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
+
+class AudioPlayer extends LitElement {
+  static properties = {
+    playEvent: { type: String },
+    stopEvent: { type: String },
+    enabled: { type: Boolean },
+    data: { type: String },
+  };
+
+  constructor() {
+    super();
+    this.enabled = false;
+    this.audioContext = null; // Initialize audio context
+    this.sampleRate = 24000; // Gemini Live API sends data at 24000hz
+    this.channels = 1;
+    this.queue = [];
+    this.isPlaying = false;
+
+    this.onGeminiLiveStarted = (e) => {
+      if (!this.enabled) {
+        this.playAudio();
+      }
+    };
+
+    this.onGeminiLiveStopped = (e) => {
+      this.dispatchEvent(new MesopEvent(this.stopEvent, {}));
+    };
+
+    this.onAudioOutputReceived = (e) => {
+      this.addToQueue(e.detail.data);
+    };
+  }
+
+  connectedCallback() {
+    super.connectedCallback();
+    window.addEventListener(
+      "audio-output-received",
+      this.onAudioOutputReceived
+    );
+    window.addEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.addEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  disconnectedCallback() {
+    super.disconnectedCallback();
+    if (this.audioContext) {
+      this.audioContext.close();
+    }
+    window.removeEventListener(
+      "audio-output-received",
+      this.onAudioOutputReceived
+    );
+    window.removeEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.removeEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  firstUpdated() {
+    if (this.enabled) {
+      this.playAudio();
+    }
+  }
+
+  updated(changedProperties) {
+    // Add audio chunks to queue to play.
+    if (changedProperties.has("data") && this.data.length > 0) {
+      this.addToQueue(this.data);
+    }
+
+    // Clear the queue if the audio player is disabled.
+    if (changedProperties.has("enabled") && !this.enabled) {
+      this.queue = [];
+    }
+  }
+
+  addToQueue(base64Data) {
+    if (!this.enabled) {
+      return;
+    }
+    this.queue.push(base64Data);
+    if (!this.isPlaying) {
+      this.playNext();
+    }
+  }
+
+  playAudio() {
+    if (!this.enabled) {
+      this.dispatchEvent(new MesopEvent(this.playEvent, {}));
+    }
+    if (!this.audioContext) {
+      this.audioContext = new AudioContext();
+    }
+    this.playNext();
+  }
+
+  playNext() {
+    if (!this.enabled || !this.audioContext || this.queue.length === 0) {
+      this.isPlaying = false;
+      return;
+    }
+
+    this.isPlaying = true;
+    const data = this.queue.shift();
+    const source = this.playPCM(data);
+
+    source.onended = () => {
+      this.playNext();
+    };
+  }
+
+  playPCM(data) {
+    // Convert base64 to binary.
+    const binaryAudio = atob(data);
+
+    // Convert binary string to ArrayBuffer.
+    const audioBuffer = new ArrayBuffer(binaryAudio.length);
+    const bufferView = new Uint8Array(audioBuffer);
+    for (let i = 0; i < binaryAudio.length; i++) {
+      bufferView[i] = binaryAudio.charCodeAt(i);
+    }
+
+    // Convert to 16-bit PCM data.
+    const pcmData = new Int16Array(audioBuffer);
+
+    // Create audio buffer.
+    const frameCount = pcmData.length;
+    const audioBufferData = this.audioContext.createBuffer(
+      this.channels,
+      frameCount,
+      this.sampleRate
+    );
+
+    // Get channel data and convert PCM to float32.
+    const channelData = audioBufferData.getChannelData(0);
+    for (let i = 0; i < frameCount; i++) {
+      // Convert 16-bit PCM (-32768 to 32767) to float32 (-1.0 to 1.0)
+      channelData[i] = pcmData[i] / 32768.0;
+    }
+
+    // Create and play the source.
+    const source = this.audioContext.createBufferSource();
+    source.buffer = audioBufferData;
+    source.connect(this.audioContext.destination);
+    source.start();
+
+    return source;
+  }
+
+  render() {
+    if (this.enabled) {
+      return html`<span><slot></slot></span>`;
+    }
+    return html`<span @click="${this.playAudio}"><slot></slot></span>`;
+  }
+}
+
+customElements.define("audio-player", AudioPlayer);
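The playPCM method above interprets each byte pair as a signed 16-bit sample and divides by 32768 to reach Web Audio's float range, so -32768 maps to -1.0 and 16384 maps to 0.5. The same conversion as a short Python sketch:

import struct

def pcm16_to_float(raw: bytes) -> list[float]:
  # Little-endian signed 16-bit PCM -> floats in [-1.0, 1.0), as in playPCM.
  samples = struct.unpack(f"<{len(raw) // 2}h", raw)
  return [sample / 32768.0 for sample in samples]

print(pcm16_to_float(struct.pack("<3h", -32768, 0, 16384)))  # [-1.0, 0.0, 0.5]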
web_components/audio_player.py
ADDED
@@ -0,0 +1,40 @@
+from typing import Any, Callable
+import base64
+
+import mesop.labs as mel
+
+
+@mel.web_component(path="./audio_player.js")
+def audio_player(
+  *,
+  enabled: bool = False,
+  data: bytes = b"",
+  on_play: Callable[[mel.WebEvent], Any] | None = None,
+  on_stop: Callable[[mel.WebEvent], Any] | None = None,
+):
+  """Plays audio streamed from the server.
+
+  An important thing to note is that the audio player does not persist the data it
+  receives. Instead the data is stored in a queue and removed once the audio has been
+  played.
+
+  This is a barebones configuration that sets the sample rate to 24000hz since that is
+  what Gemini returns. In addition, we expect the data to be in PCM format.
+  """
+  return mel.insert_web_component(
+    name="audio-player",
+    events=_filter_events(
+      {
+        "playEvent": on_play,
+        "stopEvent": on_stop,
+      }
+    ),
+    properties={
+      "enabled": enabled,
+      "data": base64.b64encode(data).decode("utf-8"),
+    },
+  )
+
+
+def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
+  return {event: callback for event, callback in events.items() if callback}
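A minimal usage sketch (hypothetical page path and handlers; the real wiring is in main.py, where playback is fed by the browser-side audio-output-received event rather than the data property):

import mesop as me
import mesop.labs as mel

from web_components.audio_player import audio_player


def on_play(e: mel.WebEvent):
  print("audio player enabled")  # hypothetical handler for illustration


def on_stop(e: mel.WebEvent):
  print("audio player disabled")


@me.page(path="/audio-demo")  # hypothetical demo page
def page():
  with audio_player(enabled=False, on_play=on_play, on_stop=on_stop):
    me.text("Tap to enable audio")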
web_components/audio_recorder.js
ADDED
@@ -0,0 +1,496 @@
+import {
+  LitElement,
+  html,
+} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
+
+class AudioRecorder extends LitElement {
+  static properties = {
+    dataEvent: { type: String },
+    stateChangeEvent: { type: String },
+    state: { type: String },
+    isRecording: { type: Boolean },
+    debugBuffer: { state: true },
+    debug: { type: Boolean },
+    voiceDetectionEnabled: { type: Boolean },
+    voiceThreshold: { type: Number },
+    voiceHoldTime: { type: Number },
+  };
+
+  constructor() {
+    super();
+    this.debug = false;
+    this.mediaStream = null;
+    this.audioContext = null;
+    this.processor = null;
+    this.isStreaming = false;
+    this.isRecording = false;
+    this.isInitializing = false;
+    this.sequenceNumber = 0;
+    this.debugBuffer = [];
+    this.debugBufferSize = 50;
+    this.targetSampleRate = 16000;
+
+    // Voice detection parameters
+    this.voiceDetectionEnabled = true; // Enable by default
+    this.voiceThreshold = 0.01; // RMS threshold for voice detection
+    this.voiceHoldTime = 500; // Time to hold voice detection state in ms
+    this.lastVoiceDetectedTime = 0; // Last time voice was detected
+    this.isVoiceDetected = false; // Current voice detection state
+    this.consecutiveSilentFrames = 0; // Counter for silent frames
+    this.silenceThreshold = 10; // Number of silent frames before cutting off
+
+    this.onGeminiLiveStarted = (e) => {
+      if (this.isRecording) {
+        this.startStreaming();
+      }
+    };
+    this.onGeminiLiveStopped = (e) => {
+      this.stop();
+    };
+  }
+
+  connectedCallback() {
+    super.connectedCallback();
+    window.addEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.addEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  disconnectedCallback() {
+    super.disconnectedCallback();
+    this.stop();
+    window.removeEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.removeEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  firstUpdated() {
+    if (this.state !== "disabled") {
+      this.startStreaming();
+    }
+  }
+
+  log(...args) {
+    if (this.debug) {
+      console.log(...args);
+    }
+  }
+
+  warn(...args) {
+    if (this.debug) {
+      console.warn(...args);
+    }
+  }
+
+  error(...args) {
+    if (this.debug) {
+      console.error(...args);
+    }
+  }
+
+  isVoiceFrame(audioData) {
+    // Calculate RMS of the audio frame
+    let sumSquares = 0;
+    for (let i = 0; i < audioData.length; i++) {
+      sumSquares += audioData[i] * audioData[i];
+    }
+    const rms = Math.sqrt(sumSquares / audioData.length);
+
+    const now = Date.now();
+
+    // Check if we detect voice in this frame
+    if (rms > this.voiceThreshold) {
+      this.lastVoiceDetectedTime = now;
+      this.consecutiveSilentFrames = 0;
+      this.isVoiceDetected = true;
+      return true;
+    }
+
+    // Check if we're still within the hold time
+    if (now - this.lastVoiceDetectedTime < this.voiceHoldTime) {
+      return true;
+    }
+
+    // Increment silent frames counter
+    this.consecutiveSilentFrames++;
+
+    // If we've seen enough silent frames, mark as silent
+    if (this.consecutiveSilentFrames > this.silenceThreshold) {
+      this.isVoiceDetected = false;
+    }
+
+    return this.isVoiceDetected;
+  }
+
+  async startStreaming() {
+    if (this.state === "disabled") {
+      this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "initializing"));
+    }
+    this.isInitializing = true;
+    const initialized = await this.initialize();
+    this.isInitializing = false;
+    if (initialized) {
+      this.isRecording = true;
+      this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "recording"));
+      this.start();
+    }
+  }
+
+  async initialize() {
+    try {
+      // First check what sample rates are supported with echo cancellation
+      const testStream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          autoGainControl: true,
+        },
+        video: false,
+      });
+
+      // Get the actual sample rate from the system
+      const systemTrack = testStream.getAudioTracks()[0];
+      const settings = systemTrack.getSettings();
+      this.log("System audio settings:", settings);
+
+      // Clean up the test stream
+      testStream.getTracks().forEach((track) => track.stop());
+
+      // Now create the real stream using the system's capabilities
+      this.mediaStream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          channelCount: 1,
+          sampleRate: settings.sampleRate,
+          echoCancellation: true,
+          noiseSuppression: true,
+          autoGainControl: true,
+          echoCancellationType: "system",
+          latency: 0,
+        },
+        video: false,
+      });
+
+      // Log the actual constraints that were applied
+      const audioTrack = this.mediaStream.getAudioTracks()[0];
+      const actualConstraints = audioTrack.getSettings();
+      this.log("Applied audio constraints:", actualConstraints);
+
+      // Set up audio context matching the system rate
+      this.audioContext = new AudioContext({
+        sampleRate: settings.sampleRate,
+      });
+      this.log(
+        "AudioContext created with sample rate:",
+        this.audioContext.sampleRate
+      );
+
+      const micSource = this.audioContext.createMediaStreamSource(
+        this.mediaStream
+      );
+
+      this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
+
+      // Connect the audio nodes
+      micSource.connect(this.processor);
+      this.processor.connect(this.audioContext.destination);
+
+      return true;
+    } catch (error) {
+      this.error("Error initializing audio streamer:", error);
+      return false;
+    }
+  }
+
+  downsampleBuffer(buffer, originalSampleRate) {
+    if (originalSampleRate === this.targetSampleRate) {
+      return buffer;
+    }
+
+    const ratio = originalSampleRate / this.targetSampleRate;
+    const newLength = Math.floor(buffer.length / ratio);
+    const result = new Float32Array(newLength);
+
+    for (let i = 0; i < newLength; i++) {
+      const startIndex = Math.floor(i * ratio);
+      const endIndex = Math.floor((i + 1) * ratio);
+      let sum = 0;
+      let count = 0;
+
+      for (let j = startIndex; j < endIndex && j < buffer.length; j++) {
+        sum += buffer[j];
+        count++;
+      }
+
+      result[i] = count > 0 ? sum / count : 0;
+    }
+
+    this.log("Downsampling details:", {
+      originalRate: originalSampleRate,
+      targetRate: this.targetSampleRate,
+      originalLength: buffer.length,
+      newLength: result.length,
+      actualRatio: buffer.length / result.length,
+    });
+
+    return result;
+  }
+
+  addAudioDebugger(sourceNode, label) {
+    if (!this.debug) return;
+
+    const analyser = this.audioContext.createAnalyser();
+    analyser.fftSize = 2048;
+    sourceNode.connect(analyser);
+
+    const bufferLength = analyser.frequencyBinCount;
+    const dataArray = new Float32Array(bufferLength);
+
+    this.debugInterval = setInterval(() => {
+      if (!this.isStreaming) return;
+
+      analyser.getFloatTimeDomainData(dataArray);
+      let rms = 0;
+      for (let i = 0; i < bufferLength; i++) {
+        rms += dataArray[i] * dataArray[i];
+      }
+      rms = Math.sqrt(rms / bufferLength);
+      this.log(`${label} RMS Level: ${rms.toFixed(6)}`);
+    }, 1000);
+  }
+
+  start() {
+    this.isStreaming = true;
+    this.debugBuffer = [];
+    this.lastVoiceDetectedTime = 0;
+    this.isVoiceDetected = false;
+    this.consecutiveSilentFrames = 0;
+
+    this.processor.onaudioprocess = (event) => {
+      if (!this.isStreaming) return;
+
+      const inputData = event.inputBuffer.getChannelData(0);
+      const originalSampleRate = event.inputBuffer.sampleRate;
+
+      // Log initial processing details if needed
+      if (this.sequenceNumber === 0) {
+        this.log("Audio Processing Details:", {
+          bufferSize: this.processor.bufferSize,
+          inputChannels: this.processor.numberOfInputs,
+          outputChannels: this.processor.numberOfOutputs,
+          originalSampleRate: originalSampleRate,
+          targetSampleRate: this.targetSampleRate,
+          length: inputData.length,
+          timestamp: event.timeStamp,
+        });
+      }
+
+      // Check for voice activity if enabled
+      if (this.voiceDetectionEnabled && !this.isVoiceFrame(inputData)) {
+        // Skip this frame if no voice is detected
+        this.sequenceNumber++; // Still increment to maintain sequence
+        return;
+      }
+
+      const downsampledData = this.downsampleBuffer(
+        inputData,
+        originalSampleRate
+      );
+
+      const processedData = new Float32Array(downsampledData.length);
+      const gain = 5.0;
+      for (let i = 0; i < downsampledData.length; i++) {
+        processedData[i] = downsampledData[i] * gain;
+      }
+
+      // Debug logging
+      if (this.sequenceNumber % 50 === 0 && this.debug) {
+        const stats = {
+          originalLength: inputData.length,
+          downsampledLength: downsampledData.length,
+          maxValue: Math.max(...processedData),
+          minValue: Math.min(...processedData),
+          originalSampleRate,
+          targetSampleRate: this.targetSampleRate,
+          isVoiceDetected: this.isVoiceDetected,
+        };
+        this.log("Audio buffer stats:", stats);
+      }
+
+      // Store in debug buffer
+      this.debugBuffer.push(processedData);
+      if (this.debugBuffer.length > this.debugBufferSize) {
+        this.debugBuffer.shift();
+      }
+
+      // Audio level monitoring
+      let rms = 0;
+      for (let i = 0; i < processedData.length; i++) {
+        rms += processedData[i] * processedData[i];
+      }
+      rms = Math.sqrt(rms / processedData.length);
+
+      if (this.sequenceNumber % 10 === 0 && this.debug) {
+        this.log(
+          `Audio Level (RMS): ${rms.toFixed(4)}, Voice Detected: ${
+            this.isVoiceDetected
+          }`
+        );
+        if (rms < 0.0001) {
+          this.warn(
+            "Warning: Very low audio level detected. Check if microphone is working."
+          );
+        }
+      }
+
+      // Convert to Int16Array for transmission
+      const intData = new Int16Array(processedData.length);
+      for (let i = 0; i < processedData.length; i++) {
+        intData[i] = Math.max(
+          -32768,
+          Math.min(32767, processedData[i] * 32768)
+        );
+
+        if (this.sequenceNumber % 100 === 0 && i < 10 && this.debug) {
+          this.log(
+            `Sample ${i}: Float=${processedData[i].toFixed(4)}, Int16=${
+              intData[i]
+            }`
+          );
+        }
+      }
+
+      // Convert to base64 and dispatch
+      const bytes = new Uint8Array(intData.buffer);
+      const base64Data = btoa(
+        Array.from(bytes)
+          .map((byte) => String.fromCharCode(byte))
+          .join("")
+      );
+
+      this.dispatchEvent(
+        new MesopEvent(this.dataEvent, {
+          sequence: this.sequenceNumber++,
+          sampleRate: this.targetSampleRate,
+          data: base64Data,
+          isVoice: this.isVoiceDetected,
+        })
+      );
+
+      this.dispatchEvent(
+        new CustomEvent("audio-input-received", {
+          detail: { data: base64Data },
+          // Allow event to cross shadow DOM boundaries (both need to be true)
+          bubbles: true,
+          composed: true,
+        })
+      );
+    };
+
+    return true;
+  }
+
+  stop() {
+    this.isStreaming = false;
+    this.isRecording = false;
+
+    this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "disabled"));
+
+    if (this.debugInterval) {
+      clearInterval(this.debugInterval);
+    }
+
+    if (this.processor) {
+      this.processor.onaudioprocess = null;
+    }
+
+    if (this.mediaStream) {
+      this.mediaStream.getTracks().forEach((track) => track.stop());
+    }
+
+    if (this.audioContext) {
+      this.audioContext.close();
+    }
+  }
+
+  async playbackDebug() {
+    if (!this.debugBuffer.length) {
+      this.log("No audio data available for playback");
+      return;
+    }
+
+    const playbackContext = new AudioContext();
+    const systemSampleRate = playbackContext.sampleRate;
+
+    const totalSamples16k =
+      this.debugBuffer.length * this.debugBuffer[0].length;
+
+    const upsampledLength = Math.round(
+      totalSamples16k * (systemSampleRate / this.targetSampleRate)
+    );
+
+    const audioBuffer = playbackContext.createBuffer(
+      1,
+      upsampledLength,
+      systemSampleRate
+    );
+
+    const channelData = audioBuffer.getChannelData(0);
+
+    const combined16kBuffer = new Float32Array(totalSamples16k);
+    let offset = 0;
+    for (let i = 0; i < this.debugBuffer.length; i++) {
+      combined16kBuffer.set(this.debugBuffer[i], offset);
+      offset += this.debugBuffer[i].length;
+    }
+
+    const ratio = this.targetSampleRate / systemSampleRate;
+    for (let i = 0; i < upsampledLength; i++) {
+      const position = i * ratio;
+      const index = Math.floor(position);
+      const decimal = position - index;
+
+      const sample1 = combined16kBuffer[index] || 0;
+      const sample2 = combined16kBuffer[index + 1] || sample1;
+      channelData[i] = sample1 + decimal * (sample2 - sample1);
+    }
+
+    const source = playbackContext.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(playbackContext.destination);
+    source.start();
+    this.log("Playing debug audio at system rate...", {
+      systemSampleRate,
+      originalLength: totalSamples16k,
+      upsampledLength,
+    });
+
+    source.onended = () => {
+      this.log("Debug playback finished");
+      playbackContext.close();
+    };
+  }
+
+  render() {
+    if (this.isInitializing) {
+      return html`<span><slot></slot></span>`;
+    }
+
+    if (this.isRecording) {
+
return html`<span @click="${this.stop}"><slot></slot></span> `;
|
490 |
+
}
|
491 |
+
|
492 |
+
return html`<span @click="${this.startStreaming}"><slot></slot></span>`;
|
493 |
+
}
|
494 |
+
}
|
495 |
+
|
496 |
+
customElements.define("audio-recorder", AudioRecorder);
|
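For reference, a minimal sketch of how the Mesop side could decode one of the base64 PCM chunks dispatched above. The handler name is hypothetical and only standard-library modules are used; the one assumption is that the payload arrives in `event.value["data"]`, mirroring the `MesopEvent` dispatch in this file.

import base64
from array import array

import mesop.labs as mel


def decode_audio_chunk(event: mel.WebEvent):
    # Hypothetical handler: decode the base64 payload from the dataEvent above.
    # The bytes are 16-bit signed PCM at targetSampleRate (16000 Hz), in the
    # byte order of the recording machine (little-endian in practice).
    pcm_bytes = base64.b64decode(event.value["data"])
    samples = array("h")
    samples.frombytes(pcm_bytes)
    # Normalize back to floats in [-1, 1], mirroring the Int16 conversion above.
    floats = [s / 32768 for s in samples]
    print(f"sequence={event.value['sequence']} samples={len(floats)}")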
web_components/audio_recorder.py
ADDED
@@ -0,0 +1,44 @@
from typing import Any, Callable, Literal

import mesop.labs as mel


@mel.web_component(path="./audio_recorder.js")
def audio_recorder(
    *,
    state: Literal["disabled", "initializing", "recording"] = "disabled",
    on_data: Callable[[mel.WebEvent], Any] | None = None,
    on_state_change: Callable[[mel.WebEvent], Any] | None = None,
):
    """Records audio and streams it to the Mesop server.

    This web component is designed to work with `MESOP_WEBSOCKETS_ENABLED=true`.

    The `on_data` event returns continuous chunks of audio in base64-encoded PCM
    format at a 16000 Hz sampling rate. For some reason the Gemini Live API only
    accepts PCM data at 16000 Hz; at 48000 Hz, nothing is returned. Perhaps there
    is a setting to override the expected sampling rate when sending audio to the
    Gemini Live API. Unfortunately, the docs are very sparse right now.

    The data event looks like:

        {
          "data": <base64-encoded-string>
        }
    """
    return mel.insert_web_component(
        name="audio-recorder",
        events=_filter_events(
            {
                "dataEvent": on_data,
                "stateChangeEvent": on_state_change,
            }
        ),
        properties={
            "state": state,
        },
    )


def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    return {event: callback for event, callback in events.items() if callback}
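A hedged usage sketch (not part of this commit) showing how `audio_recorder` could be wired into a Mesop page. It assumes Mesop slots children into the component via a `with` block, which the `<slot>` in the component template suggests; the page path and handler are illustrative.

import mesop as me
import mesop.labs as mel

from web_components.audio_recorder import audio_recorder


def handle_audio_chunk(event: mel.WebEvent):
    # event.value looks like {"sequence": ..., "sampleRate": 16000,
    # "data": "<base64 PCM>", "isVoice": ...} per audio_recorder.js.
    print("got chunk:", event.value["sequence"])


@me.page(path="/audio_demo")
def audio_demo():
    # The slotted content becomes the clickable recording toggle.
    with audio_recorder(state="disabled", on_data=handle_audio_chunk):
        me.text("Toggle microphone")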
web_components/gemini_live_connection.js
ADDED
@@ -0,0 +1,296 @@
import {
  LitElement,
  html,
} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";

class GeminiLiveConnection extends LitElement {
  static properties = {
    api_config: { type: String },
    enabled: { type: Boolean },
    endpoint: { type: String },
    startEvent: { type: String },
    stopEvent: { type: String },
    text_input: { type: String },
    toolCallEvent: { type: String },
    tool_call_responses: { type: String },
  };

  constructor() {
    super();
    this.onSetupComplete = () => {
      console.log("Setup complete...");
    };
    this.onAudioData = (base64Data) => {
      this.dispatchEvent(
        new CustomEvent("audio-output-received", {
          detail: { data: base64Data },
          // Allow event to cross shadow DOM boundaries (both need to be true)
          bubbles: true,
          composed: true,
        })
      );
    };
    this.onInterrupted = () => {};
    this.onTurnComplete = () => {};
    this.onError = () => {};
    this.onClose = () => {
      console.log("Web socket closed...");
    };
    this.onToolCall = (toolCalls) => {
      this.dispatchEvent(
        new MesopEvent(this.toolCallEvent, {
          toolCalls: JSON.stringify(toolCalls.functionCalls),
        })
      );
    };
    this.pendingSetupMessage = null;

    this.onAudioInputReceived = (e) => {
      this.sendAudioChunk(e.detail.data);
    };
  }

  connectedCallback() {
    super.connectedCallback();
    // Start listening for events when component is connected
    window.addEventListener("audio-input-received", this.onAudioInputReceived);
  }

  disconnectedCallback() {
    super.disconnectedCallback();
    window.removeEventListener(
      "audio-input-received",
      this.onAudioInputReceived
    );
    if (this.ws) {
      this.ws.close();
    }
  }

  firstUpdated() {
    if (this.enabled) {
      this.setupWebSocket();
    }
  }

  updated(changedProperties) {
    if (
      changedProperties.has("tool_call_responses") &&
      this.tool_call_responses.length > 0
    ) {
      this.sendToolResponse(JSON.parse(this.tool_call_responses));
    }
    if (changedProperties.has("text_input") && this.text_input.length > 0) {
      this.sendTextMessage(this.text_input);
    }
  }

  start() {
    if (!this.enabled) {
      this.dispatchEvent(new MesopEvent(this.startEvent, {}));
      this.dispatchEvent(
        new CustomEvent("gemini-live-api-started", {
          detail: {},
          // Allow event to cross shadow DOM boundaries (both need to be true)
          bubbles: true,
          composed: true,
        })
      );
    }
    this.setupWebSocket();
  }

  stop() {
    this.dispatchEvent(new MesopEvent(this.stopEvent, {}));
    this.dispatchEvent(
      new CustomEvent("gemini-live-api-stopped", {
        detail: {},
        // Allow event to cross shadow DOM boundaries (both need to be true)
        bubbles: true,
        composed: true,
      })
    );
    if (this.ws) {
      this.ws.close();
    }
  }

  setupWebSocket() {
    this.ws = new WebSocket(this.endpoint);
    this.ws.onopen = () => {
      console.log("WebSocket connection is opening...");
      this.sendSetupMessage();
    };

    this.ws.onmessage = async (event) => {
      try {
        let wsResponse;
        if (event.data instanceof Blob) {
          const responseText = await event.data.text();
          wsResponse = JSON.parse(responseText);
        } else {
          wsResponse = JSON.parse(event.data);
        }

        if (wsResponse.setupComplete) {
          this.onSetupComplete();
        } else if (wsResponse.toolCall) {
          this.onToolCall(wsResponse.toolCall);
        } else if (wsResponse.serverContent) {
          if (wsResponse.serverContent.interrupted) {
            this.onInterrupted();
            return;
          }

          if (wsResponse.serverContent.modelTurn?.parts?.[0]?.inlineData) {
            const audioData =
              wsResponse.serverContent.modelTurn.parts[0].inlineData.data;
            this.onAudioData(audioData);

            if (!wsResponse.serverContent.turnComplete) {
              this.sendContinueSignal();
            }
          }

          if (wsResponse.serverContent.turnComplete) {
            this.onTurnComplete();
          }
        }
      } catch (error) {
        console.error("Error parsing response:", error);
        this.onError("Error parsing response: " + error.message);
      }
    };

    this.ws.onerror = (error) => {
      console.error("WebSocket Error:", error);
      this.onError("WebSocket Error: " + error.message);
    };

    this.ws.onclose = (event) => {
      console.log("Connection closed:", event);
      this.onClose(event);
    };
  }

  sendMessage(message) {
    if (this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify(message));
    } else {
      console.error(
        "WebSocket is not open. Current state:",
        this.ws.readyState
      );
      this.onError("WebSocket is not ready. Please try again.");
    }
  }

  sendSetupMessage() {
    if (this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(this.api_config);
    } else {
      console.error("Connection not ready.");
    }
  }

  sendAudioChunk(base64Audio) {
    const message = {
      realtime_input: {
        media_chunks: [
          {
            mime_type: "audio/pcm",
            data: base64Audio,
          },
        ],
      },
    };
    this.sendMessage(message);
  }

  sendEndMessage() {
    const message = {
      client_content: {
        turns: [
          {
            role: "user",
            parts: [],
          },
        ],
        turn_complete: true,
      },
    };
    this.sendMessage(message);
  }

  sendContinueSignal() {
    const message = {
      client_content: {
        turns: [
          {
            role: "user",
            parts: [],
          },
        ],
        turn_complete: false,
      },
    };
    this.sendMessage(message);
  }

  sendTextMessage(text) {
    this.sendMessage({
      client_content: {
        turn_complete: true,
        turns: [{ role: "user", parts: [{ text: text }] }],
      },
    });
  }

  sendToolResponse(functionResponses) {
    const toolResponse = {
      tool_response: {
        function_responses: functionResponses,
      },
    };
    this.sendMessage(toolResponse);
  }

  async ensureConnected() {
    if (this.ws.readyState === WebSocket.OPEN) {
      return;
    }

    return new Promise((resolve, reject) => {
      const timeout = setTimeout(() => {
        reject(new Error("Connection timeout"));
      }, 5000);

      const onOpen = () => {
        clearTimeout(timeout);
        this.ws.removeEventListener("open", onOpen);
        this.ws.removeEventListener("error", onError);
        resolve();
      };

      const onError = (error) => {
        clearTimeout(timeout);
        this.ws.removeEventListener("open", onOpen);
        this.ws.removeEventListener("error", onError);
        reject(error);
      };

      this.ws.addEventListener("open", onOpen);
      this.ws.addEventListener("error", onError);
    });
  }

  render() {
    if (this.enabled) {
      return html`<span @click="${this.stop}"><slot></slot></span>`;
    } else {
      return html`<span @click="${this.start}"><slot></slot></span>`;
    }
  }
}

customElements.define("gemini-live-connection", GeminiLiveConnection);
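Since `sendSetupMessage` forwards `this.api_config` verbatim as the first frame, the config must be a JSON string in the shape the BidiGenerateContent endpoint expects. A sketch of such a payload follows; the model name and field names are an assumption based on the v1alpha Live API protocol as best understood from its sparse docs, not something this commit defines.

import json

# Illustrative api_config payload; field names are assumptions based on the
# v1alpha BidiGenerateContent setup message and may change.
api_config = json.dumps(
    {
        "setup": {
            "model": "models/gemini-2.0-flash-exp",
            "generation_config": {"response_modalities": ["AUDIO"]},
        }
    }
)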
web_components/gemini_live_connection.py
ADDED
@@ -0,0 +1,43 @@
from typing import Any, Callable

import mesop.labs as mel


_HOST = "generativelanguage.googleapis.com"

_GEMINI_BIDI_WEBSOCKET_URI = "wss://{host}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key={api_key}"


@mel.web_component(path="./gemini_live_connection.js")
def gemini_live_connection(
    *,
    enabled: bool = False,
    api_key: str = "",
    api_config: str = "",
    on_start: Callable[[mel.WebEvent], Any] | None = None,
    on_stop: Callable[[mel.WebEvent], Any] | None = None,
    on_tool_call: Callable[[mel.WebEvent], Any] | None = None,
    tool_call_responses: str = "",
    text_input: str = "",
):
    return mel.insert_web_component(
        name="gemini-live-connection",
        events=_filter_events(
            {
                "startEvent": on_start,
                "stopEvent": on_stop,
                "toolCallEvent": on_tool_call,
            }
        ),
        properties={
            "api_config": api_config,
            "enabled": enabled,
            "endpoint": _GEMINI_BIDI_WEBSOCKET_URI.format(host=_HOST, api_key=api_key),
            "tool_call_responses": tool_call_responses,
            "text_input": text_input,
        },
    )


def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    return {event: callback for event, callback in events.items() if callback}
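A hedged usage sketch (not part of this commit): wiring `gemini_live_connection` into a page with a tool-call handler. The page path, handler, and inline config are illustrative, and the `with` block assumes Mesop slots children into the component.

import json

import mesop as me
import mesop.labs as mel

from web_components.gemini_live_connection import gemini_live_connection

# Placeholder setup payload; see the api_config sketch above.
_API_CONFIG = json.dumps({"setup": {"model": "models/gemini-2.0-flash-exp"}})


def handle_tool_call(event: mel.WebEvent):
    # toolCalls arrives as a JSON string of the functionCalls array
    # (see the toolCallEvent dispatch in gemini_live_connection.js).
    calls = json.loads(event.value["toolCalls"])
    print("tool calls:", calls)


@me.page(path="/live_demo")
def live_demo():
    # The slotted content toggles the connection on click.
    with gemini_live_connection(
        api_key="YOUR_API_KEY",
        api_config=_API_CONFIG,
        on_tool_call=handle_tool_call,
    ):
        me.text("Start / stop session")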
web_components/video_recorder.js
ADDED
@@ -0,0 +1,223 @@
import {
  LitElement,
  html,
  css,
} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";

class VideoRecorder extends LitElement {
  static styles = css`
    :host {
      display: block;
    }

    .video-container {
      position: relative;
      width: 100%;
      max-width: 640px;
      margin: 0 auto;
    }

    video {
      width: 100%;
      height: auto;
      background: #000;
    }

    .controls {
      margin-top: 10px;
      text-align: center;
    }

    button {
      padding: 8px 16px;
      font-size: 16px;
      cursor: pointer;
    }
  `;

  static properties = {
    dataEvent: { type: String },
    recordEvent: { type: String },
    isRecording: { type: Boolean },
    enabled: { type: Boolean },
    quality: { type: Number },
    fps: { type: Number },
    showPreview: { type: Boolean },
  };

  constructor() {
    super();
    this.debug = false;
    this.mediaStream = null;
    this.isStreaming = false;
    this.isRecording = false;
    this.isInitializing = false;
    this.enabled = false;
    this.quality = 0.8; // JPEG quality
    this.fps = 2; // Frames per second
    this.showPreview = true; // Enable preview by default

    // Setup canvas and video elements
    this.video = document.createElement("video");
    this.video.setAttribute("playsinline", ""); // Better mobile support
    this.video.setAttribute("autoplay", "");
    this.video.setAttribute("muted", "");
    this.canvas = document.createElement("canvas");
    this.ctx = this.canvas.getContext("2d");
    this.captureInterval = null;
  }

  disconnectedCallback() {
    this.stop();
    super.disconnectedCallback();
  }

  firstUpdated() {
    if (this.enabled) {
      this.startStreaming();
    }
  }

  log(...args) {
    if (this.debug) {
      console.log(...args);
    }
  }

  warn(...args) {
    if (this.debug) {
      console.warn(...args);
    }
  }

  error(...args) {
    if (this.debug) {
      console.error(...args);
    }
  }

  async startStreaming() {
    if (!this.enabled) {
      // this.dispatchEvent(new MesopEvent(this.recordEvent, {}));
    }
    this.isInitializing = true;
    const initialized = await this.initialize();
    this.isInitializing = false;
    if (initialized) {
      this.isRecording = true;
      this.start();
    }
  }

  async initialize() {
    try {
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        video: {
          width: { ideal: 1280 },
          height: { ideal: 720 },
        },
      });

      this.video.srcObject = this.mediaStream;
      await this.video.play();

      // Wait for video to be ready
      await new Promise((resolve) => {
        this.video.onloadedmetadata = () => {
          this.canvas.width = this.video.videoWidth;
          this.canvas.height = this.video.videoHeight;
          resolve();
        };
      });

      // Request a redraw to show the video preview
      this.requestUpdate();
      return true;
    } catch (error) {
      this.error("Error accessing webcam:", error);
      return false;
    }
  }

  captureFrame() {
    if (!this.mediaStream) {
      this.error("Webcam not started");
      return null;
    }

    // Draw current video frame to canvas
    this.ctx.drawImage(this.video, 0, 0);

    // Convert to JPEG and base64 encode
    const base64Data = this.canvas.toDataURL("image/jpeg", this.quality);

    // Remove the data URL prefix to get just the base64 data
    return base64Data.replace("data:image/jpeg;base64,", "");
  }

  start() {
    this.isStreaming = true;

    // Start capturing frames at specified FPS
    const intervalMs = 1000 / this.fps;
    this.captureInterval = setInterval(() => {
      const base64Frame = this.captureFrame();
      if (base64Frame) {
        this.dispatchEvent(
          new MesopEvent(this.dataEvent, {
            data: base64Frame,
          })
        );
      }
    }, intervalMs);

    return true;
  }

  stop() {
    this.isStreaming = false;
    this.isRecording = false;

    if (this.captureInterval) {
      clearInterval(this.captureInterval);
      this.captureInterval = null;
    }

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }

    // Clear video source
    if (this.video.srcObject) {
      this.video.srcObject = null;
    }
  }

  render() {
    return html`
      <div class="video-container">
        ${this.showPreview && (this.isRecording || this.isInitializing)
          ? html`<video
              .srcObject="${this.mediaStream}"
              playsinline
              autoplay
              muted
            ></video>`
          : null}

        <div class="controls">
          ${this.isInitializing
            ? html`<div>Initializing video recorder...</div>`
            : this.isRecording
            ? html`<button @click="${this.stop}">Stop Recording</button>`
            : html`<button @click="${this.startStreaming}">
                Start Recording
              </button>`}
        </div>
      </div>
    `;
  }
}

customElements.define("video-recorder", VideoRecorder);
web_components/video_recorder.py
ADDED
@@ -0,0 +1,32 @@
from typing import Any, Callable

import mesop.labs as mel


@mel.web_component(path="./video_recorder.js")
def video_recorder(
    *,
    enabled: bool = False,
    on_data: Callable[[mel.WebEvent], Any],
    on_record: Callable[[mel.WebEvent], Any],
):
    """Records video and streams it to the Mesop server.

    This web component is designed to work with `MESOP_WEBSOCKETS_ENABLED=true`.

    The data event looks like:

        {
          "data": <base64-encoded-string>
        }
    """
    return mel.insert_web_component(
        name="video-recorder",
        events={
            "dataEvent": on_data,
            "recordEvent": on_record,
        },
        properties={
            "enabled": enabled,
        },
    )
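Finally, a hedged sketch of a frame handler for `video_recorder` (not part of this commit): each `on_data` event carries one base64-encoded JPEG, so decoding is a single `b64decode`. The handler name and the output path under the gitignored `data/` directory are illustrative.

import base64
import pathlib

import mesop.labs as mel


def handle_video_frame(event: mel.WebEvent):
    # Hypothetical handler: decode and persist the latest JPEG frame.
    jpeg_bytes = base64.b64decode(event.value["data"])
    out = pathlib.Path("data")
    out.mkdir(exist_ok=True)
    (out / "last_frame.jpg").write_bytes(jpeg_bytes)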