Commit 09ed935
Richard committed

Initial commit

Parent(s): (none)

Browse files:
- .gitignore +16 -0
- README.md +0 -0
- css.py +152 -0
- main.py +381 -0
- models.py +17 -0
- question_bank.py +132 -0
- requirements.txt +6 -0
- ruff.toml +2 -0
- state.py +41 -0
- trebek_bot.py +91 -0
- web_components/audio_player.js +172 -0
- web_components/audio_player.py +40 -0
- web_components/audio_recorder.js +496 -0
- web_components/audio_recorder.py +44 -0
- web_components/gemini_live_connection.js +296 -0
- web_components/gemini_live_connection.py +43 -0
- web_components/video_recorder.js +223 -0
- web_components/video_recorder.py +32 -0
.gitignore
ADDED
@@ -0,0 +1,16 @@
+# System
+.DS_Store
+
+# Python
+__pycache__
+
+# VS Code
+*.code-workspace
+.pytest_cache
+
+# Dyad
+.dyad
+
+# App
+.env
+data
README.md
ADDED
File without changes
css.py
ADDED
@@ -0,0 +1,152 @@
+import mesop as me
+
+from state import State
+
+COLOR_BLUE = "blue"
+COLOR_YELLOW = "#f0cd6e"
+COLOR_RED = "#cc153c"
+COLOR_DISABLED = "#e4e4e4"
+COLOR_DISABLED_BUTTON_BG = "#ccc"
+
+
+MAIN_COL_GRID = me.Style(
+  background="#ececec",
+  display="grid",
+  grid_template_columns="70% 30%",
+  height="100vh",
+)
+
+SIDEBAR = me.Style(
+  color="#111",
+  overflow_y="scroll",
+  padding=me.Padding.all(20),
+)
+
+SIDEBAR_SECTION = me.Style(margin=me.Margin(bottom=15))
+
+TOOLBAR_SECTION = me.Style(
+  margin=me.Margin(bottom=15),
+  padding=me.Padding.all(5),
+  background=me.theme_var("surface-container-highest"),
+  justify_content="space-evenly",
+  display="flex",
+  flex_direction="row",
+)
+
+TEXT_INPUT = me.Style(width="100%")
+
+
+def sidebar_header() -> me.Style:
+  state = me.state(State)
+  return me.Style(color="#000" if state.gemini_live_api_enabled else "#aaa")
+
+
+def game_button() -> me.Style:
+  state = me.state(State)
+  if not state.api_key:
+    return me.Style()
+  if state.gemini_live_api_enabled:
+    return me.Style(background=me.theme_var("error"), color=me.theme_var("on-error"))
+  return me.Style(background=me.theme_var("primary"), color=me.theme_var("on-primary"))
+
+
+def audio_button() -> me.Style:
+  state = me.state(State)
+  if state.audio_player_enabled:
+    return me.Style(background=me.theme_var("tertiary"), color=me.theme_var("on-tertiary"))
+  return me.Style()
+
+
+def mic_button() -> me.Style:
+  state = me.state(State)
+  if state.audio_recorder_state == "recording":
+    return me.Style(background=me.theme_var("tertiary"), color=me.theme_var("on-tertiary"))
+  if state.gemini_live_api_enabled:
+    return me.Style(background=me.theme_var("error"), color=me.theme_var("on-error"))
+  return me.Style()
+
+
+def score_box() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color="white" if state.gemini_live_api_enabled else COLOR_DISABLED,
+    font_weight="bold",
+    font_size="2.2vw",
+    padding=me.Padding.all(15),
+    text_align="center",
+  )
+
+
+def current_clue_box() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color=COLOR_YELLOW if state.gemini_live_api_enabled else COLOR_DISABLED,
+    font_size="1em",
+    font_weight="bold",
+    padding=me.Padding.all(15),
+  )
+
+
+def board_col_grid() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background="#000" if state.gemini_live_api_enabled else "#ddd",
+    display="grid",
+    gap="5px",
+    grid_template_columns="repeat(6, 1fr)",
+  )
+
+
+def category_box() -> me.Style:
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color="white",
+    font_weight="bold",
+    font_size="1em",
+    padding=me.Padding.all(15),
+    text_align="center",
+  )
+
+
+def clue_box(is_selectable: bool) -> me.Style:
+  """Style for clue box.
+
+  Args:
+    is_selectable: Visually signifies that the clue is selectable.
+  """
+  state = me.state(State)
+  return me.Style(
+    background=COLOR_BLUE if state.gemini_live_api_enabled else COLOR_DISABLED,
+    color=COLOR_YELLOW,
+    cursor="pointer" if is_selectable else "default",
+    font_size="1em",
+    font_weight="bold",
+    padding=me.Padding.all(15),
+    text_align="center",
+  )
+
+
+def response_button(disabled: bool) -> me.Style:
+  """Styles for response submit button.
+
+  Args:
+    disabled: Since we're overriding the style, we need to handle the disabled state.
+  """
+  if disabled:
+    return me.Style(background=COLOR_DISABLED_BUTTON_BG, color="#eee")
+  return me.Style(background=COLOR_BLUE, color="white")
+
+
+def score_text(score: int) -> me.Style:
+  """In Jeopardy, when the score is negative, it is shown in red instead of white."""
+  state = me.state(State)
+  if not state.gemini_live_api_enabled:
+    return me.Style(color=COLOR_DISABLED)
+
+  if score < 0:
+    return me.Style(color=COLOR_RED)
+
+  return me.Style(color="white")
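A note on the pattern above: the uppercase names are constant me.Style values, while the lowercase functions re-read State through me.state(State) on every render, so the returned style tracks the live game state. A minimal sketch of the same pattern with a hypothetical accent_text style:

import mesop as me

from state import State


def accent_text() -> me.Style:
  # Hypothetical example of the dynamic-style pattern used throughout css.py:
  # recomputed on each render, so the color follows gemini_live_api_enabled.
  state = me.state(State)
  return me.Style(color="white" if state.gemini_live_api_enabled else "#aaa")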
main.py
ADDED
@@ -0,0 +1,381 @@
+import json
+import time
+
+import css
+import trebek_bot
+from models import Clue
+import mesop as me
+import mesop.labs as mel
+from web_components.gemini_live_connection import gemini_live_connection
+from web_components.audio_recorder import audio_recorder
+from web_components.audio_player import audio_player
+from state import State
+
+
+def on_load(e: me.LoadEvent):
+  """Updates the system instructions with the randomly selected game categories."""
+  state = me.state(State)
+  categories = [question_set[0].category for question_set in state.board.clues]
+  state.gemini_live_api_config = trebek_bot.make_gemini_live_api_config(
+    system_instructions=trebek_bot.make_system_instruction(categories)
+  )
+
+
+@me.page(
+  path="/",
+  title="Mesop Jeopardy Live",
+  security_policy=me.SecurityPolicy(
+    allowed_connect_srcs=["wss://generativelanguage.googleapis.com"],
+    allowed_iframe_parents=["https://huggingface.co"],
+    allowed_script_srcs=[
+      "https://cdn.jsdelivr.net",
+    ],
+  ),
+  on_load=on_load,
+)
+def app():
+  state = me.state(State)
+
+  with me.box(style=css.MAIN_COL_GRID):
+    with me.box(style=css.board_col_grid()):
+      for col_index in range(len(state.board.clues[0])):
+        # Render Jeopardy categories
+        if col_index == 0:
+          for row_index in range(len(state.board.clues)):
+            cell = state.board.clues[row_index][col_index]
+            with me.box(style=css.category_box()):
+              if state.gemini_live_api_enabled:
+                me.text(cell.category)
+              else:
+                me.text("")
+
+        # Render Jeopardy questions
+        for row_index in range(len(state.board.clues)):
+          cell = state.board.clues[row_index][col_index]
+          key = f"clue-{row_index}-{col_index}"
+          is_selectable = not (key in state.answered_questions or state.selected_question_key)
+          with me.box(
+            style=css.clue_box(state.gemini_live_api_enabled and is_selectable),
+            key=key,
+            on_click=on_click_cell,
+          ):
+            if not state.gemini_live_api_enabled:
+              me.text("")
+            elif key in state.answered_questions:
+              me.text("")
+            elif key == state.selected_question_key:
+              me.text(cell.question, style=me.Style(text_align="left"))
+            else:
+              me.text(f"${cell.normalized_value}", style=me.Style(font_size="2.2vw"))
+
+    # Sidebar
+    with me.box(style=css.SIDEBAR):
+      me.input(
+        label="Google API Key",
+        on_input=on_input_api_key,
+        readonly=state.gemini_live_api_enabled,
+        style=css.TEXT_INPUT,
+        type="password",
+        value=state.api_key,
+      )
+
+      with me.box(style=css.TOOLBAR_SECTION):
+        gemini_live_button()
+        audio_player_button()
+        audio_recorder_button()
+
+      # Score
+      with me.box(style=css.SIDEBAR_SECTION):
+        me.text("Score", type="headline-5", style=css.sidebar_header())
+        with me.box(style=css.score_box()):
+          me.text(format_dollars(state.score), style=css.score_text(state.score))
+
+      # Clue
+      with me.box(style=css.SIDEBAR_SECTION):
+        me.text("Clue", type="headline-5", style=css.sidebar_header())
+        with me.box(style=css.current_clue_box()):
+          if state.selected_question_key:
+            selected_question = get_selected_question(state.board, state.selected_question_key)
+            me.text(selected_question.question)
+          else:
+            me.text("No clue selected. Please select one.", style=me.Style(font_style="italic"))
+
+      # Response
+      with me.box(style=css.SIDEBAR_SECTION):
+        me.text("Response", type="headline-5", style=css.sidebar_header())
+        me.textarea(
+          disabled=not bool(state.selected_question_key),
+          label="Enter your response",
+          on_blur=on_input_response,
+          style=css.TEXT_INPUT,
+          value=state.response_value,
+        )
+
+        disabled = not bool(state.selected_question_key)
+        me.button(
+          disabled=disabled,
+          label="Submit your response",
+          on_click=on_click_submit,
+          style=css.response_button(disabled),
+          type="flat",
+        )
+
+
+@me.component
+def gemini_live_button():
+  state = me.state(State)
+  with gemini_live_connection(
+    api_config=state.gemini_live_api_config,
+    api_key=state.api_key,
+    enabled=state.gemini_live_api_enabled,
+    on_start=on_gemini_live_api_started,
+    on_stop=on_gemini_live_api_stopped,
+    on_tool_call=handle_tool_calls,
+    text_input=state.text_input,
+    tool_call_responses=state.tool_call_responses,
+  ):
+    with me.tooltip(message=get_gemini_live_tooltip()):
+      with me.content_button(
+        disabled=not state.api_key,
+        style=css.game_button(),
+        type="icon",
+      ):
+        if state.gemini_live_api_enabled:
+          me.icon(icon="stop")
+        else:
+          me.icon(icon="play_arrow")
+
+
+@me.component
+def audio_player_button():
+  state = me.state(State)
+  with audio_player(
+    enabled=state.audio_player_enabled, on_play=on_audio_play, on_stop=on_audio_stop
+  ):
+    with me.tooltip(message=get_audio_player_tooltip()):
+      with me.content_button(
+        disabled=True,
+        style=css.audio_button(),
+        type="icon",
+      ):
+        if state.audio_player_enabled:
+          me.icon(icon="volume_up")
+        else:
+          me.icon(icon="volume_mute")
+
+
+@me.component
+def audio_recorder_button():
+  state = me.state(State)
+  with audio_recorder(
+    state=state.audio_recorder_state, on_state_change=on_audio_recorder_state_change
+  ):
+    with me.tooltip(message=get_audio_recorder_tooltip()):
+      with me.content_button(
+        disabled=not state.gemini_live_api_enabled,
+        style=css.mic_button(),
+        type="icon",
+      ):
+        if state.audio_recorder_state == "initializing":
+          me.icon(icon="pending")
+        else:
+          me.icon(icon="mic")
+
+
+def on_click_cell(e: me.ClickEvent):
+  """Selects the given clue by prompting the Gemini Live API."""
+  state = me.state(State)
+  clue = get_selected_question(state.board, e.key)
+  state.text_input = f"I'd like to select {clue.category}, for ${clue.normalized_value}."
+
+
+def on_input_response(e: me.InputBlurEvent):
+  """Stores user input into state, so we can process their response."""
+  state = me.state(State)
+  state.response = e.value
+
+
+def on_click_submit(e: me.ClickEvent):
+  """Submits the user's response to the clue so the Gemini Live API can check whether it is correct."""
+  state = me.state(State)
+  if not state.response.strip():
+    return
+
+  state.text_input = state.response
+
+  # Hack to reset the text input. Update the initial response value to the current
+  # response first, which will trigger a diff when we set the initial response back
+  # to an empty string.
+  #
+  # A small delay is also needed because sometimes the yield happens too fast, which
+  # does not allow the UI on the client to update properly.
+  state.response_value = state.response
+  yield
+  time.sleep(0.5)
+  state.response_value = ""
+  yield
+
+
+def get_selected_question(board, selected_question_key) -> Clue:
+  """Gets the selected question from the key."""
+  _, row, col = selected_question_key.split("-")
+  return board.clues[int(row)][int(col)]
+
+
+def format_dollars(value: int) -> str:
+  """Formats an integer value in US dollar format."""
+  if value < 0:
+    return f"-${value * -1:,}"
+  return f"${value:,}"
+
+
+def get_gemini_live_tooltip() -> str:
+  """Tooltip messages for the Gemini Live API web component button."""
+  state = me.state(State)
+  if state.gemini_live_api_enabled:
+    return "Stop game"
+  if state.api_key:
+    return "Start game"
+  return "Game disabled. Enter API Key."
+
+
+def get_audio_player_tooltip() -> str:
+  """Tooltip messages for the audio player web component button."""
+  state = me.state(State)
+  if state.audio_player_enabled:
+    return "Audio playing"
+  if state.gemini_live_api_enabled:
+    return "Audio not playing"
+  return "Audio disabled"
+
+
+def get_audio_recorder_tooltip() -> str:
+  """Tooltip messages for the audio recorder web component button."""
+  state = me.state(State)
+  if state.audio_recorder_state == "initializing":
+    return "Microphone initializing"
+  if state.audio_recorder_state == "recording":
+    return "Microphone on"
+  if state.gemini_live_api_enabled:
+    return "Microphone muted"
+  return "Microphone disabled"
+
+
+def on_input_api_key(e: me.InputEvent):
+  """Captures Google API key input."""
+  state = me.state(State)
+  state.api_key = e.value
+
+
+def on_audio_play(e: mel.WebEvent):
+  """Event for when the audio player play button was clicked."""
+  me.state(State).audio_player_enabled = True
+
+
+def on_audio_stop(e: mel.WebEvent):
+  """Event for when the audio player stop button was clicked."""
+  me.state(State).audio_player_enabled = False
+
+
+def on_audio_recorder_state_change(e: mel.WebEvent):
+  """Event for when the audio recorder state changes."""
+  me.state(State).audio_recorder_state = e.value
+
+
+def on_gemini_live_api_started(e: mel.WebEvent):
+  """Event for when the Gemini Live API start button was clicked."""
+  me.state(State).gemini_live_api_enabled = True
+
+
+def on_gemini_live_api_stopped(e: mel.WebEvent):
+  """Event for when the Gemini Live API stop button was clicked."""
+  state = me.state(State)
+  state.gemini_live_api_enabled = False
+  state.selected_question_key = ""
+  state.response_value = ""
+
+
+def handle_tool_calls(e: mel.WebEvent):
+  """Processes tool calls from the Gemini Live API.
+
+  Supported tool calls:
+
+  - get_clue
+  - update_score
+  """
+  state = me.state(State)
+  tool_calls = json.loads(e.value["toolCalls"])
+  responses = []
+  for tool_call in tool_calls:
+    result = None
+    if tool_call["name"] == "get_clue":
+      result = tool_call_get_clue(
+        tool_call["args"]["category_index"], tool_call["args"]["dollar_index"]
+      )
+    elif tool_call["name"] == "update_score":
+      result = tool_call_update_score(tool_call["args"]["is_correct"])
+
+    responses.append(
+      {
+        "id": tool_call["id"],
+        "name": tool_call["name"],
+        "response": {
+          "result": result,
+        },
+      }
+    )
+
+  if responses:
+    print(responses)
+    state.tool_call_responses = json.dumps(responses)
+
+
+def tool_call_update_score(is_correct: bool) -> str:
+  """Updates the user's score.
+
+  Gemini will determine if the user is correct and then call this tool, which will
+  allow the game state to be updated appropriately.
+  """
+  state = me.state(State)
+  selected_question = get_selected_question(state.board, state.selected_question_key)
+  if is_correct:
+    state.score += selected_question.normalized_value
+  else:
+    state.score -= selected_question.normalized_value
+
+  # Clear question so another can be picked.
+  state.answered_questions.add(state.selected_question_key)
+  state.selected_question_key = ""
+
+  return f"The user's score is {state.score}"
+
+
+def tool_call_get_clue(category_index, dollar_index) -> str:
+  """Gets the selected clue.
+
+  Gemini will parse the user request and make a tool call with the row/col indexes.
+
+  Example: "Category X for $400".
+  """
+  cell_key = f"clue-{category_index}-{dollar_index}"
+  response = handle_select_clue(cell_key)
+
+  if isinstance(response, str):
+    return "There was an error. " + response
+
+  return f"The clue is {response.question}\n\n The answer to the clue is {response.answer}\n\n Please read the clue to the user."
+
+
+def handle_select_clue(clue_key: str) -> Clue | str:
+  """Handles logic for clicking on a clue.
+
+  If it returns a string, it will be an error message.
+  If it returns a clue, that means a valid clue was selected.
+  """
+  state = me.state(State)
+  if state.selected_question_key:
+    return "A clue has already been selected."
+  if clue_key in state.answered_questions:
+    return "That clue has already been selected."
+  state.selected_question_key = clue_key
+  return get_selected_question(state.board, state.selected_question_key)
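For reference, handle_tool_calls above expects e.value["toolCalls"] to be a JSON-encoded list and stores a JSON-encoded reply with one entry per call. A minimal sketch of the round trip, with hypothetical values (the exact wire format is whatever the gemini_live_connection web component sends):

import json

# Hypothetical payload as it would arrive from the web component.
incoming = json.dumps(
  [{"id": "call-1", "name": "get_clue", "args": {"category_index": 2, "dollar_index": 3}}]
)

# Shape of the reply that handle_tool_calls writes to state.tool_call_responses.
outgoing = json.dumps(
  [{"id": "call-1", "name": "get_clue", "response": {"result": "The clue is ..."}}]
)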
models.py
ADDED
@@ -0,0 +1,17 @@
+from pydantic import BaseModel
+
+
+class Clue(BaseModel):
+  air_date: str
+  category: str
+  question: str
+  value: str | None
+  answer: str
+  round: str
+  show_number: str
+  raw_value: int = 0
+  normalized_value: int = 0
+
+
+class Board(BaseModel):
+  clues: list[list[Clue]]
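Since Clue is a pydantic model, rows from the raw data set validate directly, and the two fields the data set lacks (raw_value, normalized_value) fall back to their defaults until question_bank.py fills them in. A small sketch using the sample row from question_bank.py's docstring:

from models import Clue

clue = Clue(
  category="HISTORY",
  air_date="2004-12-31",
  question="'For the last 8 years of his life, Galileo was...",
  value="$200",
  answer="Copernicus",
  round="Jeopardy!",
  show_number="4680",
)
print(clue.raw_value, clue.normalized_value)  # 0 0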
question_bank.py
ADDED
@@ -0,0 +1,132 @@
+import json
+import re
+from collections import defaultdict
+
+from models import Clue
+
+
+QuestionSet = list[Clue]
+
+_JEOPARDY_DATA = "data/jeopardy.json"
+_NUM_QUESTIONS_PER_CATEGORY = 5
+
+
+def load() -> list[QuestionSet]:
+  """Loads a cleaned up data set to use in the Mesop Jeopardy game."""
+  data = _load_raw_data()
+  data = _add_raw_value(data)
+  data = _clean_questions(data)
+  question_sets = _group_into_question_sets(data)
+  question_sets = _sort_question_sets(question_sets)
+  question_sets = _normalize_values(question_sets)
+  return _filter_out_incomplete_question_sets(question_sets)
+
+
+def _load_raw_data() -> QuestionSet:
+  """Loads the raw data set.
+
+  The format of each question/clue looks like this:
+
+  {
+    "category": "HISTORY",
+    "air_date": "2004-12-31",
+    "question": "'For the last 8 years of his life, Galileo was...",
+    "value": "$200",
+    "answer": "Copernicus",
+    "round": "Jeopardy!",
+    "show_number": "4680"
+  }
+  """
+  with open(_JEOPARDY_DATA, "r") as f:
+    return [Clue(**row) for row in json.load(f)]
+
+
+def _add_raw_value(data: QuestionSet) -> QuestionSet:
+  """Adds a raw value, since the value is formatted as a dollar string that isn't as
+  easy to sort."""
+  for row in data:
+    row.raw_value = _convert_dollar_amount(row.value)
+  return data
+
+
+def _clean_questions(data: QuestionSet) -> QuestionSet:
+  """Cleans up questions.
+
+  - Strip single quotes around each question
+  - Replace escaped single quotes
+  - Strip HTML tags
+  """
+  for row in data:
+    row.question = re.sub("<[^<]+?>", "", row.question.strip("'").replace("\\'", "'"))
+  return data
+
+
+def _convert_dollar_amount(value: str | None) -> int:
+  """Converts the raw value into an integer.
+
+  The raw value is a string formatted as a dollar amount, such as $1,000. In this
+  data set the dollar amount is not given for Daily Doubles that were not answered, so
+  we'll set those cases to a value of 0 for now.
+
+  In addition, answered Daily Doubles will have odd dollar amounts.
+
+  These values won't be used in the actual game, only for roughly sorting the
+  questions by difficulty.
+  """
+  if value:
+    return int(value.replace("$", "").replace(",", ""))
+  else:
+    return 0
+
+
+def _group_into_question_sets(data: QuestionSet) -> list[QuestionSet]:
+  """Groups the questions by category for that air date.
+
+  We want to mix and match questions across games, but we want to keep the questions
+  within a category together.
+  """
+  question_sets = defaultdict(lambda: [])
+  for row in data:
+    question_sets[(row.category, row.air_date)].append(row)
+  return list(question_sets.values())
+
+
+def _sort_question_sets(question_sets: list[QuestionSet]) -> list[QuestionSet]:
+  return [_sort_question_set(question_set) for question_set in question_sets]
+
+
+def _sort_question_set(question_set: QuestionSet) -> QuestionSet:
+  """Sorts the question set so the questions are roughly ordered by difficulty.
+
+  This will not always be accurate due to Daily Doubles skewing the order. The data
+  set did not store the Daily Double values separately from the normal game value.
+  """
+  return sorted(question_set, key=lambda q: q.raw_value)
+
+
+def _normalize_values(question_sets: list[QuestionSet]) -> list[QuestionSet]:
+  """Normalizes question dollar amounts based on order of appearance.
+
+  Since we're picking random categories across different rounds and years, the dollar
+  values will differ. So we normalize them here.
+  """
+  for question_set in question_sets:
+    for index, question in enumerate(question_set):
+      question.normalized_value = (index + 1) * 200
+  return question_sets
+
+
+def _filter_out_incomplete_question_sets(question_sets: list[QuestionSet]) -> list[QuestionSet]:
+  """Filters out question sets that are incomplete (do not contain five questions).
+
+  Final Jeopardy categories only have one question, so we want to ignore those.
+  We also want to avoid anomalies in the data set.
+
+  In addition, there are cases where not all questions were answered for a category,
+  which means the board would be missing a question.
+  """
+  return [
+    question_set
+    for question_set in question_sets
+    if len(question_set) == _NUM_QUESTIONS_PER_CATEGORY
+  ]
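Putting the pipeline together: after _sort_question_set orders a category by raw_value, _normalize_values assigns (index + 1) * 200, so every surviving five-question category lands on the standard $200 to $1000 board values no matter which round it came from. For example:

# A Double Jeopardy! category's raw values...
raw_values = [400, 800, 1200, 1600, 2000]
# ...normalize by position, not by amount.
normalized = [(index + 1) * 200 for index in range(len(raw_values))]
print(normalized)  # [200, 400, 600, 800, 1000]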
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+Flask==3.1.0
+google-genai==0.6.0
+gunicorn==23.0.0
+mesop==0.14.1
+pydantic==2.10.5
+websockets==14.2
ruff.toml
ADDED
@@ -0,0 +1,2 @@
+line-length = 100
+indent-width = 2
state.py
ADDED
@@ -0,0 +1,41 @@
+from typing import Literal
+from dataclasses import field
+import random
+import os
+
+import question_bank
+import mesop as me
+from models import Board
+
+
+_NUM_CATEGORIES = 6
+_QUESTION_SETS = question_bank.load()
+
+
+@me.stateclass
+class State:
+  selected_clue: str
+  board: Board = field(default_factory=lambda: make_default_board(_QUESTION_SETS))
+  # Used for clearing the text input.
+  response_value: str
+  response: str
+  score: int
+  # Key format: clue-{row_index}-{col_index}
+  selected_question_key: str
+  # Set is not JSON serializable
+  # Key format: clue-{row_index}-{col_index}
+  answered_questions: set[str] = field(default_factory=set)
+  # Gemini Live API
+  api_key: str = os.getenv("GOOGLE_API_KEY", "")
+  gemini_live_api_enabled: bool = False
+  gemini_live_api_config: str
+  audio_player_enabled: bool = False
+  audio_recorder_state: Literal["disabled", "initializing", "recording"] = "disabled"
+  tool_call_responses: str = ""
+  text_input: str = ""
+
+
+def make_default_board(jeopardy_questions) -> Board:
+  """Creates a board with some random Jeopardy questions."""
+  random.shuffle(jeopardy_questions)
+  return Board(clues=jeopardy_questions[:_NUM_CATEGORIES])
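One design note: make_default_board shuffles the module-level _QUESTION_SETS list in place and takes the first six entries, so each new board draws from a freshly reshuffled shared list. A non-mutating variant (an assumption for illustration, not what this commit does) would use random.sample:

import random

def make_default_board_sampled(jeopardy_questions) -> Board:
  # Hypothetical alternative: picks six random categories without
  # reordering the shared question list.
  return Board(clues=random.sample(jeopardy_questions, _NUM_CATEGORIES))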
trebek_bot.py
ADDED
@@ -0,0 +1,91 @@
+from typing import Literal
+import json
+
+
+type VoiceName = Literal["Aoede", "Charon", "Fenrir", "Kore", "Puck"]
+type GeminiModel = Literal["gemini-2.0-flash-exp"]
+
+
+_TOOL_DEFINITIONS = {
+  "functionDeclarations": [
+    {
+      "name": "get_clue",
+      "description": "Gets the clue from the board which returns the clue and answer",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "category_index": {"type": "integer", "description": "Index of selected category."},
+          "dollar_index": {"type": "integer", "description": "Index of selected dollar amount."},
+        },
+        "required": ["category_index", "dollar_index"],
+      },
+    },
+    {
+      "name": "update_score",
+      "description": "Updates whether the user got the question correct or not.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "is_correct": {"type": "boolean", "description": "True if correct. False if incorrect."},
+        },
+        "required": ["is_correct"],
+      },
+    },
+  ]
+}
+
+_SYSTEM_INSTRUCTIONS = """
+You are the host of Jeopardy. Make sure users follow the rules of the game.
+
+You have access to the following tools:
+- get_clue: Gets the clue selected by the user. Always use this for picking clues. Do not make up your own clues.
+- update_score: Updates the user's score depending on whether they answered the clue correctly.
+
+The categories are [[categories]]. Each category has 5 questions, with the following dollar
+amounts: $200, $400, $600, $800, $1000.
+
+When the user asks for a clue, they will specify the category and dollar amount. Use the
+`get_clue` tool by passing in the corresponding indexes for the category and dollar
+amount.
+
+For example, if the categories are Witches, Gold Rush, American History, Desserts, Wet & Wild,
+and the user says "American History for $800", the index will be 2 for the category and 3
+for the dollar amount.
+
+The `get_clue` tool will return the clue and answer if it is valid. If it is invalid, it
+will return an error message.
+
+Wait for the `get_clue` tool response before responding.
+
+When you get the response to the `get_clue` tool, read the clue to the user.
+
+Briefly explain to the user why their answer is correct or wrong.
+
+Use the `update_score` tool to update their score. Pass in true if they were correct.
+Pass in false if they were not correct. This tool will return the user's current score.
+""".strip()
+
+
+def make_system_instruction(categories: list[str]):
+  return _SYSTEM_INSTRUCTIONS.replace("[[categories]]", ", ".join(categories))
+
+
+def make_gemini_live_api_config(
+  model: GeminiModel = "gemini-2.0-flash-exp",
+  system_instructions: str = "",
+  voice_name: VoiceName = "Puck",
+):
+  return json.dumps(
+    {
+      "setup": {
+        "model": f"models/{model}",
+        "system_instruction": {"role": "user", "parts": [{"text": system_instructions}]},
+        "tools": _TOOL_DEFINITIONS,
+        "generation_config": {
+          "temperature": 0.3,
+          "response_modalities": ["audio"],
+          "speech_config": {"voice_config": {"prebuilt_voice_config": {"voice_name": voice_name}}},
+        },
+      }
+    }
+  )
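For reference, calling make_gemini_live_api_config with its defaults produces a JSON setup message along these lines (a sketch of the output, abbreviated):

import json
import trebek_bot

config = json.loads(
  trebek_bot.make_gemini_live_api_config(
    system_instructions=trebek_bot.make_system_instruction(["Witches", "Gold Rush"])
  )
)
print(config["setup"]["model"])  # models/gemini-2.0-flash-exp
print(config["setup"]["generation_config"]["response_modalities"])  # ['audio']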
web_components/audio_player.js
ADDED
@@ -0,0 +1,172 @@
+import {
+  LitElement,
+  html,
+} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
+
+class AudioPlayer extends LitElement {
+  static properties = {
+    playEvent: { type: String },
+    stopEvent: { type: String },
+    enabled: { type: Boolean },
+    data: { type: String },
+  };
+
+  constructor() {
+    super();
+    this.enabled = false;
+    this.audioContext = null; // Initialize audio context
+    this.sampleRate = 24000; // Gemini Live API sends data at 24000hz
+    this.channels = 1;
+    this.queue = [];
+    this.isPlaying = false;
+
+    this.onGeminiLiveStarted = (e) => {
+      if (!this.enabled) {
+        this.playAudio();
+      }
+    };
+
+    this.onGeminiLiveStopped = (e) => {
+      this.dispatchEvent(new MesopEvent(this.stopEvent, {}));
+    };
+
+    this.onAudioOutputReceived = (e) => {
+      this.addToQueue(e.detail.data);
+    };
+  }
+
+  connectedCallback() {
+    super.connectedCallback();
+    window.addEventListener(
+      "audio-output-received",
+      this.onAudioOutputReceived
+    );
+    window.addEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.addEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  disconnectedCallback() {
+    super.disconnectedCallback();
+    if (this.audioContext) {
+      this.audioContext.close();
+    }
+    window.removeEventListener(
+      "audio-output-received",
+      this.onAudioOutputReceived
+    );
+    window.removeEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.removeEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  firstUpdated() {
+    if (this.enabled) {
+      this.playAudio();
+    }
+  }
+
+  updated(changedProperties) {
+    // Add audio chunks to queue to play.
+    if (changedProperties.has("data") && this.data.length > 0) {
+      this.addToQueue(this.data);
+    }
+
+    // Clear the queue if the audio player is disabled.
+    if (changedProperties.has("enabled") && !this.enabled) {
+      this.queue = [];
+    }
+  }
+
+  addToQueue(base64Data) {
+    if (!this.enabled) {
+      return;
+    }
+    this.queue.push(base64Data);
+    if (!this.isPlaying) {
+      this.playNext();
+    }
+  }
+
+  playAudio() {
+    if (!this.enabled) {
+      this.dispatchEvent(new MesopEvent(this.playEvent, {}));
+    }
+    if (!this.audioContext) {
+      this.audioContext = new AudioContext();
+    }
+    this.playNext();
+  }
+
+  playNext() {
+    if (!this.enabled || !this.audioContext || this.queue.length === 0) {
+      this.isPlaying = false;
+      return;
+    }
+
+    this.isPlaying = true;
+    const data = this.queue.shift();
+    const source = this.playPCM(data);
+
+    source.onended = () => {
+      this.playNext();
+    };
+  }
+
+  playPCM(data) {
+    // Convert base64 to binary.
+    const binaryAudio = atob(data);
+
+    // Convert binary string to ArrayBuffer.
+    const audioBuffer = new ArrayBuffer(binaryAudio.length);
+    const bufferView = new Uint8Array(audioBuffer);
+    for (let i = 0; i < binaryAudio.length; i++) {
+      bufferView[i] = binaryAudio.charCodeAt(i);
+    }
+
+    // Convert to 16-bit PCM data.
+    const pcmData = new Int16Array(audioBuffer);
+
+    // Create audio buffer.
+    const frameCount = pcmData.length;
+    const audioBufferData = this.audioContext.createBuffer(
+      this.channels,
+      frameCount,
+      this.sampleRate
+    );
+
+    // Get channel data and convert PCM to float32.
+    const channelData = audioBufferData.getChannelData(0);
+    for (let i = 0; i < frameCount; i++) {
+      // Convert 16-bit PCM (-32768 to 32767) to float32 (-1.0 to 1.0)
+      channelData[i] = pcmData[i] / 32768.0;
+    }
+
+    // Create and play the source.
+    const source = this.audioContext.createBufferSource();
+    source.buffer = audioBufferData;
+    source.connect(this.audioContext.destination);
+    source.start();
+
+    return source;
+  }
+
+  render() {
+    if (this.enabled) {
+      return html`<span><slot></slot></span>`;
+    }
+    return html`<span @click="${this.playAudio}"><slot></slot></span>`;
+  }
+}
+
+customElements.define("audio-player", AudioPlayer);
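The playPCM method above interprets each byte pair as a signed 16-bit sample and divides by 32768 to reach Web Audio's float range, so -32768 maps to -1.0 and 16384 maps to 0.5. The same conversion as a short Python sketch:

import struct

def pcm16_to_float(raw: bytes) -> list[float]:
  # Little-endian signed 16-bit PCM -> floats in [-1.0, 1.0), as in playPCM.
  samples = struct.unpack(f"<{len(raw) // 2}h", raw)
  return [sample / 32768.0 for sample in samples]

print(pcm16_to_float(struct.pack("<3h", -32768, 0, 16384)))  # [-1.0, 0.0, 0.5]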
web_components/audio_player.py
ADDED
@@ -0,0 +1,40 @@
+from typing import Any, Callable
+import base64
+
+import mesop.labs as mel
+
+
+@mel.web_component(path="./audio_player.js")
+def audio_player(
+  *,
+  enabled: bool = False,
+  data: bytes = b"",
+  on_play: Callable[[mel.WebEvent], Any] | None = None,
+  on_stop: Callable[[mel.WebEvent], Any] | None = None,
+):
+  """Plays audio streamed from the server.
+
+  An important thing to note is that the audio player does not persist the data it
+  receives. Instead the data is stored in a queue and removed once the audio has been
+  played.
+
+  This is a barebones configuration that sets the sample rate to 24000hz since that is
+  what Gemini returns. In addition, we expect the data to be in PCM format.
+  """
+  return mel.insert_web_component(
+    name="audio-player",
+    events=_filter_events(
+      {
+        "playEvent": on_play,
+        "stopEvent": on_stop,
+      }
+    ),
+    properties={
+      "enabled": enabled,
+      "data": base64.b64encode(data).decode("utf-8"),
+    },
+  )
+
+
+def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
+  return {event: callback for event, callback in events.items() if callback}
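A minimal usage sketch (hypothetical page path and handlers; the real wiring is in main.py, where playback is fed by the browser-side audio-output-received event rather than the data property):

import mesop as me
import mesop.labs as mel

from web_components.audio_player import audio_player


def on_play(e: mel.WebEvent):
  print("audio player enabled")  # hypothetical handler for illustration


def on_stop(e: mel.WebEvent):
  print("audio player disabled")


@me.page(path="/audio-demo")  # hypothetical demo page
def page():
  with audio_player(enabled=False, on_play=on_play, on_stop=on_stop):
    me.text("Tap to enable audio")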
web_components/audio_recorder.js
ADDED
@@ -0,0 +1,496 @@
+import {
+  LitElement,
+  html,
+} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";
+
+class AudioRecorder extends LitElement {
+  static properties = {
+    dataEvent: { type: String },
+    stateChangeEvent: { type: String },
+    state: { type: String },
+    isRecording: { type: Boolean },
+    debugBuffer: { state: true },
+    debug: { type: Boolean },
+    voiceDetectionEnabled: { type: Boolean },
+    voiceThreshold: { type: Number },
+    voiceHoldTime: { type: Number },
+  };
+
+  constructor() {
+    super();
+    this.debug = false;
+    this.mediaStream = null;
+    this.audioContext = null;
+    this.processor = null;
+    this.isStreaming = false;
+    this.isRecording = false;
+    this.isInitializing = false;
+    this.sequenceNumber = 0;
+    this.debugBuffer = [];
+    this.debugBufferSize = 50;
+    this.targetSampleRate = 16000;
+
+    // Voice detection parameters
+    this.voiceDetectionEnabled = true; // Enable by default
+    this.voiceThreshold = 0.01; // RMS threshold for voice detection
+    this.voiceHoldTime = 500; // Time to hold voice detection state in ms
+    this.lastVoiceDetectedTime = 0; // Last time voice was detected
+    this.isVoiceDetected = false; // Current voice detection state
+    this.consecutiveSilentFrames = 0; // Counter for silent frames
+    this.silenceThreshold = 10; // Number of silent frames before cutting off
+
+    this.onGeminiLiveStarted = (e) => {
+      if (this.isRecording) {
+        this.startStreaming();
+      }
+    };
+    this.onGeminiLiveStopped = (e) => {
+      this.stop();
+    };
+  }
+
+  connectedCallback() {
+    super.connectedCallback();
+    window.addEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.addEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  disconnectedCallback() {
+    super.disconnectedCallback();
+    this.stop();
+    window.removeEventListener(
+      "gemini-live-api-started",
+      this.onGeminiLiveStarted
+    );
+    window.removeEventListener(
+      "gemini-live-api-stopped",
+      this.onGeminiLiveStopped
+    );
+  }
+
+  firstUpdated() {
+    if (this.state !== "disabled") {
+      this.startStreaming();
+    }
+  }
+
+  log(...args) {
+    if (this.debug) {
+      console.log(...args);
+    }
+  }
+
+  warn(...args) {
+    if (this.debug) {
+      console.warn(...args);
+    }
+  }
+
+  error(...args) {
+    if (this.debug) {
+      console.error(...args);
+    }
+  }
+
+  isVoiceFrame(audioData) {
+    // Calculate RMS of the audio frame
+    let sumSquares = 0;
+    for (let i = 0; i < audioData.length; i++) {
+      sumSquares += audioData[i] * audioData[i];
+    }
+    const rms = Math.sqrt(sumSquares / audioData.length);
+
+    const now = Date.now();
+
+    // Check if we detect voice in this frame
+    if (rms > this.voiceThreshold) {
+      this.lastVoiceDetectedTime = now;
+      this.consecutiveSilentFrames = 0;
+      this.isVoiceDetected = true;
+      return true;
+    }
+
+    // Check if we're still within the hold time
+    if (now - this.lastVoiceDetectedTime < this.voiceHoldTime) {
+      return true;
+    }
+
+    // Increment silent frames counter
+    this.consecutiveSilentFrames++;
+
+    // If we've seen enough silent frames, mark as silent
+    if (this.consecutiveSilentFrames > this.silenceThreshold) {
+      this.isVoiceDetected = false;
+    }
+
+    return this.isVoiceDetected;
+  }
+
+  async startStreaming() {
+    if (this.state === "disabled") {
+      this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "initializing"));
+    }
+    this.isInitializing = true;
+    const initialized = await this.initialize();
+    this.isInitializing = false;
+    if (initialized) {
+      this.isRecording = true;
+      this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "recording"));
+      this.start();
+    }
+  }
+
+  async initialize() {
+    try {
+      // First check what sample rates are supported with echo cancellation
+      const testStream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          autoGainControl: true,
+        },
+        video: false,
+      });
+
+      // Get the actual sample rate from the system
+      const systemTrack = testStream.getAudioTracks()[0];
+      const settings = systemTrack.getSettings();
+      this.log("System audio settings:", settings);
+
+      // Clean up the test stream
+      testStream.getTracks().forEach((track) => track.stop());
+
+      // Now create the real stream using the system's capabilities
+      this.mediaStream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          channelCount: 1,
+          sampleRate: settings.sampleRate,
+          echoCancellation: true,
+          noiseSuppression: true,
+          autoGainControl: true,
+          echoCancellationType: "system",
+          latency: 0,
+        },
+        video: false,
+      });
+
+      // Log the actual constraints that were applied
+      const audioTrack = this.mediaStream.getAudioTracks()[0];
+      const actualConstraints = audioTrack.getSettings();
+      this.log("Applied audio constraints:", actualConstraints);
+
+      // Set up audio context matching the system rate
+      this.audioContext = new AudioContext({
+        sampleRate: settings.sampleRate,
+      });
+      this.log(
+        "AudioContext created with sample rate:",
+        this.audioContext.sampleRate
+      );
+
+      const micSource = this.audioContext.createMediaStreamSource(
+        this.mediaStream
+      );
+
+      this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
+
+      // Connect the audio nodes
+      micSource.connect(this.processor);
+      this.processor.connect(this.audioContext.destination);
+
+      return true;
+    } catch (error) {
+      this.error("Error initializing audio streamer:", error);
+      return false;
+    }
+  }
+
+  downsampleBuffer(buffer, originalSampleRate) {
+    if (originalSampleRate === this.targetSampleRate) {
+      return buffer;
+    }
+
+    const ratio = originalSampleRate / this.targetSampleRate;
+    const newLength = Math.floor(buffer.length / ratio);
+    const result = new Float32Array(newLength);
+
+    for (let i = 0; i < newLength; i++) {
+      const startIndex = Math.floor(i * ratio);
+      const endIndex = Math.floor((i + 1) * ratio);
+      let sum = 0;
+      let count = 0;
+
+      for (let j = startIndex; j < endIndex && j < buffer.length; j++) {
+        sum += buffer[j];
+        count++;
+      }
+
+      result[i] = count > 0 ? sum / count : 0;
+    }
+
+    this.log("Downsampling details:", {
+      originalRate: originalSampleRate,
+      targetRate: this.targetSampleRate,
+      originalLength: buffer.length,
+      newLength: result.length,
+      actualRatio: buffer.length / result.length,
+    });
+
+    return result;
+  }
+
+  addAudioDebugger(sourceNode, label) {
+    if (!this.debug) return;
+
+    const analyser = this.audioContext.createAnalyser();
+    analyser.fftSize = 2048;
+    sourceNode.connect(analyser);
+
+    const bufferLength = analyser.frequencyBinCount;
+    const dataArray = new Float32Array(bufferLength);
+
+    this.debugInterval = setInterval(() => {
+      if (!this.isStreaming) return;
+
+      analyser.getFloatTimeDomainData(dataArray);
+      let rms = 0;
+      for (let i = 0; i < bufferLength; i++) {
+        rms += dataArray[i] * dataArray[i];
+      }
+      rms = Math.sqrt(rms / bufferLength);
+      this.log(`${label} RMS Level: ${rms.toFixed(6)}`);
+    }, 1000);
+  }
+
+  start() {
+    this.isStreaming = true;
+    this.debugBuffer = [];
+    this.lastVoiceDetectedTime = 0;
+    this.isVoiceDetected = false;
+    this.consecutiveSilentFrames = 0;
+
+    this.processor.onaudioprocess = (event) => {
+      if (!this.isStreaming) return;
+
+      const inputData = event.inputBuffer.getChannelData(0);
+      const originalSampleRate = event.inputBuffer.sampleRate;
+
+      // Log initial processing details if needed
+      if (this.sequenceNumber === 0) {
+        this.log("Audio Processing Details:", {
+          bufferSize: this.processor.bufferSize,
+          inputChannels: this.processor.numberOfInputs,
+          outputChannels: this.processor.numberOfOutputs,
+          originalSampleRate: originalSampleRate,
+          targetSampleRate: this.targetSampleRate,
+          length: inputData.length,
+          timestamp: event.timeStamp,
+        });
+      }
+
+      // Check for voice activity if enabled
+      if (this.voiceDetectionEnabled && !this.isVoiceFrame(inputData)) {
+        // Skip this frame if no voice is detected
+        this.sequenceNumber++; // Still increment to maintain sequence
+        return;
+      }
+
+      const downsampledData = this.downsampleBuffer(
+        inputData,
+        originalSampleRate
+      );
+
+      const processedData = new Float32Array(downsampledData.length);
+      const gain = 5.0;
+      for (let i = 0; i < downsampledData.length; i++) {
+        processedData[i] = downsampledData[i] * gain;
+      }
+
+      // Debug logging
+      if (this.sequenceNumber % 50 === 0 && this.debug) {
+        const stats = {
+          originalLength: inputData.length,
+          downsampledLength: downsampledData.length,
+          maxValue: Math.max(...processedData),
+          minValue: Math.min(...processedData),
+          originalSampleRate,
+          targetSampleRate: this.targetSampleRate,
+          isVoiceDetected: this.isVoiceDetected,
+        };
+        this.log("Audio buffer stats:", stats);
+      }
+
+      // Store in debug buffer
+      this.debugBuffer.push(processedData);
+      if (this.debugBuffer.length > this.debugBufferSize) {
+        this.debugBuffer.shift();
+      }
+
+      // Audio level monitoring
+      let rms = 0;
+      for (let i = 0; i < processedData.length; i++) {
+        rms += processedData[i] * processedData[i];
+      }
+      rms = Math.sqrt(rms / processedData.length);
+
+      if (this.sequenceNumber % 10 === 0 && this.debug) {
+        this.log(
+          `Audio Level (RMS): ${rms.toFixed(4)}, Voice Detected: ${
+            this.isVoiceDetected
+          }`
+        );
+        if (rms < 0.0001) {
+          this.warn(
+            "Warning: Very low audio level detected. Check if microphone is working."
+          );
+        }
+      }
+
+      // Convert to Int16Array for transmission
+      const intData = new Int16Array(processedData.length);
+      for (let i = 0; i < processedData.length; i++) {
+        intData[i] = Math.max(
+          -32768,
+          Math.min(32767, processedData[i] * 32768)
+        );
+
+        if (this.sequenceNumber % 100 === 0 && i < 10 && this.debug) {
+          this.log(
+            `Sample ${i}: Float=${processedData[i].toFixed(4)}, Int16=${
+              intData[i]
+            }`
+          );
+        }
+      }
+
+      // Convert to base64 and dispatch
+      const bytes = new Uint8Array(intData.buffer);
+      const base64Data = btoa(
+        Array.from(bytes)
+          .map((byte) => String.fromCharCode(byte))
+          .join("")
+      );
+
+      this.dispatchEvent(
+        new MesopEvent(this.dataEvent, {
+          sequence: this.sequenceNumber++,
+          sampleRate: this.targetSampleRate,
+          data: base64Data,
+          isVoice: this.isVoiceDetected,
+        })
+      );
+
+      this.dispatchEvent(
+        new CustomEvent("audio-input-received", {
+          detail: { data: base64Data },
+          // Allow event to cross shadow DOM boundaries (both need to be true)
+          bubbles: true,
+          composed: true,
+        })
+      );
+    };
+
+    return true;
+  }
+
+  stop() {
+    this.isStreaming = false;
+    this.isRecording = false;
+
+    this.dispatchEvent(new MesopEvent(this.stateChangeEvent, "disabled"));
+
+    if (this.debugInterval) {
+      clearInterval(this.debugInterval);
+    }
+
+    if (this.processor) {
+      this.processor.onaudioprocess = null;
+    }
+
+    if (this.mediaStream) {
+      this.mediaStream.getTracks().forEach((track) => track.stop());
+    }
+
+    if (this.audioContext) {
+      this.audioContext.close();
+    }
+  }
+
+  async playbackDebug() {
+    if (!this.debugBuffer.length) {
+      this.log("No audio data available for playback");
+      return;
+    }
+
+    const playbackContext = new AudioContext();
+    const systemSampleRate = playbackContext.sampleRate;
+
+    const totalSamples16k =
+      this.debugBuffer.length * this.debugBuffer[0].length;
+
+    const upsampledLength = Math.round(
+      totalSamples16k * (systemSampleRate / this.targetSampleRate)
+    );
+
+    const audioBuffer = playbackContext.createBuffer(
+      1,
+      upsampledLength,
+      systemSampleRate
+    );
+
+    const channelData = audioBuffer.getChannelData(0);
+
+    const combined16kBuffer = new Float32Array(totalSamples16k);
+    let offset = 0;
+    for (let i = 0; i < this.debugBuffer.length; i++) {
+      combined16kBuffer.set(this.debugBuffer[i], offset);
+      offset += this.debugBuffer[i].length;
+    }
+
+    const ratio = this.targetSampleRate / systemSampleRate;
+    for (let i = 0; i < upsampledLength; i++) {
+      const position = i * ratio;
+      const index = Math.floor(position);
+      const decimal = position - index;
+
+      const sample1 = combined16kBuffer[index] || 0;
+      const sample2 = combined16kBuffer[index + 1] || sample1;
+      channelData[i] = sample1 + decimal * (sample2 - sample1);
+    }
+
+    const source = playbackContext.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(playbackContext.destination);
+    source.start();
+    this.log("Playing debug audio at system rate...", {
+      systemSampleRate,
+      originalLength: totalSamples16k,
+      upsampledLength,
+    });
+
+    source.onended = () => {
+      this.log("Debug playback finished");
+      playbackContext.close();
+    };
+  }
+
+  render() {
+    if (this.isInitializing) {
+      return html`<span><slot></slot></span>`;
+    }
+
+    if (this.isRecording) {
+
return html`<span @click="${this.stop}"><slot></slot></span> `;
|
490 |
+
}
|
491 |
+
|
492 |
+
return html`<span @click="${this.startStreaming}"><slot></slot></span>`;
|
493 |
+
}
|
494 |
+
}
|
495 |
+
|
496 |
+
customElements.define("audio-recorder", AudioRecorder);
|
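For reference, a minimal sketch of how the Mesop side could decode one of the base64 PCM chunks dispatched above. The handler name is hypothetical and only standard-library modules are used; the one assumption is that the payload arrives in `event.value["data"]`, mirroring the `MesopEvent` dispatch in this file.

import base64
from array import array

import mesop.labs as mel


def decode_audio_chunk(event: mel.WebEvent):
    # Hypothetical handler: decode the base64 payload from the dataEvent above.
    # The bytes are 16-bit signed PCM at targetSampleRate (16000 Hz), in the
    # byte order of the recording machine (little-endian in practice).
    pcm_bytes = base64.b64decode(event.value["data"])
    samples = array("h")
    samples.frombytes(pcm_bytes)
    # Normalize back to floats in [-1, 1], mirroring the Int16 conversion above.
    floats = [s / 32768 for s in samples]
    print(f"sequence={event.value['sequence']} samples={len(floats)}")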
web_components/audio_recorder.py
ADDED
@@ -0,0 +1,44 @@
from typing import Any, Callable, Literal

import mesop.labs as mel


@mel.web_component(path="./audio_recorder.js")
def audio_recorder(
    *,
    state: Literal["disabled", "initializing", "recording"] = "disabled",
    on_data: Callable[[mel.WebEvent], Any] | None = None,
    on_state_change: Callable[[mel.WebEvent], Any] | None = None,
):
    """Records audio and streams it to the Mesop server.

    This web component is designed to work with `MESOP_WEBSOCKETS_ENABLED=true`.

    The `on_data` event returns continuous chunks of audio in base64-encoded PCM
    format at a 16000 Hz sampling rate. For some reason the Gemini Live API only
    accepts PCM data at 16000 Hz; at 48000 Hz, nothing is returned. Perhaps there
    is a setting to override the expected sampling rate when sending audio to the
    Gemini Live API. Unfortunately, the docs are very sparse right now.

    The data event looks like:

        {
          "data": <base64-encoded-string>
        }
    """
    return mel.insert_web_component(
        name="audio-recorder",
        events=_filter_events(
            {
                "dataEvent": on_data,
                "stateChangeEvent": on_state_change,
            }
        ),
        properties={
            "state": state,
        },
    )


def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    return {event: callback for event, callback in events.items() if callback}
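A hedged usage sketch (not part of this commit) showing how `audio_recorder` could be wired into a Mesop page. It assumes Mesop slots children into the component via a `with` block, which the `<slot>` in the component template suggests; the page path and handler are illustrative.

import mesop as me
import mesop.labs as mel

from web_components.audio_recorder import audio_recorder


def handle_audio_chunk(event: mel.WebEvent):
    # event.value looks like {"sequence": ..., "sampleRate": 16000,
    # "data": "<base64 PCM>", "isVoice": ...} per audio_recorder.js.
    print("got chunk:", event.value["sequence"])


@me.page(path="/audio_demo")
def audio_demo():
    # The slotted content becomes the clickable recording toggle.
    with audio_recorder(state="disabled", on_data=handle_audio_chunk):
        me.text("Toggle microphone")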
web_components/gemini_live_connection.js
ADDED
@@ -0,0 +1,296 @@
import {
  LitElement,
  html,
} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";

class GeminiLiveConnection extends LitElement {
  static properties = {
    api_config: { type: String },
    enabled: { type: Boolean },
    endpoint: { type: String },
    startEvent: { type: String },
    stopEvent: { type: String },
    text_input: { type: String },
    toolCallEvent: { type: String },
    tool_call_responses: { type: String },
  };

  constructor() {
    super();
    this.onSetupComplete = () => {
      console.log("Setup complete...");
    };
    this.onAudioData = (base64Data) => {
      this.dispatchEvent(
        new CustomEvent("audio-output-received", {
          detail: { data: base64Data },
          // Allow event to cross shadow DOM boundaries (both need to be true)
          bubbles: true,
          composed: true,
        })
      );
    };
    this.onInterrupted = () => {};
    this.onTurnComplete = () => {};
    this.onError = () => {};
    this.onClose = () => {
      console.log("Web socket closed...");
    };
    this.onToolCall = (toolCalls) => {
      this.dispatchEvent(
        new MesopEvent(this.toolCallEvent, {
          toolCalls: JSON.stringify(toolCalls.functionCalls),
        })
      );
    };
    this.pendingSetupMessage = null;

    this.onAudioInputReceived = (e) => {
      this.sendAudioChunk(e.detail.data);
    };
  }

  connectedCallback() {
    super.connectedCallback();
    // Start listening for events when component is connected
    window.addEventListener("audio-input-received", this.onAudioInputReceived);
  }

  disconnectedCallback() {
    super.disconnectedCallback();
    window.removeEventListener(
      "audio-input-received",
      this.onAudioInputReceived
    );
    if (this.ws) {
      this.ws.close();
    }
  }

  firstUpdated() {
    if (this.enabled) {
      this.setupWebSocket();
    }
  }

  updated(changedProperties) {
    if (
      changedProperties.has("tool_call_responses") &&
      this.tool_call_responses.length > 0
    ) {
      this.sendToolResponse(JSON.parse(this.tool_call_responses));
    }
    if (changedProperties.has("text_input") && this.text_input.length > 0) {
      this.sendTextMessage(this.text_input);
    }
  }

  start() {
    if (!this.enabled) {
      this.dispatchEvent(new MesopEvent(this.startEvent, {}));
      this.dispatchEvent(
        new CustomEvent("gemini-live-api-started", {
          detail: {},
          // Allow event to cross shadow DOM boundaries (both need to be true)
          bubbles: true,
          composed: true,
        })
      );
    }
    this.setupWebSocket();
  }

  stop() {
    this.dispatchEvent(new MesopEvent(this.stopEvent, {}));
    this.dispatchEvent(
      new CustomEvent("gemini-live-api-stopped", {
        detail: {},
        // Allow event to cross shadow DOM boundaries (both need to be true)
        bubbles: true,
        composed: true,
      })
    );
    if (this.ws) {
      this.ws.close();
    }
  }

  setupWebSocket() {
    this.ws = new WebSocket(this.endpoint);
    this.ws.onopen = () => {
      console.log("WebSocket connection is opening...");
      this.sendSetupMessage();
    };

    this.ws.onmessage = async (event) => {
      try {
        let wsResponse;
        if (event.data instanceof Blob) {
          const responseText = await event.data.text();
          wsResponse = JSON.parse(responseText);
        } else {
          wsResponse = JSON.parse(event.data);
        }

        if (wsResponse.setupComplete) {
          this.onSetupComplete();
        } else if (wsResponse.toolCall) {
          this.onToolCall(wsResponse.toolCall);
        } else if (wsResponse.serverContent) {
          if (wsResponse.serverContent.interrupted) {
            this.onInterrupted();
            return;
          }

          if (wsResponse.serverContent.modelTurn?.parts?.[0]?.inlineData) {
            const audioData =
              wsResponse.serverContent.modelTurn.parts[0].inlineData.data;
            this.onAudioData(audioData);

            if (!wsResponse.serverContent.turnComplete) {
              this.sendContinueSignal();
            }
          }

          if (wsResponse.serverContent.turnComplete) {
            this.onTurnComplete();
          }
        }
      } catch (error) {
        console.error("Error parsing response:", error);
        this.onError("Error parsing response: " + error.message);
      }
    };

    this.ws.onerror = (error) => {
      console.error("WebSocket Error:", error);
      this.onError("WebSocket Error: " + error.message);
    };

    this.ws.onclose = (event) => {
      console.log("Connection closed:", event);
      this.onClose(event);
    };
  }

  sendMessage(message) {
    if (this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify(message));
    } else {
      console.error(
        "WebSocket is not open. Current state:",
        this.ws.readyState
      );
      this.onError("WebSocket is not ready. Please try again.");
    }
  }

  sendSetupMessage() {
    if (this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(this.api_config);
    } else {
      console.error("Connection not ready.");
    }
  }

  sendAudioChunk(base64Audio) {
    const message = {
      realtime_input: {
        media_chunks: [
          {
            mime_type: "audio/pcm",
            data: base64Audio,
          },
        ],
      },
    };
    this.sendMessage(message);
  }

  sendEndMessage() {
    const message = {
      client_content: {
        turns: [
          {
            role: "user",
            parts: [],
          },
        ],
        turn_complete: true,
      },
    };
    this.sendMessage(message);
  }

  sendContinueSignal() {
    const message = {
      client_content: {
        turns: [
          {
            role: "user",
            parts: [],
          },
        ],
        turn_complete: false,
      },
    };
    this.sendMessage(message);
  }

  sendTextMessage(text) {
    this.sendMessage({
      client_content: {
        turn_complete: true,
        turns: [{ role: "user", parts: [{ text: text }] }],
      },
    });
  }

  sendToolResponse(functionResponses) {
    const toolResponse = {
      tool_response: {
        function_responses: functionResponses,
      },
    };
    this.sendMessage(toolResponse);
  }

  async ensureConnected() {
    if (this.ws.readyState === WebSocket.OPEN) {
      return;
    }

    return new Promise((resolve, reject) => {
      const timeout = setTimeout(() => {
        reject(new Error("Connection timeout"));
      }, 5000);

      const onOpen = () => {
        clearTimeout(timeout);
        this.ws.removeEventListener("open", onOpen);
        this.ws.removeEventListener("error", onError);
        resolve();
      };

      const onError = (error) => {
        clearTimeout(timeout);
        this.ws.removeEventListener("open", onOpen);
        this.ws.removeEventListener("error", onError);
        reject(error);
      };

      this.ws.addEventListener("open", onOpen);
      this.ws.addEventListener("error", onError);
    });
  }

  render() {
    if (this.enabled) {
      return html`<span @click="${this.stop}"><slot></slot></span>`;
    } else {
      return html`<span @click="${this.start}"><slot></slot></span>`;
    }
  }
}

customElements.define("gemini-live-connection", GeminiLiveConnection);
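Since `sendSetupMessage` forwards `this.api_config` verbatim as the first frame, the config must be a JSON string in the shape the BidiGenerateContent endpoint expects. A sketch of such a payload follows; the model name and field names are an assumption based on the v1alpha Live API protocol as best understood from its sparse docs, not something this commit defines.

import json

# Illustrative api_config payload; field names are assumptions based on the
# v1alpha BidiGenerateContent setup message and may change.
api_config = json.dumps(
    {
        "setup": {
            "model": "models/gemini-2.0-flash-exp",
            "generation_config": {"response_modalities": ["AUDIO"]},
        }
    }
)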
web_components/gemini_live_connection.py
ADDED
@@ -0,0 +1,43 @@
from typing import Any, Callable

import mesop.labs as mel


_HOST = "generativelanguage.googleapis.com"

_GEMINI_BIDI_WEBSOCKET_URI = "wss://{host}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key={api_key}"


@mel.web_component(path="./gemini_live_connection.js")
def gemini_live_connection(
    *,
    enabled: bool = False,
    api_key: str = "",
    api_config: str = "",
    on_start: Callable[[mel.WebEvent], Any] | None = None,
    on_stop: Callable[[mel.WebEvent], Any] | None = None,
    on_tool_call: Callable[[mel.WebEvent], Any] | None = None,
    tool_call_responses: str = "",
    text_input: str = "",
):
    return mel.insert_web_component(
        name="gemini-live-connection",
        events=_filter_events(
            {
                "startEvent": on_start,
                "stopEvent": on_stop,
                "toolCallEvent": on_tool_call,
            }
        ),
        properties={
            "api_config": api_config,
            "enabled": enabled,
            "endpoint": _GEMINI_BIDI_WEBSOCKET_URI.format(host=_HOST, api_key=api_key),
            "tool_call_responses": tool_call_responses,
            "text_input": text_input,
        },
    )


def _filter_events(events: dict[str, Callable[[mel.WebEvent], Any] | None]):
    return {event: callback for event, callback in events.items() if callback}
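A hedged usage sketch (not part of this commit): wiring `gemini_live_connection` into a page with a tool-call handler. The page path, handler, and inline config are illustrative, and the `with` block assumes Mesop slots children into the component.

import json

import mesop as me
import mesop.labs as mel

from web_components.gemini_live_connection import gemini_live_connection

# Placeholder setup payload; see the api_config sketch above.
_API_CONFIG = json.dumps({"setup": {"model": "models/gemini-2.0-flash-exp"}})


def handle_tool_call(event: mel.WebEvent):
    # toolCalls arrives as a JSON string of the functionCalls array
    # (see the toolCallEvent dispatch in gemini_live_connection.js).
    calls = json.loads(event.value["toolCalls"])
    print("tool calls:", calls)


@me.page(path="/live_demo")
def live_demo():
    # The slotted content toggles the connection on click.
    with gemini_live_connection(
        api_key="YOUR_API_KEY",
        api_config=_API_CONFIG,
        on_tool_call=handle_tool_call,
    ):
        me.text("Start / stop session")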
web_components/video_recorder.js
ADDED
@@ -0,0 +1,223 @@
import {
  LitElement,
  html,
  css,
} from "https://cdn.jsdelivr.net/gh/lit/dist@3/core/lit-core.min.js";

class VideoRecorder extends LitElement {
  static styles = css`
    :host {
      display: block;
    }

    .video-container {
      position: relative;
      width: 100%;
      max-width: 640px;
      margin: 0 auto;
    }

    video {
      width: 100%;
      height: auto;
      background: #000;
    }

    .controls {
      margin-top: 10px;
      text-align: center;
    }

    button {
      padding: 8px 16px;
      font-size: 16px;
      cursor: pointer;
    }
  `;

  static properties = {
    dataEvent: { type: String },
    recordEvent: { type: String },
    isRecording: { type: Boolean },
    enabled: { type: Boolean },
    quality: { type: Number },
    fps: { type: Number },
    showPreview: { type: Boolean },
  };

  constructor() {
    super();
    this.debug = false;
    this.mediaStream = null;
    this.isStreaming = false;
    this.isRecording = false;
    this.isInitializing = false;
    this.enabled = false;
    this.quality = 0.8; // JPEG quality
    this.fps = 2; // Frames per second
    this.showPreview = true; // Enable preview by default

    // Setup canvas and video elements
    this.video = document.createElement("video");
    this.video.setAttribute("playsinline", ""); // Better mobile support
    this.video.setAttribute("autoplay", "");
    this.video.setAttribute("muted", "");
    this.canvas = document.createElement("canvas");
    this.ctx = this.canvas.getContext("2d");
    this.captureInterval = null;
  }

  disconnectedCallback() {
    this.stop();
    super.disconnectedCallback();
  }

  firstUpdated() {
    if (this.enabled) {
      this.startStreaming();
    }
  }

  log(...args) {
    if (this.debug) {
      console.log(...args);
    }
  }

  warn(...args) {
    if (this.debug) {
      console.warn(...args);
    }
  }

  error(...args) {
    if (this.debug) {
      console.error(...args);
    }
  }

  async startStreaming() {
    if (!this.enabled) {
      // this.dispatchEvent(new MesopEvent(this.recordEvent, {}));
    }
    this.isInitializing = true;
    const initialized = await this.initialize();
    this.isInitializing = false;
    if (initialized) {
      this.isRecording = true;
      this.start();
    }
  }

  async initialize() {
    try {
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        video: {
          width: { ideal: 1280 },
          height: { ideal: 720 },
        },
      });

      this.video.srcObject = this.mediaStream;
      await this.video.play();

      // Wait for video to be ready
      await new Promise((resolve) => {
        this.video.onloadedmetadata = () => {
          this.canvas.width = this.video.videoWidth;
          this.canvas.height = this.video.videoHeight;
          resolve();
        };
      });

      // Request a redraw to show the video preview
      this.requestUpdate();
      return true;
    } catch (error) {
      this.error("Error accessing webcam:", error);
      return false;
    }
  }

  captureFrame() {
    if (!this.mediaStream) {
      this.error("Webcam not started");
      return null;
    }

    // Draw current video frame to canvas
    this.ctx.drawImage(this.video, 0, 0);

    // Convert to JPEG and base64 encode
    const base64Data = this.canvas.toDataURL("image/jpeg", this.quality);

    // Remove the data URL prefix to get just the base64 data
    return base64Data.replace("data:image/jpeg;base64,", "");
  }

  start() {
    this.isStreaming = true;

    // Start capturing frames at specified FPS
    const intervalMs = 1000 / this.fps;
    this.captureInterval = setInterval(() => {
      const base64Frame = this.captureFrame();
      if (base64Frame) {
        this.dispatchEvent(
          new MesopEvent(this.dataEvent, {
            data: base64Frame,
          })
        );
      }
    }, intervalMs);

    return true;
  }

  stop() {
    this.isStreaming = false;
    this.isRecording = false;

    if (this.captureInterval) {
      clearInterval(this.captureInterval);
      this.captureInterval = null;
    }

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }

    // Clear video source
    if (this.video.srcObject) {
      this.video.srcObject = null;
    }
  }

  render() {
    return html`
      <div class="video-container">
        ${this.showPreview && (this.isRecording || this.isInitializing)
          ? html`<video
              .srcObject="${this.mediaStream}"
              playsinline
              autoplay
              muted
            ></video>`
          : null}

        <div class="controls">
          ${this.isInitializing
            ? html`<div>Initializing video recorder...</div>`
            : this.isRecording
            ? html`<button @click="${this.stop}">Stop Recording</button>`
            : html`<button @click="${this.startStreaming}">
                Start Recording
              </button>`}
        </div>
      </div>
    `;
  }
}

customElements.define("video-recorder", VideoRecorder);
web_components/video_recorder.py
ADDED
@@ -0,0 +1,32 @@
from typing import Any, Callable

import mesop.labs as mel


@mel.web_component(path="./video_recorder.js")
def video_recorder(
    *,
    enabled: bool = False,
    on_data: Callable[[mel.WebEvent], Any],
    on_record: Callable[[mel.WebEvent], Any],
):
    """Records video and streams it to the Mesop server.

    This web component is designed to work with `MESOP_WEBSOCKETS_ENABLED=true`.

    The data event looks like:

        {
          "data": <base64-encoded-string>
        }
    """
    return mel.insert_web_component(
        name="video-recorder",
        events={
            "dataEvent": on_data,
            "recordEvent": on_record,
        },
        properties={
            "enabled": enabled,
        },
    )
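Finally, a hedged sketch of a frame handler for `video_recorder` (not part of this commit): each `on_data` event carries one base64-encoded JPEG, so decoding is a single `b64decode`. The handler name and the output path under the gitignored `data/` directory are illustrative.

import base64
import pathlib

import mesop.labs as mel


def handle_video_frame(event: mel.WebEvent):
    # Hypothetical handler: decode and persist the latest JPEG frame.
    jpeg_bytes = base64.b64decode(event.value["data"])
    out = pathlib.Path("data")
    out.mkdir(exist_ok=True)
    (out / "last_frame.jpg").write_bytes(jpeg_bytes)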