Spaces:
Sleeping
Sleeping
from dataclasses import dataclass, field | |
from typing import Any, Union | |
import gradio as gr | |
# What the environment hands back after a step: a plain string or a structured dict.
Observation = Union[str, dict[str, Any]]
# What is sent to the environment on each step.
Action = Union[str, dict[str, Any]]  # e.g., user message, tool call schema


@dataclass
class StepResult:
    """Outcome of one environment step.

    Attributes:
        observation: Observation produced by the step.
        reward: Scalar reward for the step.
        done: True when the episode has ended.
        info: Extra diagnostics; defaults to a fresh empty dict per instance.
    """

    observation: Observation
    reward: float
    done: bool
    info: dict[str, Any] = field(default_factory=dict)


class WordleEnv:
    """Demonstration env. Not a full game; 4-letter variant for brevity.

    Observations are emoji strings with one tile per letter; actions are
    4-letter words (surrounding whitespace and letter case are ignored).

    Reward is 1.0 for the guess that matches the secret, 0.0 for any other
    valid guess, and -0.05 for an invalid guess (which does not consume a
    turn). A step reports ``done`` on success or once ``max_guesses`` valid
    guesses have been made.
    """

    # Tiles are written as escapes so that an encoding round-trip cannot
    # mangle them again.
    _GREEN = "\U0001F7E9"   # right letter, right position
    _YELLOW = "\U0001F7E8"  # letter occurs elsewhere in the secret
    # NOTE(review): restored from garbled text as U+2B1B (black square); the
    # original may have been U+2B1C (white square) - confirm.
    _BLANK = "\u2B1B"       # letter not available in the secret
    _WORD_LEN = 4

    def __init__(self, *, secret: str = "word", max_guesses: int = 6) -> None:
        """Create an environment.

        Args:
            secret: The word to guess; four alphabetic characters, any case.
            max_guesses: Number of valid guesses after which a step reports
                the episode as done.

        Raises:
            ValueError: If ``secret`` is not exactly four letters.
        """
        # Guesses are lower-cased in step(), so the secret must be too or an
        # upper-case secret could never be matched. Validate after lowering
        # because lowering can change the length of some non-ASCII strings.
        normalized = secret.lower()
        if len(normalized) != self._WORD_LEN or not normalized.isalpha():
            raise ValueError("secret must be exactly 4 alphabetic characters")
        self._secret = normalized
        self._max = max_guesses
        self._n = 0
        self._obs = self._BLANK * self._WORD_LEN

    def reset(self) -> Observation:
        """Start a new episode and return the initial all-blank observation."""
        self._n = 0
        self._obs = self._BLANK * self._WORD_LEN
        return self._obs

    def step(self, action: Action) -> StepResult:
        """Score one guess.

        Args:
            action: The guess; it is converted with ``str()``, stripped and
                lower-cased before validation.

        Returns:
            A ``StepResult``. For an invalid guess the previous observation is
            returned with reward -0.05, ``done=False`` and
            ``info={"error": "invalid guess"}``. Otherwise the new tile string
            is returned with ``info={"guesses": <valid guesses so far>}``.
        """
        guess = str(action).strip().lower()
        if len(guess) != self._WORD_LEN or not guess.isalpha():
            # Invalid input is penalised but does not use up a turn.
            return StepResult(self._obs, -0.05, False, {"error": "invalid guess"})

        self._n += 1
        self._obs = self._score(guess)
        solved = guess == self._secret
        done = solved or self._n >= self._max
        reward = 1.0 if solved else 0.0
        return StepResult(self._obs, reward, done, {"guesses": self._n})

    def render(self) -> str:
        """Return the most recent observation."""
        return self._obs

    def _score(self, guess: str) -> str:
        """Return the tile string for an already validated, lower-cased guess."""
        secret = self._secret
        # Secret letters not matched in place. Each one can back at most one
        # yellow tile, so repeated letters in the guess are not over-reported.
        unmatched = [s for s, g in zip(secret, guess) if s != g]
        tiles: list[str] = []
        for s, g in zip(secret, guess):
            if g == s:
                tiles.append(self._GREEN)
            elif g in unmatched:
                unmatched.remove(g)
                tiles.append(self._YELLOW)
            else:
                tiles.append(self._BLANK)
        return "".join(tiles)
# def step_fn(guess: str, wordle) -> tuple[str, float, bool, dict]: | |
# """ | |
# Perform a step in the Wordle environment. | |
# | |
# Args: | |
# guess (str): The guessed word (4-letter lowercase string). | |
# | |
# Returns: | |
# tuple[str, float, bool, dict]: A tuple containing: | |
# - observation: The observation after the step.
# - reward: The reward obtained from the step. | |
# - done: Whether the game is done. | |
# - info: Additional info. | |
# """ | |
# result = wordle.step(guess) | |
# return result.observation, result.reward, result.done, result.info, wordle | |
# Single environment instance shared by every call to step_fn.
wordle = WordleEnv(secret="word")


def step_fn(guess: str) -> tuple[str, float, bool, dict]:
    """
    Perform a step in the Wordle environment.

    When the step ends the episode, the shared environment is reset so that
    the next call starts a new game.

    Args:
        guess (str): The guessed word (4-letter lowercase string).

    Returns:
        tuple[str, float, bool, dict]: A tuple containing:
            - observation: The observation after the step.
            - reward: The reward obtained from the step.
            - done: Whether the game is done.
            - info: Additional info.
    """
    result = wordle.step(guess)
    if result.done:
        # Nothing else in this module resets the shared env. Without this the
        # guess counter keeps growing after the episode ends and every later
        # call reports done=True.
        wordle.reset()
    return result.observation, result.reward, result.done, result.info
# demo = gr.Interface( | |
# fn=step_fn, | |
# inputs=["text", gr.State(WordleEnv(secret="word"))], | |
# outputs=[ | |
# gr.Textbox(label="Observation"), | |
# gr.Number(label="Reward"), | |
# gr.Textbox(label="Done"), | |
# gr.Textbox(label="Info"), | |
# gr.State(), | |
# ], | |
# ) | |
# Gradio interface around step_fn: one free-text input for the guess and one
# output component per element of the tuple that step_fn returns, in order.
demo = gr.Interface(
    fn=step_fn,
    inputs=["text"],
    outputs=[
        gr.Textbox(label="Observation"),
        gr.Number(label="Reward"),
        gr.Textbox(label="Done"),
        gr.Textbox(label="Info"),
    ],
)

if __name__ == "__main__":
    # Launch only when run as a script. mcp_server=True is passed through to
    # Gradio; per Gradio's documentation this also serves the interface's
    # function over MCP - confirm against the installed Gradio version.
    demo.launch(mcp_server=True)