File size: 3,666 Bytes
5946632
 
 
37f6968
 
 
5946632
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987f32b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9ae4a3
 
 
 
 
 
 
 
 
 
 
 
 
5946632
987f32b
 
 
 
 
 
 
 
 
 
 
 
 
 
5946632
 
987f32b
5946632
 
 
 
 
 
 
 
 
 
c9ae4a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from dataclasses import dataclass, field
from typing import Any, Union

import gradio as gr


# What an environment hands back to the caller: plain text or a structured dict.
Observation = Union[str, dict[str, Any]]
# What a caller submits to an environment's step().
Action = Union[str, dict[str, Any]]  # e.g., user message, tool call schema


@dataclass
class StepResult:
    """Outcome of a single environment step."""

    # What the environment shows after the step (text or structured dict).
    observation: Observation
    # Scalar reward assigned to the step.
    reward: float
    # True when the episode has ended and no further steps are expected.
    done: bool
    # Free-form extras; defaults to a fresh empty dict per instance.
    info: dict[str, Any] = field(default_factory=dict)


class WordleEnv:
    """
    Demonstration env. Not a full game; 4-letter variant for brevity.

    Observations are 4-character emoji strings, one symbol per guessed letter:
    🟩 right letter in the right position, 🟨 letter occurs elsewhere in the
    secret (counted, so a letter is never reported more often than the secret
    contains it), ⬜ otherwise.

    Actions are 4-letter words; they are converted with ``str()``, stripped
    and lowercased before scoring, and the secret is lowercased as well, so
    matching is case-insensitive.

    Reward is 1.0 on success, 0.0 for any other valid guess, and -0.05 for an
    invalid guess (which does not consume an attempt). Terminal on success or
    after ``max_guesses`` valid guesses.
    """

    def __init__(self, *, secret: str = "word", max_guesses: int = 6) -> None:
        """
        Create an environment with a fixed secret word.

        Args:
            secret: The 4-letter alphabetic word to be guessed (any case).
            max_guesses: Number of valid guesses after which the episode ends.

        Raises:
            ValueError: If ``secret`` is not exactly 4 alphabetic characters.
        """
        # Guesses are lowercased in step(), so the secret must be too;
        # otherwise an upper-case secret could never be matched.
        normalized = secret.lower()
        # Explicit raise rather than assert: asserts vanish under ``python -O``.
        if len(normalized) != 4 or not normalized.isalpha():
            raise ValueError("secret must be exactly 4 alphabetic characters")
        self._secret = normalized
        self._max = max_guesses
        self._n = 0
        self._obs = "⬜" * 4

    def reset(self) -> Observation:
        """Start a new episode and return the initial (all-blank) observation."""
        self._n = 0
        self._obs = "⬜" * 4
        return self._obs

    def step(self, action: Action) -> StepResult:
        """
        Score one guess against the secret.

        Args:
            action: The guess; non-string actions are stringified first.

        Returns:
            StepResult: The emoji feedback, the reward, the terminal flag and
            an info dict holding either ``{"guesses": n}`` for a valid guess
            or ``{"error": "invalid guess"}`` for a rejected one (in which
            case the previous observation is returned unchanged).
        """
        guess: str = str(action).strip().lower()
        if len(guess) != 4 or not guess.isalpha():
            return StepResult(self._obs, -0.05, False, {"error": "invalid guess"})
        self._n += 1
        secret = self._secret
        # Secret letters not matched in place are the only ones that may earn
        # a yellow; each is consumed once so duplicates are not over-reported.
        unmatched = [s for s, g in zip(secret, guess) if s != g]
        feedback: list[str] = []
        for s, g in zip(secret, guess):
            if g == s:
                feedback.append("🟩")
            elif g in unmatched:
                unmatched.remove(g)
                feedback.append("🟨")
            else:
                feedback.append("⬜")
        self._obs = "".join(feedback)
        solved = guess == secret
        done = solved or self._n >= self._max
        reward = 1.0 if solved else 0.0
        return StepResult(self._obs, reward, done, {"guesses": self._n})

    def render(self) -> str:
        """Return the most recent observation string."""
        return self._obs


# def step_fn(guess: str, wordle) -> tuple[str, float, bool, dict]:
#     """
#     Perform a step in the Wordle environment.
#
#     Args:
#         guess (str): The guessed word (4-letter lowercase string).
#
#     Returns:
#         tuple[str, float, bool, dict]: A tuple containing:
#             - observation: The observation after the step .
#             - reward: The reward obtained from the step.
#             - done: Whether the game is done.
#             - info: Additional info.
#     """
#     result = wordle.step(guess)
#     return result.observation, result.reward, result.done, result.info, wordle

# Single module-level environment: every call to step_fn steps this same
# instance, so the guess count carries over between calls. Nothing in the
# visible file calls reset() on it.
wordle = WordleEnv(secret="word")


def step_fn(guess: str) -> tuple[str, float, bool, dict]:
    """
    Submit one guess to the shared Wordle environment and report the outcome.

    Args:
        guess (str): The word to try (the environment expects 4 letters).

    Returns:
        tuple[str, float, bool, dict]: The step outcome, in this order:
            - observation: The environment's feedback for the guess.
            - reward: The reward assigned to this step.
            - done: Whether the episode has ended.
            - info: Extra details reported by the environment.
    """
    # Steps the module-level ``wordle`` instance; state persists across calls.
    outcome = wordle.step(guess)
    return (outcome.observation, outcome.reward, outcome.done, outcome.info)


# demo = gr.Interface(
#     fn=step_fn,
#     inputs=["text", gr.State(WordleEnv(secret="word"))],
#     outputs=[
#         gr.Textbox(label="Observation"),
#         gr.Number(label="Reward"),
#         gr.Textbox(label="Done"),
#         gr.Textbox(label="Info"),
#         gr.State(),
#     ],
# )
# Gradio UI around step_fn: one free-text input for the guess and one output
# widget per element of the tuple step_fn returns, in the same order.
demo = gr.Interface(
    fn=step_fn,
    inputs=["text"],
    outputs=[
        gr.Textbox(label="Observation"),
        gr.Number(label="Reward"),
        gr.Textbox(label="Done"),  # boolean flag displayed as text
        gr.Textbox(label="Info"),  # info dict displayed as text
    ],
)


if __name__ == "__main__":
    # Launch only when run as a script. mcp_server=True asks Gradio to also
    # serve the interface's function over MCP in addition to the web UI.
    demo.launch(mcp_server=True)