Lyte committed on
Commit
5a73433
·
verified ·
1 Parent(s): d175a25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +301 -58
app.py CHANGED
@@ -1,81 +1,324 @@
1
  import os
2
  import gradio as gr
3
  from llama_cpp import Llama
4
- from huggingface_hub import hf_hub_download, login
5
- #import os
6
 
7
  #login(os.getenv("HF_TOKEN")) my bad now its public
8
 
9
  model = Llama(
10
  model_path=hf_hub_download(
11
- repo_id=os.environ.get("REPO_ID", "bartowski/HuatuoGPT-o1-7B-v0.1-GGUF"),
12
- filename=os.environ.get("MODEL_FILE", "HuatuoGPT-o1-7B-v0.1-Q4_0.gguf"),
13
  )
14
  )
15
 
16
- DESCRIPTION = '''
17
- # FreedomIntelligence/HuatuoGPT-o1-7B | Duplicate the space and set it to private for faster & personal inference for free.
18
- HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning.
19
- It generates a complex thought process, reflecting and refining its reasoning, before providing a final response.
20
-
21
- **To start a new chat**, click "clear" and start a new dialog.
22
- '''
23
-
24
- LICENSE = """
25
- --- Apache 2.0 License ---
26
  """
27
 
28
- def user(message, history):
29
- return "", history + [{"role": "user", "content": message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- def generate_text(history, max_tokens=512, temperature=0.9, top_p=0.95):
32
- """Generate a response using the Llama model."""
33
- messages = [{"role": item["role"], "content": item["content"]} for item in history[:-1]]
34
- message = history[-1]['content']
35
-
36
- response = model.create_chat_completion(
37
- messages=messages + [{"role": "user", "content": message}],
38
- temperature=temperature,
39
- max_tokens=max_tokens,
40
- top_p=top_p,
41
- stream=True,
42
- )
43
- history.append({"role": "assistant", "content": ""})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- for streamed in response:
46
- delta = streamed["choices"][0].get("delta", {})
47
- text_chunk = delta.get("content", "")
48
- history[-1]['content'] += text_chunk
49
- yield history
50
 
51
- with gr.Blocks() as demo:
52
- gr.Markdown(DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- chatbot = gr.Chatbot(type="messages")
55
- msg = gr.Textbox()
56
- clear = gr.Button("Clear")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- with gr.Accordion("Adjust Parameters", open=False):
59
- max_tokens = gr.Slider(minimum=512, maximum=4096, value=1024, step=1, label="Max Tokens")
60
- temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.9, step=0.1, label="Temperature")
61
- top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
64
- generate_text, [chatbot, max_tokens, temperature, top_p], chatbot
65
- )
66
- clear.click(lambda: None, None, chatbot, queue=False)
 
 
 
 
 
67
 
68
- gr.Examples(
69
- examples=[
70
- ["How many r's are in the word strawberry?"],
71
- ['How to stop a cough?'],
72
- ['How do I relieve feet pain?'],
73
- ],
74
- inputs=msg,
75
- label="Examples",
76
- )
 
 
 
 
 
77
 
78
- gr.Markdown(LICENSE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
 
80
  if __name__ == "__main__":
81
- demo.launch()
 
 
1
  import os
2
  import gradio as gr
3
  from llama_cpp import Llama
4
+ from huggingface_hub import hf_hub_download#, login
5
+ import numpy as np
6
 
7
  #login(os.getenv("HF_TOKEN")) my bad now its public
8
 
9
  # Fetch the model file from the Hugging Face Hub and hand its path to
  # llama.cpp. The REPO_ID and MODEL_FILE environment variables override the
  # default repository and file name. This runs at import time.
  model = Llama(
10
  model_path=hf_hub_download(
11
+ repo_id=os.environ.get("REPO_ID", "Lyte/QuadConnect2.5-0.5B-GRPO"),
12
+ filename=os.environ.get("MODEL_FILE", "unsloth.Q8_0.gguf"),
13
  )
14
  )
15
 
16
# System message sent with every model call. It defines the <reasoning> and
# <move> tags that the move handler later splits the reply on.
# NOTE(review): the "[" / "(" in the first sentence are unbalanced; presumably
# the model was tuned on this exact text -- confirm before rewording it.
+ SYSTEM_PROMPT = """You are a Connect Four player[Connect Four is played on a 6 x 7 grid (with 6 rows and 7 columns]. Given the current board state, predict the next move. Respond in the following format:
17
+ <reasoning>
18
+ Explain your reasoning for choosing the move, considering the current board state and potential future moves.
19
+ </reasoning>
20
+ <move>
21
+ The column and row of your move in the format 'a1', 'b3', 'g5', 'c6', etc. (column letter followed by row number).
22
+ </move>
 
 
 
23
  """
24
 
25
class ConnectFour:
    """State and rules for a 6x7 Connect Four game (human vs. AI).

    ``board`` is a ``(ROWS, COLS)`` numpy array in which ``0`` is an empty
    cell, ``1`` a human piece and ``2`` an AI piece.  Row index 0 is the top
    of the board and row index ``ROWS - 1`` the bottom, so a dropped piece
    lands on the highest free row index of its column.
    """

    ROWS = 6
    COLS = 7

    # (row step, column step) per line orientation, in the order they are
    # checked: horizontal, vertical, diagonal down-right, diagonal up-right.
    _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (-1, 1))

    def __init__(self):
        self.board = np.zeros((self.ROWS, self.COLS))
        self.current_player = 1  # 1 for player, 2 for AI
        self.game_over = False

    def make_move(self, col):
        """Drop the current player's piece into column ``col``.

        Args:
            col: Zero-based column index.  Integral floats (e.g. ``3.0``) are
                accepted because UI number widgets may deliver floats.

        Returns:
            ``(True, row)`` with the row the piece landed on, or
            ``(False, -1)`` when the game is over, the column is full, or
            ``col`` is not a valid column index.
        """
        if self.game_over:
            return False, -1

        try:
            col_index = int(col)
        except (TypeError, ValueError, OverflowError):
            return False, -1
        # Reject fractional values and anything off the board.  Without this
        # check a negative index would silently wrap to another column.
        if col_index != col or not 0 <= col_index < self.COLS:
            return False, -1

        # Find the lowest empty row in the selected column.
        for row in range(self.ROWS - 1, -1, -1):
            if self.board[row][col_index] == 0:
                self.board[row][col_index] = self.current_player
                return True, row
        return False, -1

    def check_winner(self):
        """Return the player (1 or 2) owning four in a row, or 0 if none."""
        for d_row, d_col in self._DIRECTIONS:
            for row in range(self.ROWS):
                for col in range(self.COLS):
                    end_row = row + 3 * d_row
                    end_col = col + 3 * d_col
                    # The start is always on the board; if the end is too,
                    # so are the two cells in between.
                    if not (0 <= end_row < self.ROWS and 0 <= end_col < self.COLS):
                        continue
                    owner = self.board[row][col]
                    if owner == 0:
                        continue
                    if all(
                        self.board[row + k * d_row][col + k * d_col] == owner
                        for k in range(1, 4)
                    ):
                        return int(owner)
        return 0

    def board_to_string(self):
        """Describe the occupied cells as text for the model prompt.

        Cells are listed top row first, left to right, as
        ``<column letter><row number>=<X|O>`` joined by ``", "``.  Row numbers
        count from 1 at the bottom; ``X`` is player 1 and ``O`` is the AI.
        An empty board yields an empty string.
        """
        moves = []
        for row in range(self.ROWS):
            for col in range(self.COLS):
                piece = self.board[row][col]
                if piece == 0:
                    continue
                col_letter = chr(ord('a') + col)
                row_num = str(self.ROWS - row)  # Convert to 1-based indexing
                player = "X" if piece == 1 else "O"
                moves.append(f"{col_letter}{row_num}={player}")
        return ", ".join(moves)

    def parse_ai_move(self, move_str):
        """Extract the zero-based column from a move such as ``'a1'`` or ``'b3'``.

        Only the leading column letter is used; the row is determined by
        where the piece falls.

        Returns:
            The column index (0 for ``a`` through 6 for ``g``), or ``-1`` when
            ``move_str`` is empty, not a string, or names a column off the board.
        """
        cell = move_str.strip().lower() if isinstance(move_str, str) else ""
        if not cell:
            return -1
        col = ord(cell[0]) - ord('a')
        return col if 0 <= col < self.COLS else -1
 
92
 
93
def create_interface():
    """Build the Gradio Blocks app for playing Connect Four against the model.

    The left column holds a status line, seven column buttons, the rendered
    board and a "New Game" button; the right column shows the model's
    reasoning.  Each column button plays the human move in that column and
    then asks the module-level ``model`` for a reply move.

    Returns:
        The assembled ``gr.Blocks`` object, not yet launched.
    """
    import html  # local import: only used to escape model output below

    # NOTE(review): this single game object is captured by the callbacks, so
    # it looks like every browser session shares one board -- consider
    # per-session state (gr.State) if that is not intended.
    game = ConnectFour()

    css = """
    .connect4-board {
        display: grid;
        grid-template-columns: repeat(7, 1fr);
        gap: 8px;
        max-width: 600px;
        margin: 10px auto;
        background: #2196F3;
        padding: 15px;
        border-radius: 15px;
        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
    }
    .connect4-cell {
        aspect-ratio: 1;
        background: white;
        border-radius: 50%;
        display: flex;
        align-items: center;
        justify-content: center;
        font-size: 2em;
    }
    .player1 { background: #f44336 !important; }
    .player2 { background: #ffc107 !important; }
    #ai-status {
        font-size: 1.2em;
        margin: 10px 0;
        color: #2196F3;
        font-weight: bold;
    }
    #ai-reasoning {
        background: #22004d;
        border-radius: 10px;
        padding: 15px;
        margin: 15px 0;
        font-family: monospace;
        min-height: 100px;
    }
    .reasoning-box {
        border-left: 4px solid #2196F3;
        padding-left: 15px;
        margin: 10px 0;
        background: #22004d;
        border-radius: 0 10px 10px 0;
    }
    #column-buttons {
        display: flex;
        justify-content: center;
        align-items: anchor-center;
        max-width: 600px;
        margin: 0 auto;
        padding: 0 15px;
    }
    #column-buttons button {
        margin: 0px 7px;
    }
    div.svelte-iyf88w {
        display: block;
    }
    """

    def panel(text):
        """Wrap already-safe ``text`` in the reasoning panel container."""
        return f'<div id="ai-reasoning">{text}</div>'

    def finish(status_text, panel_html):
        """Mark the game as over and return the three UI outputs."""
        game.game_over = True
        return [render_board(game.board), status_text, panel_html]

    def board_is_full():
        """Return True when no empty cell is left on the board."""
        return not (game.board == 0).any()

    def handle_move(col):
        """Play the human move in ``col``, then the model's reply.

        Returns a list of three values for the board HTML, the status
        markdown and the reasoning HTML, in that order.
        """
        if game.game_over:
            return [
                render_board(game.board),
                "Game is over! Click New Game to play again.",
                panel("Game Over!"),
            ]

        # Player move.  The hidden Number input may deliver a float, and the
        # board can only be indexed with integers.
        game.current_player = 1
        success, _ = game.make_move(int(col))
        if not success:
            return [
                render_board(game.board),
                "Column is full! Try another one.",
                panel("Invalid move!"),
            ]

        if game.check_winner() == 1:
            return finish("🎉 You win! 🎉", panel("Congratulations! You won!"))

        if board_is_full():
            return finish(
                "It's a draw! Click New Game to play again.",
                panel("The board is full."),
            )

        # AI move
        game.current_player = 2
        board_state = game.board_to_string()
        prompt = f"Current Board: {board_state}. Make a move."

        response = model.create_chat_completion(
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,
            max_tokens=512,
        )
        ai_response = response['choices'][0]['message']['content']

        # Extract reasoning and move.  A reply without the expected tags
        # fails on the [1] index below.
        try:
            reasoning = ai_response.split("<reasoning>")[1].split("</reasoning>")[0].strip()
            move_str = ai_response.split("<move>")[1].split("</move>")[0].strip()
            ai_col = game.parse_ai_move(move_str)
        except (AttributeError, IndexError, TypeError, ValueError) as e:
            return finish(
                "AI error occurred! You win by default!",
                panel(f"Error: {html.escape(str(e))}"),
            )

        # The model's text is untrusted, so escape it before embedding in HTML.
        reasoning_html = f'''
        <div id="ai-reasoning">
            <div class="reasoning-box">
                <p><strong>🤔 Reasoning:</strong></p>
                <p>{html.escape(reasoning)}</p>
                <p><strong>📍 Move chosen:</strong> {html.escape(move_str)}</p>
            </div>
        </div>
        '''

        # Range-check here as well so an off-board column can neither wrap
        # around nor raise inside the board lookup.
        on_board = 0 <= ai_col < game.board.shape[1]
        if not (on_board and game.make_move(ai_col)[0]):
            # The message declares the human the winner, so the game must end;
            # otherwise the next click would drop a piece as the AI.
            game.current_player = 1
            return finish(
                "AI made invalid move! You win by default!",
                panel("AI made an invalid move!"),
            )

        if game.check_winner() == 2:
            return finish("🤖 AI wins! Better luck next time!", reasoning_html)

        if board_is_full():
            return finish("It's a draw! Click New Game to play again.", reasoning_html)

        game.current_player = 1
        return [render_board(game.board), "Your turn!", reasoning_html]

    def reset_game():
        """Clear the board and return the initial values for the three outputs."""
        game.board = np.zeros((6, 7))
        game.current_player = 1
        game.game_over = False
        return [
            render_board(),
            "Your turn! Click a button to drop your piece!",
            panel("New game started! Make your move..."),
        ]

    with gr.Blocks(css=css) as interface:
        gr.Markdown("# 🎮 Connect Four vs AI")
        gr.Markdown("### This is just a quick prototype for now, and the current model was trained just for 200 steps to test the concept, the reward functions were flawed, update coming soon!")

        with gr.Row():
            with gr.Column(scale=2):
                # Status display
                status = gr.Markdown("Your turn! Click a button to drop your piece!", elem_id="ai-status")

                # Column buttons
                with gr.Group(elem_id="column-buttons"):
                    col_buttons = []
                    for i in range(7):
                        btn = gr.Button(f"⬇️ {i+1}", scale=1)
                        col_buttons.append(btn)

                # Game board
                board_display = gr.HTML(render_board(), elem_id="board-display")
                reset_btn = gr.Button("🔄 New Game", variant="primary")

            with gr.Column(scale=1):
                # AI reasoning display
                gr.Markdown("### 🤖 AI's Thoughts")
                reasoning_display = gr.HTML(
                    value='<div id="ai-reasoning">Waiting for your move...</div>',
                    elem_id="ai-reasoning-container",
                )

        # Event handlers: each button passes its own column index through a
        # hidden Number component (precision=0 asks Gradio for an int).
        for i, btn in enumerate(col_buttons):
            btn.click(
                fn=handle_move,
                inputs=[gr.Number(value=i, visible=False, precision=0)],
                outputs=[board_display, status, reasoning_display],
            )

        reset_btn.click(
            fn=reset_game,
            outputs=[board_display, status, reasoning_display],
        )

    return interface
297
 
298
def render_board(board=None):
    """Render a Connect Four board as an HTML grid.

    Args:
        board: A 6x7 grid indexable as ``board[row][col]`` where ``1`` is a
            player-1 piece, ``2`` a player-2 piece and anything else an empty
            cell.  ``None`` renders an empty board.

    Returns:
        A ``<div class="connect4-board">`` element containing 42 cell
        ``<div>`` elements in row-major order, top row first.
    """
    if board is None:
        board = np.zeros((6, 7))

    cells = []
    for row in range(6):
        for col in range(7):
            cell_class = "connect4-cell"
            content = "⚪"

            value = board[row][col]
            if value == 1:
                cell_class += " player1"
                content = "🔴"
            elif value == 2:
                cell_class += " player2"
                content = "🟡"

            cells.append(f'<div class="{cell_class}">{content}</div>')

    # Join once instead of growing the string cell by cell.
    return '<div class="connect4-board">' + "".join(cells) + "</div>"
320
 
321
+ # Launch the interface
322
  # Build and launch the UI only when this file is run as a script, not when
  # it is imported.
  if __name__ == "__main__":
323
+ interface = create_interface()
324
+ interface.launch(debug=True)