QuadConnect-beta

Running

File size: 17,992 Bytes

import os
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import numpy as np
from typing import List

# Fetch the GGUF weights from the Hugging Face Hub (repo and file name can be
# overridden through the REPO_ID / MODEL_FILE environment variables) and load
# them into a llama.cpp model with a 16384-token context window.
_model_path = hf_hub_download(
    repo_id=os.environ.get("REPO_ID", "Lyte/QuadConnect2.5-0.5B-v0.0.9b"),
    filename=os.environ.get("MODEL_FILE", "unsloth.Q8_0.gguf"),
)
model = Llama(model_path=_model_path, n_ctx=16384)

# System message sent with every chat completion request. It describes the
# board notation (columns a-g, rows 1-6, row 1 at the bottom), the strategy the
# model should follow, and the XML reply format (<reasoning> + <move>) that
# extract_xml_move() and the UI handler parse.
# NOTE(review): the wording is presumably what the model was fine-tuned on, so
# it is left untouched; confirm before editing the text.
SYSTEM_PROMPT = """You are a master Connect Four strategist whose goal is to win while preventing your opponent from winning. The game is played on a 6x7 grid (columns a–g, rows 1–6 with 1 at the bottom) where pieces drop to the lowest available spot.

Board:
- Represented as a list of occupied cells in the format: <column><row>(<piece>), e.g., 'a1(O)'.
- For example: 'a1(O), a2(X), b1(O)' indicates that cell a1 has an O, a2 has an X, and b1 has an O.
- An empty board is shown as 'Empty Board'.
- Win by connecting 4 pieces in any direction (horizontal, vertical, or diagonal).

Strategy:
1. Identify taken positions, and empty positions.
2. Find and execute winning moves.
3. If There isn't a winning move, then block your opponent's potential wins.
4. Control the center and set up future moves.

Respond in XML:
<reasoning>
Explain your thought process, focusing on your winning move, how you block your opponent, and your strategic plans.
</reasoning>
<move>
Specify the column letter (a–g) for your next move.
</move>
"""

def extract_xml_move(text: str) -> str:
    """
    Extract the chosen column from the first ``<move>...</move>`` tag in *text*.

    The tag name and the letter are matched case-insensitively, so a reply
    such as ``<move>C</move>`` is accepted; the result is always normalised
    to lowercase.

    Returns:
        A single letter in ``a``–``g``, or ``""`` when no well-formed move
        tag containing exactly one valid column letter is found.
    """
    import re
    # Whitespace (including newlines) around the letter is tolerated because
    # the prompt shows the tag contents on their own line.
    match = re.search(r'<move>\s*([a-g])\s*</move>', text, re.IGNORECASE)
    if match is None:
        return ""
    return match.group(1).lower()

def convert_moves_to_coordinate_list(moves_list: List[str]) -> str:
    """
    Replay a move sequence and describe the resulting board as coordinates.

    Only the first character of each move (its column letter) is used. Moves
    at even positions in the list are played as 'X', odd positions as 'O';
    falsy entries are skipped without shifting that parity, and a move into
    an already full column is dropped. Occupied cells are reported bottom row
    first, left to right, as "<column><row>(<piece>)" joined by ", ".
    Returns "Empty Board" when nothing was placed.
    """
    # board[0] is the bottom row (row number 1).
    board = [['.'] * 7 for _ in range(6)]

    for turn, move in enumerate(moves_list):
        if not move:
            continue
        column = ord(move[0]) - ord('a')
        # Gravity: the piece lands on the first free cell from the bottom.
        landing_row = next((r for r in range(6) if board[r][column] == '.'), None)
        if landing_row is not None:
            board[landing_row][column] = 'O' if turn % 2 else 'X'

    occupied = [
        f"{chr(ord('a') + c)}{r + 1}({cell})"
        for r, cells in enumerate(board)
        for c, cell in enumerate(cells)
        if cell != '.'
    ]
    if not occupied:
        return "Empty Board"
    return ", ".join(occupied)

def parse_coordinate_list(board_str: str) -> List[List[str]]:
    """
    Build a 6x7 grid from a coordinate list such as "a1(O), a2(X), b1(O)".

    The grid is a list of rows with index 0 as the bottom row; empty cells
    hold '.'. A blank string or "Empty Board" yields an all-empty grid.
    Entries that are too short, have a non-numeric row character, or fall
    outside the board are ignored.
    """
    board = [['.'] * 7 for _ in range(6)]
    if not board_str.strip() or board_str == "Empty Board":
        return board

    for entry in map(str.strip, board_str.split(",")):
        # Shortest usable entry looks like "a1(O": column, row, "(", piece.
        if len(entry) < 4:
            continue
        try:
            row = int(entry[1]) - 1
        except ValueError:
            continue
        col = ord(entry[0]) - ord('a')
        if row in range(6) and col in range(7):
            # Character at index 3 is the piece inside the parentheses.
            board[row][col] = entry[3]
    return board

def get_available_positions(board_moves: List[str]) -> str:
    """
    List every empty cell of each column after stacking the given moves.

    Only the column letter (first character) of each move is used; each move
    adds one piece to the bottom-most free cell of that column, and moves into
    a full column are ignored. The result has one line per column, either
    "Column x: x3, x4, ..." or "Column x: Full", joined by a newline followed
    by two spaces.
    """
    # Pieces stack contiguously from the bottom, so the number of pieces in a
    # column is all that is needed to know which cells remain empty.
    heights = [0] * 7
    for move in board_moves:
        if not move:
            continue
        col = ord(move[0]) - ord('a')
        if heights[col] < 6:
            heights[col] += 1

    lines = []
    for col, height in enumerate(heights):
        letter = chr(ord('a') + col)
        free_cells = ", ".join(f"{letter}{row}" for row in range(height + 1, 7))
        lines.append(f"Column {letter}: {free_cells or 'Full'}")

    return "\n  ".join(lines)

class ConnectFour:
    """State and rules for one Connect Four game on a 6x7 board.

    ``board[row][col]`` holds 0 for an empty cell, 1 for the human player (X)
    and 2 for the AI (O); row index 0 is the bottom row. ``player_moves`` and
    ``ai_moves`` record each side's moves in play order as cell names such as
    ``"d1"``.
    """

    ROWS = 6
    COLS = 7

    def __init__(self):
        self.board = np.zeros((self.ROWS, self.COLS))
        self.current_player = 1  # 1 for player (X), 2 for AI (O)
        self.game_over = False
        self.player_moves = []
        self.ai_moves = []

    def make_move(self, col):
        """Drop the current player's piece into column ``col`` (0-based).

        The caller is responsible for switching ``current_player`` between
        turns; this method only places the piece and records the move.

        Returns:
            ``(True, row)`` with the 0-based landing row on success, or
            ``(False, -1)`` when the game is over, ``col`` is not a whole
            number inside the board, or the column is full.
        """
        if self.game_over:
            return False, -1

        # Accept integral floats (e.g. 3.0) but reject everything else that is
        # not an in-range whole number, so a negative index can never wrap
        # around to the opposite side of the board.
        try:
            col_index = int(col)
        except (TypeError, ValueError, OverflowError):
            return False, -1
        if col_index != col or not 0 <= col_index < self.COLS:
            return False, -1

        # Find the lowest empty row in the selected column
        for row in range(self.ROWS):
            if self.board[row][col_index] != 0:
                continue
            self.board[row][col_index] = self.current_player
            # Record the move using 1-based row numbers, e.g. "d1".
            move = f"{chr(ord('a') + col_index)}{row + 1}"
            if self.current_player == 1:
                self.player_moves.append(move)
            else:
                self.ai_moves.append(move)
            return True, row
        return False, -1

    def check_winner(self):
        """Return the value of the first four-in-a-row found, or 0 if none.

        Lines are searched in this order: horizontal, vertical, rising
        diagonal, falling diagonal. The returned value is the board cell
        itself, so it compares equal to 1 (player) or 2 (AI).
        """
        # (row step, column step, start rows, start columns) for each
        # direction; the start ranges keep all four cells on the board.
        directions = (
            (0, 1, range(self.ROWS), range(self.COLS - 3)),
            (1, 0, range(self.ROWS - 3), range(self.COLS)),
            (1, 1, range(self.ROWS - 3), range(self.COLS - 3)),
            (-1, 1, range(3, self.ROWS), range(self.COLS - 3)),
        )
        for d_row, d_col, start_rows, start_cols in directions:
            for row in start_rows:
                for col in start_cols:
                    first = self.board[row][col]
                    if first != 0 and all(
                        self.board[row + k * d_row][col + k * d_col] == first
                        for k in range(1, 4)
                    ):
                        return first
        return 0

    def board_to_string(self):
        """Describe occupied cells as "a1(X), b1(O), ..." or "Empty Board".

        Cells are listed bottom row first, left to right within a row.
        """
        cells = [
            f"{chr(ord('a') + col)}{row + 1}({'X' if self.board[row][col] == 1 else 'O'})"
            for row in range(self.ROWS)
            for col in range(self.COLS)
            if self.board[row][col] != 0
        ]
        return ", ".join(cells) if cells else "Empty Board"

    def get_board_moves(self):
        """
        Returns the names of all occupied cells ('a1', 'b2', ...).

        The list is ordered by board position (bottom row first, left to
        right), not by the order in which the moves were played. It is used
        as input for the get_available_positions function.
        """
        return [
            f"{chr(ord('a') + col)}{row + 1}"
            for row in range(self.ROWS)
            for col in range(self.COLS)
            if self.board[row][col] != 0
        ]

    def format_game_state(self):
        """Render the user message for the model, written from the AI's (O) side.

        It contains both players' move histories, the occupied cells, and the
        empty cells of every column.
        """
        board_str = self.board_to_string()
        available_positions = get_available_positions(self.get_board_moves())

        # Joining an empty history yields "", which leaves the field blank.
        player_moves_str = ", ".join(self.player_moves)
        ai_moves_str = ", ".join(self.ai_moves)

        return f"""Game State:
- You are playing as: O
- Your previous moves: {ai_moves_str}
- Opponent's moves: {player_moves_str}
- Current board state: {board_str}
- Next available position per column:
  {available_positions}

Make your move."""

    def parse_ai_move(self, move_str):
        """Convert a column letter such as 'a' or ' C ' to a 0-based index.

        Returns -1 when ``move_str`` is not a single letter in a-g
        (surrounding whitespace and letter case are ignored).
        """
        try:
            col = ord(move_str.strip().lower()) - ord('a')
        except (AttributeError, TypeError):
            # AttributeError: not a string; TypeError: not exactly one character.
            return -1
        return col if 0 <= col < self.COLS else -1

def create_interface():
    """Build the Gradio Blocks UI for playing Connect Four against the model.

    One ``ConnectFour`` instance is created here and captured by the event
    handlers defined below, so every click operates on that same game object.
    Each column button runs the player's move followed by the model's reply;
    the handlers return three outputs: board HTML, status text and the
    reasoning panel HTML.

    Returns:
        The ``gr.Blocks`` interface (not launched).
    """
    # Local import: model output and error text are escaped before being
    # embedded in the reasoning panel's HTML.
    from html import escape

    game = ConnectFour()
    
    css = """
    .connect4-board {
        display: grid;
        grid-template-columns: repeat(7, 1fr);
        gap: 8px;
        max-width: 600px;
        margin: 10px auto;
        background: #2196F3;
        padding: 15px;
        border-radius: 15px;
        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
    }
    .connect4-cell {
        aspect-ratio: 1;
        background: white;
        border-radius: 50%;
        display: flex;
        align-items: center;
        justify-content: center;
        font-size: 2em;
    }
    .player1 { background: #f44336 !important; }
    .player2 { background: #ffc107 !important; }
    #ai-status {
        font-size: 1.2em;
        margin: 10px 0;
        color: #2196F3;
        font-weight: bold;
    }
    #ai-reasoning {
        background: #22004d;
        border-radius: 10px;
        padding: 15px;
        margin: 15px 0;
        font-family: monospace;
        min-height: 100px;
    }
    .reasoning-box {
        border-left: 4px solid #2196F3;
        padding-left: 15px;
        margin: 10px 0;
        background: #22004d;
        border-radius: 0 10px 10px 0;
    }
    #column-buttons {
        display: flex;
        justify-content: center;
        align-items: anchor-center;
        max-width: 600px;
        margin: 0 auto;
        padding: 0 15px;
    }
    #column-buttons button {
      margin: 0px 5px;
    }
    div.svelte-1nguped {
      display: block;
    }
    """

    with gr.Blocks(css=css) as interface:
        gr.Markdown("# 🎮 Connect Four vs AI")
        gr.Markdown("### Play against an AI trained to be an expert Connect Four player!")
        
        with gr.Row():
            with gr.Column(scale=2):
                # Status display
                status = gr.Markdown("Your turn! Click a button to drop your piece!", elem_id="ai-status")
                
                # Column buttons
                with gr.Group(elem_id="column-buttons"):
                    col_buttons = []
                    for i in range(7):
                        btn = gr.Button(f"⬇️ {chr(ord('A') + i)}", scale=1)
                        col_buttons.append(btn)
                
                # Game board
                board_display = gr.HTML(render_board(), elem_id="board-display")
                reset_btn = gr.Button("🔄 New Game", variant="primary")
            
            with gr.Column(scale=1):
                # AI reasoning display
                gr.Markdown("### 🤖 AI's Thoughts")
                reasoning_display = gr.HTML(
                    value='<div id="ai-reasoning">Waiting for your move...</div>',
                    elem_id="ai-reasoning-container"
                )
        with gr.Row():
            temperature_slider = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.8,
                step=0.1,
                label="Temperature",
                info="Lower values make AI more deterministic, higher values more creative"
            )

        def board_is_full():
            """Return True when no empty (0) cell is left on the board."""
            return not (game.board == 0).any()

        def draw_result():
            """End the game and return the outputs announcing a draw."""
            game.game_over = True
            return [
                render_board(game.board),
                "🤝 It's a draw! Click New Game to play again.",
                '<div id="ai-reasoning">The board is full. Nobody wins this one!</div>'
            ]

        def play_ai_turn(temperature):
            """Ask the model for a move, apply it, and return the UI outputs.

            Expects ``game.current_player`` to already be 2. Any problem with
            the model's reply (missing tags, invalid or full column) ends the
            game in the player's favour.
            """
            game_state = game.format_game_state()
            print(game_state)
            
            # Get AI response with user-defined temperature
            response = model.create_chat_completion(
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": game_state}
                ],
                temperature=temperature,
                top_p=0.95,
                max_tokens=1024
            )
            
            ai_response = response['choices'][0]['message']['content']
            print(ai_response)
            
            # Extract reasoning and move. The reply is free-form model text,
            # so any parsing failure is treated as a forfeit, not a crash.
            try:
                reasoning = ai_response.split("<reasoning>")[1].split("</reasoning>")[0].strip()
                move_str = extract_xml_move(ai_response)
                
                if not move_str:
                    raise ValueError("Invalid move format from AI")
                
                ai_col = game.parse_ai_move(move_str)
                
                if ai_col == -1:
                    raise ValueError("Invalid move format from AI")
                
                # Format reasoning for display (escaped: it is untrusted text)
                reasoning_html = f'''
                <div id="ai-reasoning">
                    <div class="reasoning-box">
                        <p><strong>🤔 Reasoning:</strong></p>
                        <p>{escape(reasoning)}</p>
                        <p><strong>📍 Move chosen:</strong> Column {move_str.upper()}</p>
                    </div>
                </div>
                '''
                
                success, _ = game.make_move(ai_col)
            except Exception as e:
                game.game_over = True
                return [
                    render_board(game.board),
                    "AI error occurred! You win by default!",
                    f'<div id="ai-reasoning">Error: {escape(str(e))}</div>'
                ]

            if not success:
                # The chosen column was full. The status declares the player
                # the winner, so the game must actually end here; otherwise
                # further clicks would keep playing on a finished game.
                game.game_over = True
                return [
                    render_board(game.board),
                    "AI made invalid move! You win by default!",
                    '<div id="ai-reasoning">AI made an invalid move!</div>'
                ]

            # Check for AI winner
            if game.check_winner() == 2:
                game.game_over = True
                return [
                    render_board(game.board),
                    "🤖 AI wins! Better luck next time!",
                    reasoning_html
                ]

            if board_is_full():
                return draw_result()

            return [render_board(game.board), "Your turn!", reasoning_html]

        def handle_move(col, temperature=0.8):
            """Play the human move in column ``col`` and then the AI's reply."""
            if game.game_over:
                return [
                    render_board(game.board),
                    "Game is over! Click New Game to play again.",
                    '<div id="ai-reasoning">Game Over!</div>'
                ]
            
            # The hidden gr.Number input may deliver the column as a float,
            # which cannot be used to index the board.
            col = int(col)

            # Player move
            success, row = game.make_move(col)
            if not success:
                return [
                    render_board(game.board),
                    "Column is full! Try another one.",
                    '<div id="ai-reasoning">Invalid move!</div>'
                ]
            
            # Check for winner
            winner = game.check_winner()
            if winner == 1:
                game.game_over = True
                return [
                    render_board(game.board),
                    "🎉 You win! 🎉",
                    '<div id="ai-reasoning">Congratulations! You won!</div>'
                ]

            # The player may have filled the last cell; without this check the
            # AI would be asked to move on a full board.
            if board_is_full():
                return draw_result()
            
            # AI move. The turn is always handed back to the player, even if
            # the model call raises, so the next click never drops an AI piece.
            game.current_player = 2
            try:
                return play_ai_turn(temperature)
            finally:
                game.current_player = 1

        def reset_game():
            """Restore the shared game to its initial state and clear the UI."""
            game.board = np.zeros((6, 7))
            game.current_player = 1
            game.game_over = False
            game.player_moves = []
            game.ai_moves = []
            return [
                render_board(),
                "Your turn! Click a button to drop your piece!",
                '<div id="ai-reasoning">New game started! Make your move...</div>'
            ]

        # Event handlers: each button passes its column index through a hidden
        # Number component, together with the current temperature.
        for i, btn in enumerate(col_buttons):
            btn.click(
                fn=handle_move,
                inputs=[
                    gr.Number(value=i, visible=False),
                    temperature_slider
                ],
                outputs=[board_display, status, reasoning_display]
            )
        
        reset_btn.click(
            fn=reset_game,
            outputs=[board_display, status, reasoning_display]
        )
        
    return interface

def render_board(board=None):
    """Return the board as an HTML grid of 42 cell <div>s.

    ``board`` is indexed as ``board[row][col]`` with row 0 at the bottom;
    1 marks a player-one cell, 2 a player-two cell, anything else is drawn
    as empty. When ``board`` is None an empty 6x7 board is rendered.
    """
    grid = np.zeros((6, 7)) if board is None else board

    def cell_markup(value):
        # One <div> per cell; the extra class selects the piece colour.
        if value == 1:
            return '<div class="connect4-cell player1">🔴</div>'
        if value == 2:
            return '<div class="connect4-cell player2">🟡</div>'
        return '<div class="connect4-cell">⚪</div>'

    # Emit the top row first so the bottom row ends up at the bottom of the grid.
    cells = [
        cell_markup(grid[row][col])
        for row in reversed(range(6))
        for col in range(7)
    ]
    return '<div class="connect4-board">' + "".join(cells) + "</div>"

# Build the UI once at import time and hand it to Gradio's launch() so the app
# starts as soon as this file is executed.
interface = create_interface()
interface.launch()