from smolagents import Tool
from transformers import CLIPProcessor, CLIPModel, DetrForObjectDetection, DetrImageProcessor
from PIL import Image
import torch

class ChessBoardRecognitionTool(Tool):
    """Agent tool that runs a DETR object detector over a chess board image.

    The detections (label, confidence score and bounding box) are returned as
    a plain-text listing, one detection per line, so that the calling agent
    can read them directly.
    """

    name = "chess_board_recognition"
    description = "Recognizes the state of a chess board from an image and returns the position representation."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path of the image file to elaborate"
        }
    }
    output_type = "string"

    def __init__(self):
        """Load the detection model and its matching image processor."""
        super().__init__()
        self.model_name = "aesat/detr-finetuned-chess"
        self.model = DetrForObjectDetection.from_pretrained(self.model_name)
        self.processor = DetrImageProcessor.from_pretrained(self.model_name)

    def forward(self, image_path: str) -> str:
        """Detect objects in the image at *image_path* and describe them.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A text block starting with ``"Chess board description:"`` followed
            by one ``Label / Confidence / Box`` line per detection whose score
            passes the 0.9 threshold. If anything fails, an error message
            string is returned instead of raising, so the agent can skip the
            tool.
        """
        try:
            # convert() returns an independent in-memory copy, so the file
            # handle opened by Image.open can be released immediately.
            with Image.open(image_path) as source:
                image = source.convert("RGB")

            inputs = self.processor(images=image, return_tensors="pt")
            with torch.no_grad():
                outputs = self.model(**inputs)

            # PIL reports size as (width, height); reverse it to the
            # (height, width) order passed to the post-processing step.
            target_sizes = torch.tensor([image.size[::-1]])
            results = self.processor.post_process_object_detection(
                outputs, target_sizes=target_sizes, threshold=0.9
            )[0]

            # Map numeric class ids to readable names; without this the raw
            # tensor repr (e.g. "tensor(3)") would end up in the output.
            id2label = getattr(self.model.config, "id2label", None) or {}

            lines = ["Chess board description:\n"]
            for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
                label_id = label.item()
                label_name = id2label.get(label_id, str(label_id))
                coords = [round(value, 2) for value in box.tolist()]
                lines.append(
                    f"Label: {label_name}, Confidence: {round(score.item(), 3)}, Box: {coords}\n"
                )
            return "".join(lines)
        except Exception as e:
            # Deliberate best-effort boundary: the agent receives a readable
            # message instead of an exception.
            return f"Error chess_board_recognition is not working properly, error: {e}, please skip this tool"