Spaces:

davidgturner
/

GaiaAgentEvaluator

Sleeping

File size: 5,971 Bytes

"""
Custom model implementation using Hugging Face Transformers.

This provides a local model implementation compatible with the smolagents framework.
"""

import logging
from typing import Dict, List, Optional, Any
from smolagents.models import Model
from transformers import AutoTokenizer, pipeline

logger = logging.getLogger(__name__)

class LocalTransformersModel(Model):
    """Text-generation model that runs in-process via a Transformers pipeline.

    The tokenizer and a ``"text-generation"`` pipeline are created once in
    ``__init__``; ``generate`` and ``generate_with_tools`` then run the
    pipeline locally on plain-text prompts.

    Attributes:
        model_name: Hugging Face model identifier passed to the loaders.
        device: Resolved device string ("cpu" or "cuda" when "auto" was given).
        max_tokens: Maximum number of new tokens requested per generation.
        temperature: Sampling temperature; non-positive values select greedy
            decoding in ``generate``.
        tokenizer: Tokenizer loaded with ``AutoTokenizer.from_pretrained``.
        generator: The text-generation pipeline.
    """

    def __init__(
        self,
        model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        device: str = "auto",
        max_tokens: int = 512,
        temperature: float = 0.7,
    ):
        """Load the tokenizer and text-generation pipeline for ``model_name``.

        Args:
            model_name: HuggingFace model identifier.
            device: "cpu", "cuda", or "auto" (use CUDA when torch reports it
                as available, otherwise CPU).
            max_tokens: Maximum new tokens to generate.
            temperature: Sampling temperature.

        Raises:
            Exception: Any error raised while loading is logged, printed and
                re-raised unchanged.
        """
        super().__init__()

        try:
            print(f"Loading model {model_name}...")

            self.model_name = model_name
            self.device = device
            self.max_tokens = max_tokens
            self.temperature = temperature

            # torch is imported lazily so it is only needed for "auto".
            if device == "auto":
                import torch

                self.device = "cuda" if torch.cuda.is_available() else "cpu"

            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.generator = pipeline(
                "text-generation",
                model=model_name,
                tokenizer=self.tokenizer,
                device=self.device,
                torch_dtype="auto",
            )

            print(f"Model loaded on {self.device}")

        except Exception as e:
            logger.error("Error loading model %s: %s", model_name, e)
            print(f"Error loading model: {e}")
            raise

    def generate(self, prompt: str, **kwargs) -> str:
        """Generate a text completion for ``prompt``.

        Args:
            prompt: Input text.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            The completion with the prompt removed and surrounding whitespace
            stripped. On any failure the error is logged and printed and the
            string ``"Error: <message>"`` is returned instead of raising.
        """
        try:
            print(f"Generating with prompt: {prompt[:50]}...")

            generation_args: Dict[str, Any] = {
                "max_new_tokens": self.max_tokens,
                "pad_token_id": self.tokenizer.eos_token_id,
                # Ask the pipeline for the completion only, rather than
                # relying solely on an exact prefix match with the prompt.
                "return_full_text": False,
            }

            # Sampling requires a strictly positive temperature; with a
            # non-positive (or missing) value fall back to greedy decoding
            # instead of letting the call fail.
            temperature = self.temperature
            if temperature is not None and temperature > 0:
                generation_args["do_sample"] = True
                generation_args["temperature"] = temperature
            else:
                generation_args["do_sample"] = False

            response = self.generator(prompt, **generation_args)
            generated_text = response[0]["generated_text"]

            # Defensive: drop the prompt if the generator still echoed it.
            if generated_text.startswith(prompt):
                generated_text = generated_text[len(prompt):]

            return generated_text.strip()

        except Exception as e:
            error_msg = f"Error generating text (Local model): {e}"
            logger.error(error_msg)
            print(error_msg)
            return f"Error: {str(e)}"

    def generate_with_tools(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """Generate an assistant reply for a chat history, listing tools in the prompt.

        The messages (and tool descriptions, if any) are flattened into one
        text prompt and passed to ``generate``. Tool calls are NOT parsed out
        of the completion; the raw text is returned as the message content.

        Args:
            messages: List of message dicts with ``role`` and ``content``.
            tools: Optional list of tool definitions to describe in the prompt.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            ``{"message": {"role": "assistant", "content": <text>}}``. If
            building the prompt fails, ``content`` is ``"Error: <message>"``.
        """
        try:
            prompt = self._format_messages_to_prompt(messages, tools)
            completion = self.generate(prompt)

            # For now, just return the text without tool parsing.
            return {
                "message": {
                    "role": "assistant",
                    "content": completion,
                }
            }
        except Exception as e:
            logger.error("Error generating with tools: %s", e)
            print(f"Error generating with tools: {e}")
            return {
                "message": {
                    "role": "assistant",
                    "content": f"Error: {str(e)}",
                }
            }

    @staticmethod
    def _tool_field(tool: Any, key: str, default: str = "") -> str:
        """Read ``key`` from a tool given as a dict or as an object with attributes."""
        if isinstance(tool, dict):
            value = tool.get(key)
        else:
            value = getattr(tool, key, None)
        return default if value is None else str(value)

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Convert message content (None, str, or a list of parts) to plain text."""
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, (list, tuple)):
            pieces = []
            for part in content:
                if isinstance(part, dict):
                    # Keep only parts that carry text; others are skipped.
                    text = part.get("text")
                    if text is not None:
                        pieces.append(str(text))
                else:
                    pieces.append(str(part))
            return "\n".join(pieces)
        return str(content)

    def _format_messages_to_prompt(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Format chat messages into a text prompt for the model.

        Layout: an optional ``Available tools:`` section (one numbered line
        per tool), then one ``System:``/``User:``/``Assistant:`` paragraph per
        message, and finally a trailing ``"Assistant: "`` cue. Messages with
        any other role are skipped.

        Args:
            messages: Message dicts with ``role`` and ``content``; ``None`` is
                treated as an empty history.
            tools: Optional tool definitions; a missing name is rendered as
                ``"unknown"`` and a missing description as an empty string.

        Returns:
            The assembled prompt string.
        """
        role_labels = {
            "system": "System",
            "user": "User",
            "assistant": "Assistant",
        }
        sections = []

        # Include tool descriptions if available
        if tools:
            tool_lines = [
                f"Tool {index}: {self._tool_field(tool, 'name', 'unknown')}"
                f" - {self._tool_field(tool, 'description')}"
                for index, tool in enumerate(tools, start=1)
            ]
            tool_descriptions = "\n".join(tool_lines)
            sections.append(f"Available tools:\n{tool_descriptions}\n\n")

        # Add conversation history
        for msg in messages or []:
            label = role_labels.get(msg.get("role", ""))
            if label is None:
                continue
            text = self._content_to_text(msg.get("content"))
            sections.append(f"{label}: {text}\n\n")

        # Add final prompt for assistant
        sections.append("Assistant: ")

        return "".join(sections)