| import os |
| import sys |
| import gradio as gr |
| import requests |
| import pandas as pd |
| import logging |
| from datetime import datetime |
| from typing import Optional, Dict, List, Any |
|
|
| from smolagents import LiteLLMModel, CodeAgent, DuckDuckGoSearchTool |
| from agent_utilities import TextInverterTool, PythonScriptExecutor, WebFileDownloader |
|
|
| |
# Process-wide logging: INFO and above, each record prefixed with a timestamp
# and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)
|
|
| |
# Prompt text handed to LiteLLMModel (as ``system_prompt``) in
# EnhancedAIAgent._initialize_model. It has two parts: rules for the shape of
# the final answer (bare numbers, minimal words, comma-separated lists) and
# guidance on when to reach for each tool. The tool names mentioned in the
# text match the names imported from agent_utilities.
AGENT_SYSTEM_INSTRUCTIONS = """You are an advanced AI assistant designed to solve complex problems systematically.
When presented with a question, analyze it thoroughly and provide a comprehensive response.

Your final answer should be concise and direct - provide just the essential information requested.
- For numerical answers: provide only the number without currency symbols, percentages, or formatting unless explicitly required
- For text answers: use minimal words, avoid articles, write numbers as digits unless instructed otherwise
- For lists: use comma-separated format without additional formatting

Strategic Tool Usage:
1. **Exclusive Tool Usage**: Only use the tools provided in your toolkit - no external tools or libraries
2. **Sequential Processing**: Execute one tool operation per step for clear reasoning
3. **Python Execution Priority**: When questions involve .py files or Python scripts, use PythonScriptExecutor immediately
4. **Text Decoding**: If input appears reversed or encoded (begins with punctuation, reads backwards), apply TextInverterTool first
5. **File Operations**: For downloading requirements, always use WebFileDownloader with appropriate paths
6. **Logical Problem Solving**: Handle puzzles and logic problems directly unless they require text reversal
7. **Persistent Problem Solving**: If initial approaches fail, iterate with alternative strategies using available tools
8. **Search Optimization**: Keep web searches focused and concise due to context limitations

Remember: Every problem has a solution - explore different approaches if needed.
"""
|
|
| |
# Base URL of the scoring service; execute_evaluation_workflow appends
# "/questions" and "/submit" to it.
API_ENDPOINT_BASE = "https://agents-course-unit4-scoring.hf.space"
# Model identifier passed to LiteLLMModel as ``model_id``.
GEMINI_MODEL_ID = "gemini/gemini-2.0-flash-lite"
|
|
class EnhancedAIAgent:
    """Wrap a smolagents ``CodeAgent`` driven by a Gemini model through LiteLLM.

    Construction reads ``GEMINI_API_KEY`` from the environment, builds the
    model, then builds the agent with the web-search, text-inversion,
    script-execution and download tools. ``process_query`` is the only public
    operation: it runs one question through the agent and always returns a
    string, never raises.
    """

    def __init__(self):
        """Build the model first, then the agent that depends on it.

        Raises:
            EnvironmentError: If ``GEMINI_API_KEY`` is unset or empty.
            Exception: Anything raised while constructing the model or the
                agent is logged and re-raised unchanged.
        """
        self._initialize_model()
        self._setup_agent()
        logger.info("Enhanced AI Agent initialized successfully")

    def _initialize_model(self):
        """Create ``self.llm_model`` from the Gemini API key in the environment.

        Raises:
            EnvironmentError: If ``GEMINI_API_KEY`` is unset or empty.
            Exception: Re-raised from ``LiteLLMModel`` after being logged.
        """
        gemini_key = os.getenv("GEMINI_API_KEY")
        if not gemini_key:
            error_msg = "GEMINI_API_KEY environment variable is required but not found"
            logger.error(error_msg)
            raise EnvironmentError(error_msg)

        try:
            # NOTE(review): ``system_prompt`` is passed as an extra keyword to
            # LiteLLMModel — confirm the installed smolagents version actually
            # applies it as the agent's system prompt rather than forwarding
            # it to the completion call.
            self.llm_model = LiteLLMModel(
                model_id=GEMINI_MODEL_ID,
                api_key=gemini_key,
                system_prompt=AGENT_SYSTEM_INSTRUCTIONS,
            )
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception("Model initialization failed: %s", e)
            raise
        logger.info("LiteLLM model configured with %s", GEMINI_MODEL_ID)

    def _setup_agent(self):
        """Create ``self.ai_agent``, a ``CodeAgent`` bound to ``self.llm_model``.

        Raises:
            Exception: Re-raised from ``CodeAgent`` after being logged.
        """
        # NOTE(review): DuckDuckGoSearchTool is instantiated here while the
        # three agent_utilities tools are passed exactly as imported —
        # presumably they are already tool instances; confirm against
        # agent_utilities.
        tool_collection = [
            DuckDuckGoSearchTool(),
            TextInverterTool,
            PythonScriptExecutor,
            WebFileDownloader,
        ]

        try:
            self.ai_agent = CodeAgent(
                tools=tool_collection,
                model=self.llm_model,
                add_base_tools=True,
            )
        except Exception as e:
            logger.exception("Agent setup failed: %s", e)
            raise
        logger.info("Code agent configured with %d custom tools", len(tool_collection))

    def process_query(self, query_text: str) -> str:
        """Run one question through the agent and return its answer as text.

        Args:
            query_text: The question to hand to the agent.

        Returns:
            The agent's answer converted to ``str``. If anything raises while
            the query is being processed, the returned string is
            ``"Query processing error: <exception text>"`` instead; this
            method does not propagate exceptions.
        """
        try:
            logger.info(f"Processing query: {query_text[:100]}...")
            response = self.ai_agent.run(query_text)
        except Exception as e:
            error_response = f"Query processing error: {str(e)}"
            logger.exception("%s", error_response)
            return error_response

        logger.info("Query processed successfully")
        # The agent's final answer is not guaranteed to be a string; callers
        # rely on the ``-> str`` contract (the answer is later serialised to
        # JSON), so coerce anything that is not already text.
        return response if isinstance(response, str) else str(response)
|
|
def execute_evaluation_workflow(user_profile: Optional[gr.OAuthProfile]) -> tuple[str, Optional[pd.DataFrame]]:
    """Fetch the evaluation questions, answer each with the agent, and submit.

    Args:
        user_profile: OAuth profile of the logged-in Hugging Face user, or a
            falsy value when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is ``None`` when
        the run stops before any question is processed (no login, agent
        construction failure, question fetch failure, or an unusable question
        payload). Otherwise it is a DataFrame with the columns ``Task ID``,
        ``Question``, ``Agent Response`` and ``Status``, returned both on
        successful submission and when submission fails.
    """
    if not user_profile:
        logger.warning("Evaluation attempted without user authentication")
        return "Authentication required - please log in to Hugging Face first.", None

    username = user_profile.username
    space_identifier = os.getenv("SPACE_ID")
    logger.info(f"Starting evaluation workflow for user: {username}")
    if not space_identifier:
        # Without SPACE_ID the agent_code link below contains the text "None".
        logger.warning("SPACE_ID is not set; the submitted agent_code link will not point to a real Space")

    questions_endpoint = f"{API_ENDPOINT_BASE}/questions"
    submission_endpoint = f"{API_ENDPOINT_BASE}/submit"

    # Build the agent up front so a missing API key fails before any network call.
    try:
        ai_agent = EnhancedAIAgent()
        logger.info("AI agent initialized for evaluation")
    except Exception as initialization_error:
        error_message = f"Agent initialization error: {str(initialization_error)}"
        logger.error(error_message)
        return error_message, None

    try:
        logger.info("Fetching evaluation questions...")
        questions_response = requests.get(questions_endpoint, timeout=20)
        questions_response.raise_for_status()
        questions_dataset = questions_response.json()
    except Exception as fetch_error:
        error_message = f"Questions retrieval error: {str(fetch_error)}"
        logger.error(error_message)
        return error_message, None

    # The loop below calls .get() on every item, so anything other than a
    # non-empty list would either crash or end in a misleading
    # "no answers generated" message.
    if not isinstance(questions_dataset, list) or not questions_dataset:
        error_message = "Questions retrieval error: response was not a non-empty list of questions"
        logger.error(error_message)
        return error_message, None

    total_questions = len(questions_dataset)
    logger.info(f"Retrieved {total_questions} evaluation questions")

    evaluation_log = []        # one row per processed question, shown in the UI
    submission_answers = []    # payload entries for questions that produced an answer

    for idx, question_item in enumerate(questions_dataset, 1):
        if not isinstance(question_item, dict):
            logger.warning(f"Skipping invalid question item at index {idx}")
            continue

        task_identifier = question_item.get("task_id")
        question_content = question_item.get("question")

        if not task_identifier or question_content is None:
            logger.warning(f"Skipping invalid question item at index {idx}")
            continue

        logger.info(f"Processing question {idx}/{total_questions}: {task_identifier}")

        try:
            agent_response = ai_agent.process_query(question_content)
        except Exception as processing_error:
            error_response = f"PROCESSING_ERROR: {str(processing_error)}"
            evaluation_log.append({
                "Task ID": task_identifier,
                "Question": question_content,
                "Agent Response": error_response,
                "Status": "Failed"
            })
            logger.exception("Failed to process question %s: %s", task_identifier, processing_error)
            continue

        submission_answers.append({
            "task_id": task_identifier,
            "submitted_answer": agent_response
        })
        evaluation_log.append({
            "Task ID": task_identifier,
            "Question": question_content,
            "Agent Response": agent_response,
            "Status": "Success"
        })
        logger.info(f"Question {task_identifier} processed successfully")

    if not submission_answers:
        logger.warning("No valid answers generated for submission")
        return "No answers were generated by the agent.", pd.DataFrame(evaluation_log)

    submission_payload = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_identifier}/tree/main",
        "answers": submission_answers
    }

    try:
        logger.info("Submitting answers for evaluation...")
        submission_response = requests.post(
            submission_endpoint,
            json=submission_payload,
            timeout=90
        )
        submission_response.raise_for_status()
        result_data = submission_response.json()

        # NOTE(review): the leading glyphs in the lines below look like
        # mis-decoded emoji; they are kept unchanged here — confirm the
        # intended characters against the original file.
        success_message = (
            f"π Evaluation Completed Successfully!\n"
            f"π€ User: {result_data.get('username', 'Unknown')}\n"
            f"π Final Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"π¬ System Message: {result_data.get('message', 'No additional information.')}\n"
            f"β° Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )

        logger.info(f"Submission successful - Score: {result_data.get('score', 'N/A')}%")
        return success_message, pd.DataFrame(evaluation_log)

    except Exception as submission_error:
        error_message = f"Answer submission failed: {str(submission_error)}"
        logger.error(error_message)
        return error_message, pd.DataFrame(evaluation_log)
|
|
| |
def create_gradio_interface():
    """Assemble the Gradio Blocks UI and return it without launching it.

    Layout, top to bottom: an introductory Markdown header, a row holding the
    Hugging Face login button and a read-only status box, the button that
    starts the evaluation, a results table, and a footer note. Clicking the
    start button calls ``execute_evaluation_workflow`` and routes its two
    return values to the status box and the results table.

    Returns:
        The constructed ``gr.Blocks`` object.
    """
    soft_theme = gr.themes.Soft(secondary_hue="slate", primary_hue="blue")

    with gr.Blocks(title="AI Agent Evaluation Platform", theme=soft_theme) as app:
        # Header / getting-started instructions.
        gr.Markdown("""
        # π€ Advanced AI Agent Evaluation Platform

        **Welcome to the comprehensive AI agent testing environment!**

        ### Getting Started:
        1. π **Setup**: Clone this space and configure your Gemini API key in the environment
        2. π **Authentication**: Log in using your Hugging Face account credentials
        3. π **Execute**: Run the complete evaluation suite and submit your results
        4. π **Review**: Analyze performance metrics and detailed response logs
        """)

        # Login on the left (narrow), live status on the right (wide).
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### π Authentication")
                gr.LoginButton(value="Connect to Hugging Face")

            with gr.Column(scale=2):
                gr.Markdown("### π Evaluation Status")
                status_box = gr.Textbox(
                    placeholder="Ready to begin evaluation...",
                    interactive=False,
                    lines=6,
                    label="Current Status",
                )

        # The single control that kicks off the whole run.
        gr.Markdown("### π― Evaluation Controls")
        with gr.Row():
            run_button = gr.Button(
                "π Start Complete Evaluation",
                size="lg",
                variant="primary",
            )

        # Per-question results table.
        gr.Markdown("### π Detailed Results")
        results_table = gr.DataFrame(wrap=True, label="Evaluation Results")

        gr.Markdown("""
        ---
        **Note**: This platform uses Gemini 2.0 Flash Lite for AI processing.
        Ensure your API key has sufficient quota for evaluation tasks.
        """)

        # No explicit inputs: the workflow's only parameter is the OAuth
        # profile, which is not one of the components built above.
        run_button.click(
            fn=execute_evaluation_workflow,
            inputs=[],
            outputs=[status_box, results_table],
        )

    return app
|
|
| |
def main():
    """Build the Gradio interface and launch it.

    Prints a startup banner, constructs the interface, and starts the server
    with debug mode and in-UI error display enabled and public sharing
    disabled. Any exception raised while building or launching is logged with
    its traceback and the process exits with status 1.
    """
    # NOTE(review): the leading glyphs in these two banners look like
    # mis-decoded emoji; left unchanged — confirm the intended characters.
    print("π€ Initializing Advanced AI Agent Evaluation Platform...")
    print(f"β° Startup Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    try:
        interface = create_gradio_interface()
        # This literal was split across two physical lines by a mis-decoded
        # check-mark emoji, which made the file unparsable; rejoined here.
        print("✅ Interface created successfully")

        interface.launch(
            debug=True,
            share=False,
            show_error=True,
        )
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Application startup failed: %s", e)
        sys.exit(1)
|
|
# Start the application only when this file is run as a script; importing the
# module does not launch anything.
if __name__ == "__main__":
    main()