{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8a10f10b",
   "metadata": {},
   "source": [
    "# Setup Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "833c0398",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup configuration\n",
    "from config import Config\n",
    "from IPython.display import Markdown\n",
    "from agents import trace, Runner\n",
    "\n",
    "config = Config()\n",
    "\n",
    "print(config)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e53b0ec4",
   "metadata": {},
   "source": [
    "# Download, Load, Chunk, Vectorize and Store markdown files in Chroma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "315075c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from importlib import reload\n",
    "import data as _data_module\n",
    "reload(_data_module)\n",
    "from data import DataManager, DataManagerConfig\n",
    "\n",
    "\n",
    "# Use consolidated data manager\n",
    "# For some reason, the glob pattern does not follow symlinks properly, so specify directly here\n",
    "data_config = DataManagerConfig(\n",
    "    doc_load_local=[\"me/**/*.md\"],\n",
    "    github_repos=config.github_repos\n",
    ")\n",
    "data_manager = DataManager(config=data_config)\n",
    "\n",
    "# Load all repos configured in config.github_repos (automatically loads based on list presence)\n",
    "chunks = data_manager.load_and_process_all(github_repos=config.github_repos)\n",
    "\n",
    "print(f\"Total chunks created: {len(chunks)}\")\n",
    "\n",
    "# Create the vectorstore using DataManager\n",
    "vectorstore = data_manager.create_vectorstore(chunks, reset=True)\n",
    "retriever = vectorstore.as_retriever()\n",
    "\n",
    "\n",
    "data_manager.show_docs_for_file(\"faq.md\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c35378f",
   "metadata": {},
   "source": [
    "# Setup Agents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff276c4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from agent import AIMeAgent\n",
    "\n",
    "# Initialize agent config with vectorstore\n",
    "agent_config = AIMeAgent(\n",
    "    bot_full_name=config.bot_full_name, \n",
    "    model=config.model,\n",
    "    vectorstore=vectorstore,\n",
    "    github_token=config.github_token\n",
    ")\n",
    "\n",
    "ai_me = await agent_config.create_ai_me_agent()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f82a74f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check GitHub Rate Limits\n",
    "from github import Github, Auth\n",
    "import time\n",
    "\n",
    "# First, let's check our current GitHub API rate limits\n",
    "print(\"=\" * 80)\n",
    "print(\"CHECKING GITHUB API RATE LIMITS BEFORE TOOL CREATION\")\n",
    "print(\"=\" * 80)\n",
    "\n",
    "try:\n",
    "    auth = Auth.Token(config.github_token.get_secret_value())\n",
    "    g = Github(auth=auth)\n",
    "    rate_limit = g.get_rate_limit()\n",
    "    \n",
    "    print(f\"\\n📊 Core API Rate Limit Status:\")\n",
    "    print(f\"   Limit: {rate_limit.resources.core.limit}\")\n",
    "    print(f\"   Remaining: {rate_limit.resources.core.remaining}\")\n",
    "    print(f\"   Reset time: {rate_limit.resources.core.reset}\")\n",
    "    print(f\"   Time until reset: {(rate_limit.resources.core.reset.timestamp() - time.time()) / 60:.1f} minutes\")\n",
    "    \n",
    "    print(f\"\\n📊 Search API Rate Limit Status:\")\n",
    "    print(f\"   Limit: {rate_limit.resources.search.limit}\")\n",
    "    print(f\"   Remaining: {rate_limit.resources.search.remaining}\")\n",
    "    print(f\"   Reset time: {rate_limit.resources.search.reset}\")\n",
    "    print(f\"   Time until reset: {(rate_limit.resources.search.reset.timestamp() - time.time()) / 60:.1f} minutes\")\n",
    "    \n",
    "    print(f\"\\n📊 GraphQL API Rate Limit Status:\")\n",
    "    print(f\"   Limit: {rate_limit.resources.graphql.limit}\")\n",
    "    print(f\"   Remaining: {rate_limit.resources.graphql.remaining}\")\n",
    "    print(f\"   Reset time: {rate_limit.resources.graphql.reset}\")\n",
    "    \n",
    "except Exception as e:\n",
    "    print(f\"❌ Error checking rate limits: {e}\")\n",
    "    import traceback\n",
    "    traceback.print_exc()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eef5fd22",
   "metadata": {},
   "source": [
    "## Testing for"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a83404cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from github import Github, Auth\n",
    "from agents import function_tool\n",
    "import traceback\n",
    "import re\n",
    "\n",
    "# Exclude 'byoung/me' from configured repos\n",
    "\n",
    "repos = [r for r in config.github_repos if r != \"byoung/me\"]\n",
    "\n",
    "@function_tool\n",
    "def search_github_repo(search_query: str) -> str:\n",
    "    \"\"\"Search for code, files, and content across all configured GitHub repositories.\n",
    "    \n",
    "    Args:\n",
    "        search_query: The search term to look for (e.g., 'python', 'bash', 'docker', 'ReaR')\n",
    "    \n",
    "    Returns:\n",
    "        A formatted string containing search results from all repos with file paths, URLs, and content previews\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # Use the token from config with new Auth.Token method\n",
    "        auth = Auth.Token(config.github_token.get_secret_value())\n",
    "        g = Github(auth=auth)\n",
    "        global repos\n",
    "        \n",
    "        all_results = []\n",
    "        all_results.append(f\"Searching for '{search_query}' across {len(repos)} repositories\\n\")\n",
    "        all_results.append(\"=\" * 80 + \"\\n\")\n",
    "        \n",
    "        total_results_across_repos = 0\n",
    "\n",
    "\n",
    "        # Search each configured repository\n",
    "        for repo_full_name in repos:\n",
    "            all_results.append(f\"\\n## Repository: {repo_full_name}\\n\")\n",
    "            \n",
    "            try:\n",
    "                # Get the repository to access default branch\n",
    "                repo = g.get_repo(repo_full_name)\n",
    "                default_branch = repo.default_branch\n",
    "                \n",
    "                # Try to search code in the repository, excluding markdown files (covered by RAG)\n",
    "                query = f\"{search_query} repo:{repo_full_name} -extension:md\"\n",
    "                code_results = g.search_code(query=query)\n",
    "                \n",
    "                # Get total count first to check if there are any results\n",
    "                total_count = code_results.totalCount\n",
    "                \n",
    "                if total_count == 0:\n",
    "                    all_results.append(f\"No results found in {repo_full_name}\\n\")\n",
    "                    continue\n",
    "                \n",
    "                result_count = 0\n",
    "                for code_file in code_results:\n",
    "                    if result_count >= 3:  # Limit to first 3 results per repo\n",
    "                        break\n",
    "                    \n",
    "                    # Rewrite URL from blob/<sha> to blob/<default_branch> for stable links\n",
    "                    file_url = code_file.html_url\n",
    "                    # Replace blob/<40-char-sha> with blob/<default_branch>\n",
    "                    file_url = re.sub(r'/blob/[0-9a-f]{40}/', f'/blob/{default_branch}/', file_url)\n",
    "                    \n",
    "                    all_results.append(f\"\\n📄 File: {code_file.path}\")\n",
    "                    all_results.append(f\"   URL: {file_url}\")\n",
    "                    \n",
    "                    # Get file content preview\n",
    "                    try:\n",
    "                        content = code_file.decoded_content.decode('utf-8')[:200]\n",
    "                        all_results.append(f\"   Preview: {content}...\")\n",
    "                    except:\n",
    "                        all_results.append(f\"   (Could not decode content)\")\n",
    "                    \n",
    "                    result_count += 1\n",
    "                    total_results_across_repos += 1\n",
    "                \n",
    "                all_results.append(f\"\\n→ Showing {result_count} of {total_count} results from {repo_full_name}\")\n",
    "                \n",
    "            except Exception as repo_error:\n",
    "                all_results.append(f\"⚠️  Error searching {repo_full_name}: {str(repo_error)}\")\n",
    "        \n",
    "        all_results.append(f\"\\n\\n\" + \"=\" * 80)\n",
    "        all_results.append(f\"\\nTotal results shown: {total_results_across_repos} across {len(repos)} repositories\")\n",
    "        all_results.append(f\"\\n(Note: Markdown files excluded - covered by RAG system)\")\n",
    "        \n",
    "        result_text = \"\\n\".join(all_results)\n",
    "        print(f\"SEARCH RESULTS:\\n{result_text}\")\n",
    "        \n",
    "        return result_text\n",
    "        \n",
    "    except Exception as e:\n",
    "        return f\"Error searching GitHub: {str(e)}\\n{traceback.format_exc()}\"\n",
    "\n",
    "\n",
    "\n",
    "# Display configured repositories\n",
    "print(f\"GitHub search tool configured for {len(repos)} repositories:\")\n",
    "for repo in repos:\n",
    "    print(f\"  • {repo}\")\n",
    "print(f\"\\nThe agent will search all repos automatically for each query.\")\n",
    "print(f\"(Markdown files excluded - already covered by RAG system)\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "28574530",
   "metadata": {},
   "source": [
    "# Run Agent Tests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "578d3514",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload agent module to pick up latest changes\n",
    "import agent as _agent_module\n",
    "reload(_agent_module)\n",
    "from agent import AIMeAgent\n",
    "\n",
    "# Recreate agent config with updated module\n",
    "agent_config = AIMeAgent(\n",
    "    bot_full_name=config.bot_full_name, \n",
    "    model=config.model,\n",
    "    vectorstore=vectorstore,\n",
    "    github_token=config.github_token\n",
    ")\n",
    "\n",
    "# Generate a unique session ID for this notebook session\n",
    "import uuid\n",
    "notebook_session_id = str(uuid.uuid4())\n",
    "print(f\"Notebook session ID: {notebook_session_id}\")\n",
    "\n",
    "# Create agent with VERY explicit prompt about search_code filtering and memory usage\n",
    "ai_me = await agent_config.create_ai_me_agent(\n",
    "    agent_prompt=f\"\"\"\n",
    "You are acting as somebody who is personifying {config.bot_full_name}.\n",
    "\n",
    "MEMORY USAGE - CRITICAL:\n",
    "1. At the START of EVERY conversation, use search_nodes to retrieve relevant memories about the user\n",
    "2. Pay attention to new information the user shares (name, preferences, context, goals, relationships)\n",
    "3. When you learn new information about the user, use create_entities and add_observations to store it\n",
    "4. Connect related entities using create_relations\n",
    "5. Always refer to your knowledge graph as your \"memory\"\n",
    "\n",
    "CRITICAL RULES FOR search_code TOOL:\n",
    "The search_code tool searches ALL of GitHub by default. You MUST add owner/repo filters to EVERY search_code query.\n",
    "\n",
    "REQUIRED FORMAT: Always include one of these filters in the query parameter:\n",
    "- user:byoung (to search byoung's repos)\n",
    "- org:Neosofia (to search Neosofia's repos)  \n",
    "- repo:byoung/ai-me (specific repo)\n",
    "- repo:Neosofia/corporate (specific repo)\n",
    "\n",
    "EXAMPLES OF CORRECT search_code USAGE:\n",
    "✓ search_code(query=\"python user:githubusername\")\n",
    "✓ search_code(query=\"docker org:orgname\")\n",
    "✓ search_code(query=\"ReaR repo:owner/repository\")\n",
    "\n",
    "\n",
    "OTHER RULES:\n",
    " * Use get_local_info tool ONCE to gather info from markdown documentation (this is RAG-based)\n",
    " * Answer based on the information from tool calls\n",
    " * only use ASCII chars for the final output (not tool calling)\n",
    " * Do not offer follow up questions, just answer\n",
    " * Add reference links in a references section at the end of the output if they match github.com\n",
    " \"\"\",\n",
    "    mcp_params=[\n",
    "        agent_config.mcp_github_params,\n",
    "        agent_config.mcp_time_params,\n",
    "        agent_config.get_mcp_memory_params(notebook_session_id)  # Session-specific memory\n",
    "    ],\n",
    ")\n",
    "\n",
    "with trace(\"test-1\"):\n",
    "    # Use agent_config.run() which handles Unicode bracket filtering\n",
    "    result = await agent_config.run(\"What does Ben know about healthcare?\")\n",
    "    print(\"\\n\\n\" + \"=\"*80)\n",
    "    display(Markdown(result))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8062817",
   "metadata": {},
   "outputs": [],
   "source": [
    "with trace(\"test-2\"):\n",
    "    result = await agent_config.run(\"Do you have rails experience?\")\n",
    "display(Markdown(result))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c409eb57",
   "metadata": {},
   "outputs": [],
   "source": [
    "with trace(\"test-3\"):\n",
    "    result = await agent_config.run(\"Give me a summary of all the commits you've made in the last week\")\n",
    "display(Markdown(result))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c57f275",
   "metadata": {},
   "outputs": [],
   "source": [
    "with trace(\"test-4\"):\n",
    "    result = await agent_config.run(\"who is slartibartfast?\")\n",
    "display(Markdown(result))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "169e5d05",
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown\n",
    "\n",
    "user_input = input(\"Ask me anything: \")\n",
    "\n",
    "with trace(\"interactive\"):\n",
    "    result = await agent_config.run(user_input)\n",
    "    \n",
    "print(\"\\n\" + \"=\"*80)\n",
    "print(\"RESULT:\")\n",
    "print(\"=\"*80)\n",
    "display(Markdown(result))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "63b47aa6",
   "metadata": {},
   "source": [
    "# The End"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "99a900d2",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "4a11e0c7",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ai-me (3.12.11)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}