# Setup Environment

In [None]:
# Setup configuration
from config import Config
from IPython.display import Markdown
from agents import trace, Runner

# Build the notebook-wide settings object. Later cells read the bot name, model,
# GitHub token and repository list from it.
config = Config()

# Echo the loaded settings as a quick sanity check.
# NOTE(review): the token is read via get_secret_value() further down, which suggests
# it is a masked secret type -- confirm nothing sensitive is printed here.
print(config)

# Download, Load, Chunk, Vectorize and Store markdown files in Chroma

In [None]:
# Re-import the local `data` module on every run so edits made to it while the
# kernel is alive are picked up. The reload must come before the `from data import`
# line, otherwise the names below would still point at the stale classes.
from importlib import reload
import data as _data_module
reload(_data_module)
from data import DataManager, DataManagerConfig


# Use consolidated data manager
# For some reason, the glob pattern does not follow symlinks properly, so specify directly here
# NOTE(review): if DataManager globs with pathlib, `**` does not recurse into symlinked
# directories before Python 3.13 -- that may be the cause; confirm against data.py.
data_config = DataManagerConfig(
 doc_load_local=["me/**/*.md"],
 github_repos=config.github_repos
)
data_manager = DataManager(config=data_config)

# Load all repos configured in config.github_repos (automatically loads based on list presence)
chunks = data_manager.load_and_process_all(github_repos=config.github_repos)

print(f"Total chunks created: {len(chunks)}")

# Create the vectorstore using DataManager
# (reset=True presumably discards any previously stored collection first -- TODO confirm)
vectorstore = data_manager.create_vectorstore(chunks, reset=True)
# `retriever` is not referenced again in the visible cells; the agents receive `vectorstore` directly.
retriever = vectorstore.as_retriever()


# Spot check on a single source file (presumably shows the chunks produced from faq.md).
data_manager.show_docs_for_file("faq.md")

# Setup Agents

In [None]:
from agent import AIMeAgent

# Initialize agent config with vectorstore
# All settings come from `config`; `vectorstore` is the store built in the data-loading cell.
agent_config = AIMeAgent(
 bot_full_name=config.bot_full_name, 
 model=config.model,
 vectorstore=vectorstore,
 github_token=config.github_token
)

# Create the agent with no arguments (top-level await: runs only in a notebook/async REPL).
# The "Run Agent Tests" cell further down rebinds both `agent_config` and `ai_me`
# using an explicit prompt and MCP parameters.
ai_me = await agent_config.create_ai_me_agent()

In [None]:
# Check GitHub Rate Limits
from github import Github, Auth
import time

# Report how much GitHub API quota the configured token has left, so that
# rate-limit exhaustion can be ruled out before the GitHub tools are exercised.
print("=" * 80)
print("CHECKING GITHUB API RATE LIMITS BEFORE TOOL CREATION")
print("=" * 80)

try:
    auth = Auth.Token(config.github_token.get_secret_value())
    g = Github(auth=auth)
    try:
        rate_limit = g.get_rate_limit()

        # One identical report per quota bucket. Attributes are looked up inside the
        # loop so a failure on a later bucket still leaves the earlier ones printed.
        for label, attr in (("Core", "core"), ("Search", "search"), ("GraphQL", "graphql")):
            quota = getattr(rate_limit.resources, attr)
            # Clamp at zero so a reset time that has already passed is not shown
            # as a negative duration.
            minutes_left = max(0.0, (quota.reset.timestamp() - time.time()) / 60)

            print(f"\n📊 {label} API Rate Limit Status:")
            print(f" Limit: {quota.limit}")
            print(f" Remaining: {quota.remaining}")
            print(f" Reset time: {quota.reset}")
            print(f" Time until reset: {minutes_left:.1f} minutes")
    finally:
        # Release the client's HTTP connections even when a lookup above fails.
        g.close()

except Exception as e:
    # Best-effort diagnostic cell: report the problem and let the notebook continue.
    print(f"❌ Error checking rate limits: {e}")
    import traceback
    traceback.print_exc()


## Testing a Custom GitHub Search Tool

In [None]:
from github import Github, Auth
from agents import function_tool
import traceback
import re

# Exclude 'byoung/me' from configured repos
# (presumably because its markdown is already loaded locally via the "me/**/*.md"
# pattern in the data-loading cell -- TODO confirm). `repos` is read as a notebook
# global by search_github_repo() and by the summary printed at the end of this cell.

repos = [r for r in config.github_repos if r != "byoung/me"]

@function_tool
def search_github_repo(search_query: str) -> str:
    """Search for code, files, and content across all configured GitHub repositories.

    Args:
        search_query: The search term to look for (e.g., 'python', 'bash', 'docker', 'ReaR')

    Returns:
        A formatted string containing search results from all repos with file paths, URLs, and content previews
    """
    # NOTE: the docstring above is left as-is on purpose. The Agents SDK
    # `function_tool` decorator builds the tool description shown to the model
    # from it, so implementation notes live in comments instead.
    #
    # Reads the notebook globals `config` (GitHub token) and `repos` (list of
    # "owner/name" strings). Failures are reported in the returned text rather
    # than raised, so the calling agent can relay them.
    max_results_per_repo = 3  # keep the tool output short
    preview_chars = 200       # characters of file content shown per hit

    client = None
    try:
        # Use the token from config with the Auth.Token method
        auth = Auth.Token(config.github_token.get_secret_value())
        client = Github(auth=auth)

        all_results = [
            f"Searching for '{search_query}' across {len(repos)} repositories\n",
            "=" * 80 + "\n",
        ]
        total_results_across_repos = 0

        # Search each configured repository; an error in one repo is recorded
        # in the output and does not stop the remaining repos.
        for repo_full_name in repos:
            all_results.append(f"\n## Repository: {repo_full_name}\n")

            try:
                # The default branch is needed to build stable links below.
                repo = client.get_repo(repo_full_name)
                default_branch = repo.default_branch

                # Search code in the repository, excluding markdown files (covered by RAG)
                query = f"{search_query} repo:{repo_full_name} -extension:md"
                code_results = client.search_code(query=query)

                # Check the total first so empty repos get an explicit message.
                total_count = code_results.totalCount
                if total_count == 0:
                    all_results.append(f"No results found in {repo_full_name}\n")
                    continue

                result_count = 0
                for code_file in code_results:
                    if result_count >= max_results_per_repo:
                        break

                    # Rewrite blob/<40-char-sha>/ to blob/<default_branch>/ so links
                    # stay valid as the branch moves. A callable replacement keeps
                    # the branch name literal (no backslash/group interpretation).
                    file_url = re.sub(
                        r'/blob/[0-9a-f]{40}/',
                        lambda _match: f'/blob/{default_branch}/',
                        code_file.html_url,
                    )

                    all_results.append(f"\n📄 File: {code_file.path}")
                    all_results.append(f" URL: {file_url}")

                    # File content preview; binary or unavailable content is
                    # reported instead of aborting the whole repo.
                    try:
                        content = code_file.decoded_content.decode('utf-8')[:preview_chars]
                        all_results.append(f" Preview: {content}...")
                    except Exception:
                        all_results.append(" (Could not decode content)")

                    result_count += 1
                    total_results_across_repos += 1

                all_results.append(f"\n→ Showing {result_count} of {total_count} results from {repo_full_name}")

            except Exception as repo_error:
                all_results.append(f"⚠️ Error searching {repo_full_name}: {repo_error}")

        all_results.append("\n\n" + "=" * 80)
        all_results.append(f"\nTotal results shown: {total_results_across_repos} across {len(repos)} repositories")
        all_results.append("\n(Note: Markdown files excluded - covered by RAG system)")

        result_text = "\n".join(all_results)
        print(f"SEARCH RESULTS:\n{result_text}")

        return result_text

    except Exception as e:
        return f"Error searching GitHub: {e}\n{traceback.format_exc()}"

    finally:
        # Release the HTTP connections of the per-call client.
        if client is not None:
            client.close()



# Summarize which repositories the search tool covers.
print(f"GitHub search tool configured for {len(repos)} repositories:")
for repo in repos:
    print(f" • {repo}")
print(
    "\nThe agent will search all repos automatically for each query.",
    "(Markdown files excluded - already covered by RAG system)",
    sep="\n",
)


# Run Agent Tests

In [None]:
# Reload agent module to pick up latest changes
# (`reload` comes from the `from importlib import reload` line in the data-loading
# cell, so that cell must have been run first.)
import agent as _agent_module
reload(_agent_module)
from agent import AIMeAgent

# Recreate agent config with updated module
# This rebinds the `agent_config` created in the "Setup Agents" cell.
agent_config = AIMeAgent(
 bot_full_name=config.bot_full_name, 
 model=config.model,
 vectorstore=vectorstore,
 github_token=config.github_token
)

# Give this notebook run its own random UUID; it is passed below to
# get_mcp_memory_params() to request session-specific memory parameters.
import uuid
notebook_session_id = f"{uuid.uuid4()}"
print("Notebook session ID:", notebook_session_id)

# Create agent with VERY explicit prompt about search_code filtering and memory usage
ai_me = await agent_config.create_ai_me_agent(
 agent_prompt=f"""
You are acting as somebody who is personifying {config.bot_full_name}.

MEMORY USAGE - CRITICAL:
1. At the START of EVERY conversation, use search_nodes to retrieve relevant memories about the user
2. Pay attention to new information the user shares (name, preferences, context, goals, relationships)
3. When you learn new information about the user, use create_entities and add_observations to store it
4. Connect related entities using create_relations
5. Always refer to your knowledge graph as your "memory"

CRITICAL RULES FOR search_code TOOL:
The search_code tool searches ALL of GitHub by default. You MUST add owner/repo filters to EVERY search_code query.

REQUIRED FORMAT: Always include one of these filters in the query parameter:
- user:byoung (to search byoung's repos)
- org:Neosofia (to search Neosofia's repos) 
- repo:byoung/ai-me (specific repo)
- repo:Neosofia/corporate (specific repo)

EXAMPLES OF CORRECT search_code USAGE:
āœ“ search_code(query="python user:githubusername")
āœ“ search_code(query="docker org:orgname")
āœ“ search_code(query="ReaR repo:owner/repository")


OTHER RULES:
 * Use get_local_info tool ONCE to gather info from markdown documentation (this is RAG-based)
 * Answer based on the information from tool calls
 * only use ASCII chars for the final output (not tool calling)
 * Do not offer follow up questions, just answer
 * Add reference links in a references section at the end of the output if they match github.com
 """,
 mcp_params=[
 agent_config.mcp_github_params,
 agent_config.mcp_time_params,
 agent_config.get_mcp_memory_params(notebook_session_id) # Session-specific memory
 ],
)

# Smoke test 1: ask a knowledge question and render the answer as Markdown.
# The run, the separator line and the rendering all happen inside the "test-1" trace block.
with trace("test-1"):
 # Use agent_config.run() which handles Unicode bracket filtering
 # (per the original author; the filtering itself is not visible in this notebook)
 result = await agent_config.run("What does Ben know about healthcare?")
 print("\n\n" + "="*80)
 display(Markdown(result))

In [None]:
# Smoke test 2: experience question. Only the agent run sits inside the "test-2"
# trace block; the answer is rendered after the block exits.
with trace("test-2"):
 result = await agent_config.run("Do you have rails experience?")
display(Markdown(result))

In [None]:
# Smoke test 3: question about recent commits (presumably exercises the GitHub and
# time MCP servers passed to create_ai_me_agent -- confirm in the trace).
with trace("test-3"):
 result = await agent_config.run("Give me a summary of all the commits you've made in the last week")
display(Markdown(result))

In [None]:
# Smoke test 4: question about a name that is presumably absent from the loaded
# documents, to see how the agent answers when it has no information -- TODO confirm intent.
with trace("test-4"):
 result = await agent_config.run("who is slartibartfast?")
display(Markdown(result))

In [None]:
from IPython.display import display, Markdown

user_input = input("Ask me anything: ")

with trace("interactive"):
 result = await agent_config.run(user_input)
 
print("\n" + "="*80)
print("RESULT:")
print("="*80)
display(Markdown(result))

# The End