"""
GitHub Read File Tool - Read file contents from any GitHub repository with line range support

Fetch exact file contents with metadata, supporting line ranges for efficient reading.
"""
| |
|
import asyncio
import base64
import json
import logging
import os
from typing import Any, Dict, Optional
from urllib.parse import quote

import nbformat
import requests
from nbconvert import MarkdownExporter
from nbconvert.preprocessors import ClearOutputPreprocessor, TagRemovePreprocessor

from agent.tools.types import ToolResult
| |
|
| |
|
| | def _convert_ipynb_to_markdown(content: str) -> str: |
| | """ |
| | Convert Jupyter notebook JSON to LLM-friendly Markdown. |
| | |
| | Args: |
| | content: Raw notebook JSON string |
| | |
| | Returns: |
| | Converted Markdown string |
| | """ |
| | try: |
| | |
| | nb_dict = json.loads(content) |
| |
|
| | |
| | if "cells" in nb_dict: |
| | for cell in nb_dict["cells"]: |
| | if "source" in cell and isinstance(cell["source"], list): |
| | cell["source"] = "".join(cell["source"]) |
| |
|
| | |
| | nb = nbformat.reads(json.dumps(nb_dict), as_version=4) |
| |
|
| | |
| | clear = ClearOutputPreprocessor() |
| | nb, _ = clear.preprocess(nb, {}) |
| |
|
| | |
| | remove = TagRemovePreprocessor( |
| | remove_cell_tags={"hide", "hidden", "remove"}, |
| | remove_input_tags=set(), |
| | remove_all_outputs_tags=set(), |
| | ) |
| | nb, _ = remove.preprocess(nb, {}) |
| |
|
| | |
| | exporter = MarkdownExporter() |
| | markdown, _ = exporter.from_notebook_node(nb) |
| |
|
| | return markdown |
| |
|
| | except json.JSONDecodeError: |
| | return content |
| | except Exception: |
| | return content |
| |
|
| |
|
# Number of lines returned when the caller does not request an explicit range.
_DEFAULT_MAX_LINES = 300


def _read_file_error(message: str) -> ToolResult:
    """Build the error-shaped result returned by every failure path of read_file."""
    return {
        "formatted": message,
        "totalResults": 0,
        "resultsShared": 0,
        "isError": True,
    }


def read_file(
    repo: str,
    path: str,
    ref: str = "HEAD",
    line_start: Optional[int] = None,
    line_end: Optional[int] = None,
) -> ToolResult:
    """
    Read file contents from a GitHub repository with line range support.

    The file is fetched through the GitHub "contents" REST endpoint using the
    token in the ``GITHUB_TOKEN`` environment variable. When the JSON response
    carries no inline content, the same URL is requested again with the raw
    media type. Paths ending in ``.ipynb`` are converted to Markdown before
    the line range is applied. When neither ``line_start`` nor ``line_end`` is
    given, at most the first 300 lines are returned and a note tells the
    reader how to request more.

    Args:
        repo: Repository in format "owner/repo" (e.g., "github/github-mcp-server")
        path: Path to file in repository (e.g., "pkg/github/search.go")
        ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
        line_start: Starting line number (1-indexed, inclusive)
        line_end: Ending line number (1-indexed, inclusive)

    Returns:
        ToolResult with file contents and metadata. On success the dict has
        "formatted", "totalResults" (1) and "resultsShared" (1); on failure it
        has "formatted" (the error message), zero counts and "isError": True.
    """
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        return _read_file_error("Error: GITHUB_TOKEN environment variable is required")

    # Require exactly "owner/repo": both parts non-empty and no extra slash.
    owner, _, repo_name = repo.partition("/")
    if not owner or not repo_name or "/" in repo_name:
        return _read_file_error("Error: repo must be in format 'owner/repo'")

    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    # Percent-encode the path (keeping "/" separators) so characters such as
    # "#", "?" or "%" in file names are not interpreted as URL syntax.
    encoded_path = quote(path.lstrip("/"))
    url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{encoded_path}"

    # "HEAD" means the default branch, which is what the API uses when no ref
    # is sent.
    params = {"ref": ref} if ref and ref != "HEAD" else {}

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)

        if response.status_code == 404:
            return _read_file_error(f"File not found: {path} in {repo} (ref: {ref})")

        if response.status_code != 200:
            error_msg = f"GitHub API error (status {response.status_code})"
            # The error body is optional detail; a non-JSON body is ignored.
            try:
                error_data = response.json()
            except ValueError:
                error_data = None
            if isinstance(error_data, dict) and "message" in error_data:
                error_msg += f": {error_data['message']}"
            return _read_file_error(error_msg)

        data = response.json()

        # Directories come back as a JSON array of entries, not an object.
        if isinstance(data, list):
            return _read_file_error(f"Path {path} is not a file (type: dir)")

        if not isinstance(data, dict) or data.get("type") != "file":
            found_type = data.get("type") if isinstance(data, dict) else None
            return _read_file_error(f"Path {path} is not a file (type: {found_type})")

        content_b64 = data.get("content", "")
        if content_b64:
            # The API wraps the base64 payload across lines.
            content_b64 = content_b64.replace("\n", "").replace(" ", "")
            content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
        else:
            # No inline content in the JSON response: fetch the raw bytes.
            raw_headers = {
                "Accept": "application/vnd.github.raw",
                "X-GitHub-Api-Version": "2022-11-28",
                "Authorization": f"Bearer {token}",
            }
            raw_response = requests.get(
                url, headers=raw_headers, params=params, timeout=30
            )
            if raw_response.status_code != 200:
                return _read_file_error("Failed to fetch file content")
            content = raw_response.text

        if path.lower().endswith(".ipynb"):
            content = _convert_ipynb_to_markdown(content)

        lines = content.split("\n")
        total_lines = len(lines)

        truncated = False
        if line_start is None and line_end is None:
            # No explicit range: show the head of the file only.
            line_start = 1
            line_end = total_lines
            if total_lines > _DEFAULT_MAX_LINES:
                line_end = _DEFAULT_MAX_LINES
                truncated = True
        else:
            # Partial range: an omitted bound extends to that end of the file.
            if line_start is None:
                line_start = 1
            if line_end is None:
                line_end = total_lines

        # Clamp to the file before validating the range.
        line_start = max(1, line_start)
        line_end = min(total_lines, line_end)
        if line_start > line_end:
            return _read_file_error(
                f"Invalid range: line_start ({line_start}) > line_end ({line_end})"
            )

        selected_content = "\n".join(lines[line_start - 1 : line_end])

        lines_output = [f"**Reading file from repo: {repo}, path: {path}**"]
        if ref and ref != "HEAD":
            lines_output.append(f"Ref: {ref}")

        lines_output.append("\n**File content:**")
        lines_output.append("```")
        lines_output.append(selected_content)
        lines_output.append("```")
        if truncated:
            lines_output.append(
                f"Currently showing lines {line_start}-{line_end} out of {total_lines} total lines. Use line_start and line_end to view more lines."
            )
        return {
            "formatted": "\n".join(lines_output),
            "totalResults": 1,
            "resultsShared": 1,
        }

    except requests.exceptions.RequestException as e:
        return _read_file_error(f"Failed to connect to GitHub API: {e}")
| |
|
| |
|
| | |
# Tool specification for the github_read_file tool: its name, the
# natural-language description (usage guidance plus worked examples), and the
# JSON-schema description of the accepted arguments.
GITHUB_READ_FILE_TOOL_SPEC = {
    "name": "github_read_file",
    "description": (
        "Read file contents from GitHub repositories with line range support (default 300 lines). "
        "⚠️ CRITICAL: Use AFTER github_find_examples to study working implementation code. "
        "**Use when:** (1) Found example file via github_find_examples and need full code, "
        "(2) Need to read trainer class implementation, (3) Study configuration patterns, "
        "(4) Read specific code sections with line ranges, (5) Review code from specific branches/commits. "
        "**Pattern:** github_find_examples (discover files) → github_read_file (read code) → implement using researched patterns. "
        # The output contains the raw selected lines; it is not line-numbered.
        "Returns: File contents for the requested line range, formatted for LLM reading. Auto-converts Jupyter notebooks to markdown. "
        "**Then:** Implement using patterns and APIs from the example code. "
        "**Critical for reliability:** Reading working examples prevents API errors and shows current best practices. "
        "Use line_start/line_end for large files (>300 lines) to read specific sections.\n\n"
        "## When to use this tool\n\n"
        "- When reading example code, trainer implementations, or configuration files\n"
        "- After github_find_examples returns file paths you want to study\n"
        "- When investigating specific code sections with line ranges\n"
        "- When reading from specific branches, tags, or commits (use ref parameter)\n\n"
        "## When NOT to use this tool\n\n"
        "- When you don't know exact file path (use github_find_examples or github_search_code first)\n"
        "- When searching for code patterns across repos (use github_search_code instead)\n\n"
        "## Examples\n\n"
        "<example>\n"
        "// ML Workflow Step: Read GRPO trainer class after finding via github_find_examples\n"
        "// Use case: Understand GRPOTrainer API, parameters, and methods\n"
        "{\n"
        "  repo: 'huggingface/trl',\n"
        "  path: 'trl/trainer/grpo_trainer.py',\n"
        "  line_start: 1,\n"
        "  line_end: 200\n"
        "}\n"
        "// Read class definition and constructor to understand current API\n"
        "// Shows: __init__ parameters, configuration, required arguments\n"
        "</example>\n\n"
        "<example>\n"
        "// ML Workflow Step: Study complete training script from examples\n"
        "// Use case: Learn end-to-end VLM fine-tuning workflow\n"
        "{\n"
        "  repo: 'huggingface/trl',\n"
        "  path: 'examples/scripts/grpo_vlm.py'\n"
        "}\n"
        "// Returns first 300 lines - shows full training setup\n"
        "// Use line_start/line_end if need to read more\n"
        "</example>\n\n"
        "<example>\n"
        "// ML Workflow Step: Check TrainingArguments configuration patterns\n"
        "// Use case: Learn how to structure training configs correctly\n"
        "{\n"
        "  repo: 'huggingface/transformers',\n"
        "  path: 'examples/pytorch/language-modeling/run_clm.py',\n"
        "  line_start: 50,\n"
        "  line_end: 150\n"
        "}\n"
        "// Read argument parsing and config setup section\n"
        "// Shows: current parameter names, default values, best practices\n"
        "</example>"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "repo": {
                "type": "string",
                "description": "Repository in format 'owner/repo' (e.g., 'github/github-mcp-server'). Required.",
            },
            "path": {
                "type": "string",
                "description": "Path to file in repository (e.g., 'src/index.js'). Required.",
            },
            "ref": {
                "type": "string",
                "description": "Git reference - branch name, tag, or commit SHA. Default: 'HEAD'.",
            },
            "line_start": {
                "type": "integer",
                "description": "Starting line number (1-indexed, inclusive). Optional.",
            },
            "line_end": {
                "type": "integer",
                "description": "Ending line number (1-indexed, inclusive). Optional.",
            },
        },
        "required": ["repo", "path"],
    },
}
| |
|
| |
|
async def github_read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """
    Handler for agent tool router.

    Args:
        arguments: Tool-call arguments. "repo" and "path" are required;
            "ref", "line_start" and "line_end" are optional.

    Returns:
        A ``(text, ok)`` pair: the formatted result or error message, and
        True when the read succeeded.
    """
    # Report missing required arguments by name rather than as a bare KeyError.
    missing = [name for name in ("repo", "path") if not arguments.get(name)]
    if missing:
        return (
            f"Error reading file: missing required argument(s): {', '.join(missing)}",
            False,
        )

    try:
        # read_file performs blocking HTTP requests; run it in a worker thread
        # so the event loop is not stalled while waiting on the network.
        result = await asyncio.to_thread(
            read_file,
            repo=arguments["repo"],
            path=arguments["path"],
            # Treat an explicit null/empty ref the same as an omitted one.
            ref=arguments.get("ref") or "HEAD",
            line_start=arguments.get("line_start"),
            line_end=arguments.get("line_end"),
        )
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        # Boundary of the tool router: never let a tool failure propagate.
        return f"Error reading file: {e}", False
| |
|