Spaces:
Running
Running
jedick
commited on
Commit
·
9e909f5
1
Parent(s):
93cd4db
Add app files
Browse files- Dockerfile +43 -0
- PlotMyData/__init__.py +9 -0
- PlotMyData/agent.py +320 -0
- README.md +2 -2
- data_summary.R +37 -0
- docker/entrypoint.sh +23 -0
- prompts.R +133 -0
- prompts.py +114 -0
- requirements.txt +3 -0
- server.R +134 -0
Dockerfile
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Declare the base image
FROM rocker/r-ver:latest

# Considerations for local development: reduce Docker cache size and rebuild time
# Single RUN directive and two COPY directives
# Pre-RUN COPY for relatively stable files, post-RUN COPY for app files
# Avoid other directives like USER and ENV
# entrypoint.sh activates the virtual environment for running the app
# Considerations for remote development (HF Spaces Dev Mode)
# Dev Mode requires useradd, chown and USER
# Use CMD instead of ENTRYPOINT

# Set working directory and copy non-app files
WORKDIR /app
COPY requirements.txt docker/entrypoint.sh .

# Install Python and system tools
# Create and activate virtual environment for installing packages
# Install required Python and R packages
# Make startup script executable
# Add user for HF Spaces Dev Mode and chown /app directory for user
# NOTE: chown needs the uppercase -R flag to recurse; lowercase -r is not a
# valid chown option and would make this whole RUN step fail
RUN apt-get update && \
    apt-get install -y python3 python3-pip python3-venv screen vim && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m venv /opt/venv && \
    export PATH="/opt/venv/bin:$PATH" && \
    pip --no-cache-dir install -r requirements.txt && \
    R -q -e 'install.packages(c("ellmer", "mcptools", "readr", "ggplot2", "tidyverse"))' && \
    chmod +x entrypoint.sh && \
    chown -R 1000:1000 /app && \
    useradd -m -u 1000 user

# Copy app files with user permissions
# NOTE: because the repo has docker/entrypoint.sh, this does *not*
# overwrite the entrypoint.sh that we made executable above
COPY --chown=1000 . /app

# Set the user for Dev Mode
USER 1000

# Set default command (executable file in WORKDIR)
CMD [ "/app/entrypoint.sh" ]
|
PlotMyData/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from . import agent
|
| 4 |
+
|
| 5 |
+
# Directory that receives uploaded files; created at import time if missing
upload_dir = "/tmp/uploads"
# Create the directory together with any missing parents (no error if present)
os.makedirs(upload_dir, exist_ok=True)
# Mode 0o755: read, write, execute for owner; read and execute for others
os.chmod(upload_dir, 0o755)
|
PlotMyData/agent.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google.adk.plugins.save_files_as_artifacts_plugin import SaveFilesAsArtifactsPlugin
|
| 2 |
+
from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
|
| 3 |
+
from google.adk.tools.mcp_tool.mcp_session_manager import SseConnectionParams
|
| 4 |
+
from google.adk.tools.mcp_tool.mcp_toolset import McpToolset
|
| 5 |
+
from google.adk.tools.tool_context import ToolContext
|
| 6 |
+
from google.adk.tools.base_tool import BaseTool
|
| 7 |
+
from google.adk.agents.callback_context import CallbackContext
|
| 8 |
+
from google.adk.agents import LlmAgent
|
| 9 |
+
from google.adk.models import LlmResponse, LlmRequest
|
| 10 |
+
from google.adk.models.lite_llm import LiteLlm
|
| 11 |
+
from google.adk.apps import App
|
| 12 |
+
from google.genai import types
|
| 13 |
+
from mcp import ClientSession, StdioServerParameters
|
| 14 |
+
from mcp.types import CallToolResult, TextContent
|
| 15 |
+
from mcp.client.stdio import stdio_client
|
| 16 |
+
from typing import Dict, Any, Optional, Tuple
|
| 17 |
+
from prompts import Root, Run, Data, Plot
|
| 18 |
+
import base64
|
| 19 |
+
import os
|
| 20 |
+
|
| 21 |
+
# Define MCP server parameters
# The MCP server is the R script server.R, started with Rscript
server_params = StdioServerParameters(
    command="Rscript",
    args=[
        # Use --vanilla to ignore .Rprofile, which is meant for the R instance running mcp_session()
        "--vanilla",
        "server.R",
    ],
)
# STDIO transport to local R MCP server
# NOTE(review): timeout=10 is presumably in seconds - confirm against the ADK docs
connection_params = StdioConnectionParams(server_params=server_params, timeout=10)

# Define model
# If we're using the OpenAI API, get the value of OPENAI_MODEL_NAME set by entrypoint.sh
# If we're using an OpenAI-compatible endpoint (Docker Model Runner), use a fake API key
# NOTE: when OPENAI_MODEL_NAME is unset the model name falls back to an empty string
model = LiteLlm(
    model=os.environ.get("OPENAI_MODEL_NAME", ""),
    api_key=os.environ.get("OPENAI_API_KEY", "fake-API-key"),
)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
async def select_r_session(
    callback_context: CallbackContext,
) -> Optional[types.Content]:
    """
    Before-agent callback that asks the R MCP server to select session 1.

    A stdio client is opened with the module-level `server_params`, the MCP
    session is initialized, and the `select_r_session` tool is called with
    {"session": 1}. The callback context itself is not used.

    Returns:
        Always None, so the LlmAgent carries on with its normal execution.
    """
    async with stdio_client(server_params) as streams:
        reader, writer = streams
        async with ClientSession(reader, writer) as mcp_session:
            await mcp_session.initialize()
            await mcp_session.call_tool("select_r_session", {"session": 1})
            print("[select_r_session] R session selected!")
    # None means "do not override the agent's own response"
    return None
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
async def catch_tool_errors(tool: BaseTool, args: dict, tool_context: ToolContext):
    """
    Callback function to catch errors from tool calls and turn them into a message.
    Modified from https://github.com/google/adk-python/discussions/795#discussioncomment-13460659

    The callback runs the tool itself and returns its result. If the tool raises,
    the exception is converted into a tool response flagged with isError=True,
    so the error text is returned instead of propagating the exception.

    Args:
        tool: The tool about to be called.
        args: Arguments for the tool call.
        tool_context: Context passed through to the tool.

    Returns:
        The tool result, or a dumped CallToolResult describing the error.
    """
    try:
        return await tool.run_async(args=args, tool_context=tool_context)
    except Exception as e:
        # Format the error as a tool response
        # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
        # An McpError carries its text in e.error.message. Other exception types
        # have no `error` attribute, so fall back to str(e) instead of raising
        # an AttributeError that would hide the original failure.
        error_data = getattr(e, "error", None)
        message = getattr(error_data, "message", None) or str(e)
        response = CallToolResult(
            content=[TextContent(type="text", text=str(message))],
            isError=True,
        )
        return response.model_dump(exclude_none=True, mode="json")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
async def preprocess_artifact(
    callback_context: CallbackContext, llm_request: LlmRequest
) -> Optional[LlmResponse]:
    """
    Callback function to copy the latest artifact to a temporary file.

    If the last message of the request mentions "Uploaded Artifact:", the most
    recently listed artifact is loaded and written under /tmp/uploads. When the
    artifact cannot be accessed or saved, a text part describing the problem is
    added to the request.

    Args:
        callback_context: Used to list and load artifacts.
        llm_request: The request about to be sent to the model; may be modified.

    Returns:
        Always None, so the (possibly modified) request goes to the LLM.
    """

    # Callback and artifact handling code modified from:
    # https://google.github.io/adk-docs/callbacks/types-of-callbacks/#before-model-callback
    # https://github.com/google/adk-python/issues/2176#issuecomment-3395469070

    # Get the last user message in the request contents.
    # Guard against empty contents/parts so the callback never raises IndexError.
    last_user_message = ""
    if llm_request.contents:
        last_parts = llm_request.contents[-1].parts
        if last_parts:
            # Function call events have no text part, so use "" for the string search below
            last_user_message = last_parts[-1].text or ""

    # If a file was uploaded then SaveFilesAsArtifactsPlugin() adds "[Uploaded Artifact: file_name.csv]" to the user message
    # Check for "Uploaded Artifact:" in the last user message
    if "Uploaded Artifact:" in last_user_message:

        # Add a text part only if there are any issues with accessing or saving the artifact
        added_text = ""
        # List available artifacts
        artifacts = await callback_context.list_artifacts()
        if not artifacts:
            added_text = "No uploaded file is available"
        else:
            most_recent_file = artifacts[-1]
            try:
                # Get artifact and byte data
                artifact = await callback_context.load_artifact(
                    filename=most_recent_file
                )
                byte_data = artifact.inline_data.data
                # Save artifact as temporary file
                tmp_dir = "/tmp/uploads"
                tmp_file_path = os.path.join(tmp_dir, most_recent_file)
                # The file name comes from the upload, so refuse names that would
                # resolve outside the upload directory (e.g. "../x" or "/etc/x")
                real_dir = os.path.realpath(tmp_dir)
                real_path = os.path.realpath(tmp_file_path)
                if os.path.commonpath([real_dir, real_path]) != real_dir:
                    raise ValueError(f"Invalid file name: '{most_recent_file}'")
                # Write the file
                with open(tmp_file_path, "wb") as f:
                    f.write(byte_data)
                # Set appropriate permissions
                os.chmod(tmp_file_path, 0o644)
                print(f"[preprocess_artifact] Saved artifact as '{tmp_file_path}'")

            except Exception as e:
                added_text = f"Error processing artifact: {str(e)}"

        # If there were any issues, add a new part to the user message
        if added_text:
            # NOTE(review): the text is appended to the first message (contents[0]);
            # an earlier variant appended to contents[-1] - confirm which is intended
            llm_request.contents[0].parts.append(types.Part(text=added_text))
            print(
                f"[preprocess_artifact] Added text part to user message: '{added_text}'"
            )

    # Return None to allow the possibly modified request to go to the LLM
    return None
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
async def preprocess_messages(
    callback_context: CallbackContext, llm_request: LlmRequest
) -> Optional[LlmResponse]:
    """
    Callback function to modify user messages to point to temporary artifact file paths.

    Every message in the request is inspected. When the text of its last part
    contains '[Uploaded Artifact: "', that marker is replaced so the message
    refers to the file under /tmp/uploads/ instead.

    Args:
        callback_context: Unused; required by the callback signature.
        llm_request: The request about to be sent to the model; modified in place.

    Returns:
        Always None, so the (possibly modified) request goes to the LLM.
    """

    # Original file path inserted by SaveFilesAsArtifactsPlugin():
    #   [Uploaded Artifact: "breast-cancer.csv"]
    # Modified file path used by preprocess_artifact():
    #   [Uploaded File: "/tmp/uploads/breast-cancer.csv"]
    tmp_dir = "/tmp/uploads/"
    artifact_marker = '[Uploaded Artifact: "'
    file_marker = f'[Uploaded File: "{tmp_dir}'

    # Changes to session state made by callbacks are not preserved across events
    # See: https://github.com/google/adk-docs/issues/904
    # Therefore, for every callback invocation we need to loop over all events, not just the most recent one
    for content in llm_request.contents or []:
        # Messages without parts have no text to rewrite (avoids IndexError)
        if not content.parts:
            continue
        # Inspect the user message in the request contents
        last_part = content.parts[-1]
        user_message = last_part.text
        if user_message and artifact_marker in user_message:
            # Modify file path in user message
            user_message = user_message.replace(artifact_marker, file_marker)
            last_part.text = user_message
            print(f"[preprocess_messages] Modified user message: '{user_message}'")

    return None
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def detect_file_type(byte_data: bytes) -> Tuple[str, str]:
    """
    Identify a file format from its leading magic bytes.

    Recognizes PNG, JPEG, BMP, TIFF (both byte orders), and PDF. Input shorter
    than 8 bytes, or with an unrecognized signature, is reported as PNG.

    Args:
        byte_data: Raw file contents.

    Returns:
        A (mime_type, file_extension) tuple.
    """
    # Used whenever the type can't be determined
    fallback = ("image/png", "png")
    if len(byte_data) < 8:
        return fallback

    # Each entry pairs the accepted signature prefixes with the reported type
    signatures = (
        ((b"\x89PNG\r\n\x1a\n",), ("image/png", "png")),
        ((b"\xff\xd8\xff",), ("image/jpeg", "jpg")),
        ((b"BM",), ("image/bmp", "bmp")),
        ((b"II*\x00", b"MM\x00*"), ("image/tiff", "tiff")),
        ((b"%PDF",), ("application/pdf", "pdf")),
    )
    for prefixes, file_type in signatures:
        # bytes.startswith accepts a tuple of candidate prefixes
        if byte_data.startswith(prefixes):
            return file_type
    return fallback
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
async def save_plot_artifact(
    tool: BaseTool, args: Dict[str, Any], tool_context: ToolContext, tool_response: Dict
) -> Optional[Dict]:
    """
    Callback function to save plot files as an ADK artifact.

    For the `make_plot` and `make_ggplot` tools, the first text content of a
    successful tool response is decoded from a hex string to bytes, saved as an
    artifact, and replaced by a short success message.

    Args:
        tool: The tool that was called.
        args: Arguments of the tool call (unused).
        tool_context: Used to save the artifact and to skip summarization.
        tool_response: Tool result as a dict.

    Returns:
        A dumped CallToolResult with the success message, or None to pass the
        original tool response through unchanged.
    """
    # We just want to see the plot in the conversation;
    # no need for an extra LLM call to tell us it's there.
    # This also prevents the model from trying to rerun the code,
    # so we can directly show the error message.
    tool_context.actions.skip_summarization = True

    if tool.name in ["make_plot", "make_ggplot"]:
        # In ADK 1.17.0, tool_response is a dict (i.e. result of model_dump method invoked on MCP CallToolResult instance):
        # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
        # https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#parsing-tool-results
        # .get() treats a missing "isError" key as "no error" instead of raising KeyError
        if "content" in tool_response and not tool_response.get("isError"):
            for content in tool_response["content"]:
                if "type" in content and content["type"] == "text":
                    # Convert tool response (hex string) to bytes
                    byte_data = bytes.fromhex(content["text"])

                    # Detect file type from magic number
                    mime_type, file_extension = detect_file_type(byte_data)

                    # Encode binary data to Base64 format
                    encoded = base64.b64encode(byte_data).decode("utf-8")
                    artifact_part = types.Part(
                        inline_data={
                            "data": encoded,
                            "mime_type": mime_type,
                        }
                    )
                    # Use second part of tool name (e.g. make_ggplot -> ggplot.png)
                    # Computed outside the f-string: reusing the same quote type inside
                    # an f-string expression is a SyntaxError before Python 3.12
                    plot_kind = tool.name.split("_", 1)[1]
                    filename = f"{plot_kind}.{file_extension}"
                    await tool_context.save_artifact(
                        filename=filename, artifact=artifact_part
                    )
                    # Format the success message as a tool response
                    text = f"Plot created and saved as an artifact: {filename}"
                    response = CallToolResult(
                        content=[TextContent(type="text", text=text)],
                    )
                    return response.model_dump(exclude_none=True, mode="json")

    # Passthrough for other tools or no matching content (e.g. tool error)
    return None
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
# Create agent to run R code
# Only the run_visible and run_hidden tools of the R MCP server are exposed
run_agent = LlmAgent(
    name="Run",
    description="Runs R code without making plots. Use the `Run` agent for executing code that does not load data or make a plot.",
    model=model,
    instruction=Run,
    tools=[
        McpToolset(
            connection_params=connection_params,
            tool_filter=["run_visible", "run_hidden"],
        )
    ],
    before_model_callback=[preprocess_artifact, preprocess_messages],
    before_tool_callback=catch_tool_errors,
)

# Create agent to load data
# Only the run_visible tool is exposed
data_agent = LlmAgent(
    name="Data",
    description="Loads data into an R data frame and summarizes it. Use the `Data` agent for loading data from a file or URL before making a plot.",
    model=model,
    instruction=Data,
    tools=[
        McpToolset(
            connection_params=connection_params,
            tool_filter=["run_visible"],
        )
    ],
    before_model_callback=[preprocess_artifact, preprocess_messages],
    before_tool_callback=catch_tool_errors,
)

# Create agent to make plots using R code
# This is the only agent with an after-tool callback: save_plot_artifact
# stores the plot returned by make_plot / make_ggplot as an artifact
plot_agent = LlmAgent(
    name="Plot",
    description="Makes plots using R code. Use the `Plot` agent after loading any required data.",
    model=model,
    instruction=Plot,
    tools=[
        McpToolset(
            connection_params=connection_params,
            tool_filter=["make_plot", "make_ggplot"],
        )
    ],
    before_model_callback=[preprocess_artifact, preprocess_messages],
    before_tool_callback=catch_tool_errors,
    after_tool_callback=save_plot_artifact,
)

# Create parent agent and assign children via sub_agents
root_agent = LlmAgent(
    name="Coordinator",
    # "Use the..." tells sub-agents to transfer to Coordinator for help requests
    description="Multi-agent system for performing actions in R. Use the `Coordinator` agent for getting help on packages, datasets, and functions.",
    model=model,
    instruction=Root,
    # To pass control back to root, the help and run functions should be tools or a ToolAgent (not sub_agent)
    tools=[
        McpToolset(
            connection_params=connection_params,
            tool_filter=["help_package", "help_topic"],
        )
    ],
    sub_agents=[
        run_agent,
        data_agent,
        plot_agent,
    ],
    # Select R session
    before_agent_callback=select_r_session,
    # Save user-uploaded artifact as a temporary file and modify messages to point to this file
    before_model_callback=[preprocess_artifact, preprocess_messages],
    before_tool_callback=catch_tool_errors,
)

# Application object wrapping the Coordinator agent
app = App(
    name="PlotMyData",
    root_agent=root_agent,
    # This inserts user messages like '[Uploaded Artifact: "breast-cancer.csv"]'
    plugins=[SaveFilesAsArtifactsPlugin()],
)
|
README.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 👀
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
-
short_description: Data analysis and plotting with ADK, MCP, and R
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: PlotMyData
|
| 3 |
emoji: 👀
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
+
short_description: Data analysis and plotting with Google ADK, MCP, and R
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
data_summary.R
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Summarize a data frame, for example:
|
| 2 |
+
# Data frame dimensions: 10 rows x 3 columns
|
| 3 |
+
# Data Summary:
|
| 4 |
+
# col1: integer
|
| 5 |
+
# col2: numeric, missing=3
|
| 6 |
+
# col3: character
|
| 7 |
+
|
| 8 |
+
data_summary <- function(df) {
  # Build a text summary of a data frame: its dimensions followed by one line
  # per column giving the column name, a type name and, if any values are
  # missing, the number of missing values.
  #
  # Args:
  #   df: A data frame.
  #
  # Returns:
  #   A single string with the summary lines separated by newlines.
  nrows <- nrow(df)
  ncols <- ncol(df)
  lines <- c(sprintf("Data frame dimensions: %d rows x %d columns", nrows, ncols), "Data Summary:")

  # Helper for R data type names
  type_map <- function(x) {
    if (is.factor(x)) return("factor")
    if (is.character(x)) return("character")
    if (is.logical(x)) return("logical")
    if (inherits(x, "Date")) return("Date")
    if (is.numeric(x)) {
      vals <- x[!is.na(x)]
      # Numeric columns whose non-missing values are all whole numbers are reported as "integer".
      # isTRUE() is needed because infinite values give Inf - round(Inf) = NaN, which can make
      # all() return NA and would otherwise raise "missing value where TRUE/FALSE needed".
      if (length(vals) > 0 && isTRUE(all(abs(vals - round(vals)) < .Machine$double.eps^0.5))) return("integer")
      return("numeric")
    }
    # Any other type: report the first class name
    return(class(x)[1])
  }

  for (col in names(df)) {
    dtype <- type_map(df[[col]])
    miss <- sum(is.na(df[[col]]))
    if (miss > 0) {
      lines <- c(lines, sprintf("%s: %s, missing=%d", col, dtype, miss))
    } else {
      lines <- c(lines, sprintf("%s: %s", col, dtype))
    }
  }
  paste(lines, collapse = "\n")
}
|
docker/entrypoint.sh
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh

# Exit immediately on errors
set -e

# MCP session setup for persistent R environment
# Create .Rprofile to run mcp_session() when R starts
echo "library(tidyverse); source('data_summary.R'); mcptools::mcp_session()" > .Rprofile

# Start R in a detached screen session
# TODO: Look at using supervisord for another way to run multiple services
# https://docs.docker.com/engine/containers/multi-service_container/#use-a-process-manager
screen -d -m R

# Activate virtual environment
export PATH="/opt/venv/bin:$PATH"

# Set OpenAI model
export OPENAI_MODEL_NAME=gpt-4o
#export OPENAI_API_KEY=$(cat /run/secrets/openai-api-key)
echo "Using OpenAI with ${OPENAI_MODEL_NAME}"

# Start the ADK web server, replacing this shell process.
# Hugging Face Docker Spaces route traffic to port 7860 unless README.md sets
# app_port, and this Space's README does not set it, so listen on 7860.
exec adk web --host 0.0.0.0 --port 7860 --reload_agents
|
prompts.R
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
make_plot_prompt <- '
|
| 2 |
+
Runs R code to make a plot with base R graphics.
|
| 3 |
+
|
| 4 |
+
Args:
|
| 5 |
+
code: R code to run
|
| 6 |
+
|
| 7 |
+
Returns:
|
| 8 |
+
Binary image data
|
| 9 |
+
|
| 10 |
+
Details:
|
| 11 |
+
`code` should be R code that begins with e.g. `png(filename)` and ends with `dev.off()`.
|
| 12 |
+
Always use the variable `filename` instead of an actual file name.
|
| 13 |
+
|
| 14 |
+
Example: User requests "Plot x (1,2,3) and y (10,20,30)", then `code` is:
|
| 15 |
+
|
| 16 |
+
png(filename)
|
| 17 |
+
x <- c(1, 2, 3)
|
| 18 |
+
y <- c(10, 20, 30)
|
| 19 |
+
plot(x, y)
|
| 20 |
+
dev.off()
|
| 21 |
+
|
| 22 |
+
Example: User requests "Give me a 8.5x11 inch PDF of y = x^2 from -1 to 1, large font, titled with the function", then `code` is:
|
| 23 |
+
|
| 24 |
+
pdf(filename, width = 8.5, height = 11)
|
| 25 |
+
par(cex = 2)
|
| 26 |
+
x <- seq(-1, 1, length.out = 100)
|
| 27 |
+
y <- x^2
|
| 28 |
+
plot(x, y, type = "l")
|
| 29 |
+
title(main = quote(y == x^2))
|
| 30 |
+
dev.off()
|
| 31 |
+
|
| 32 |
+
Example: User requests "Plot radius_worst (y) vs radius_mean (x) from https://zenodo.org/records/3608984/files/breastcancer.csv?download=1", then `code` is:
|
| 33 |
+
|
| 34 |
+
png(filename)
|
| 35 |
+
df <- read.csv("https://zenodo.org/records/3608984/files/breastcancer.csv?download=1")
|
| 36 |
+
plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
|
| 37 |
+
dev.off()
|
| 38 |
+
|
| 39 |
+
Example: User requests "Plot radius_worst (y) vs radius_mean (x)" and [Uploaded File: "/tmp/uploads/breast-cancer.csv"], then `code` is:
|
| 40 |
+
|
| 41 |
+
png(filename)
|
| 42 |
+
df <- read.csv("/tmp/uploads/breast-cancer.csv")
|
| 43 |
+
plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
|
| 44 |
+
dev.off()
|
| 45 |
+
'
|
| 46 |
+
|
| 47 |
+
make_ggplot_prompt <- '
|
| 48 |
+
Runs R code to make a plot with ggplot/ggplot2.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
code: R code to run
|
| 52 |
+
|
| 53 |
+
Returns:
|
| 54 |
+
Binary image data
|
| 55 |
+
|
| 56 |
+
Details:
|
| 57 |
+
`code` should be R code that begins with `library(ggplot2)` and ends with `ggsave(filename, device = "png")`.
|
| 58 |
+
|
| 59 |
+
Example: User requests "ggplot wt vs mpg from mtcars", then `code` is:
|
| 60 |
+
|
| 61 |
+
library(ggplot2)
|
| 62 |
+
ggplot(mtcars, aes(mpg, wt)) +
|
| 63 |
+
geom_point()
|
| 64 |
+
ggsave(filename, device = "png")
|
| 65 |
+
|
| 66 |
+
Example: User requests "ggplot wt vs mpg from mtcars as pdf", then `code` is:
|
| 67 |
+
|
| 68 |
+
library(ggplot2)
|
| 69 |
+
ggplot(mtcars, aes(mpg, wt)) +
|
| 70 |
+
geom_point()
|
| 71 |
+
ggsave(filename, device = "pdf")
|
| 72 |
+
|
| 73 |
+
Important notes:
|
| 74 |
+
|
| 75 |
+
- `code` must end with ggsave(filename, device = ) with a specified device.
|
| 76 |
+
- Use `device = "png"` unless the user requests a different format.
|
| 77 |
+
- Always use the variable `filename` instead of an actual file name.
|
| 78 |
+
'
|
| 79 |
+
|
| 80 |
+
help_topic_prompt <- '
|
| 81 |
+
Gets documentation for a dataset, function, or other topic.
|
| 82 |
+
|
| 83 |
+
Args:
|
| 84 |
+
topic: Topic or function to get help for.
|
| 85 |
+
|
| 86 |
+
Returns:
|
| 87 |
+
Documentation text. May include runnable R examples.
|
| 88 |
+
|
| 89 |
+
Examples:
|
| 90 |
+
- Show the arguments of the `lm` function: help_topic("lm").
|
| 91 |
+
- Show the format of the `airquality` dataset: help_topic("airquality").
|
| 92 |
+
- Get variables in `Titanic`: help_topic("Titanic").
|
| 93 |
+
'
|
| 94 |
+
|
| 95 |
+
help_package_prompt <- '
|
| 96 |
+
Summarizes datasets and functions in an R package.
|
| 97 |
+
|
| 98 |
+
Args:
|
| 99 |
+
package: Package to get help for.
|
| 100 |
+
|
| 101 |
+
Returns:
|
| 102 |
+
Documentation text. Includes a package description and index of functions and datasets.
|
| 103 |
+
|
| 104 |
+
Examples:
|
| 105 |
+
- Get the names of R datasets: help_package("datasets").
|
| 106 |
+
- List graphics functions in base R: help_package("graphics").
|
| 107 |
+
'
|
| 108 |
+
|
| 109 |
+
run_visible_prompt <- '
|
| 110 |
+
Runs R code and returns the result.
|
| 111 |
+
Does not make plots.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
code: R code to run.
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
Result of R code execution.
|
| 118 |
+
'
|
| 119 |
+
|
| 120 |
+
run_hidden_prompt <- '
|
| 121 |
+
Run R code without returning the result.
|
| 122 |
+
Does not make plots.
|
| 123 |
+
|
| 124 |
+
Args:
|
| 125 |
+
code: R code to run.
|
| 126 |
+
|
| 127 |
+
Returns:
|
| 128 |
+
Nothing.
|
| 129 |
+
|
| 130 |
+
NOTE: Choose this tool if:
|
| 131 |
+
- The user asks to save the result in a variable, or
|
| 132 |
+
- You are performing intermediate calculations before making a plot.
|
| 133 |
+
'
|
prompts.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Root = """
|
| 2 |
+
Your purpose is to interact with an R session to perform data analysis and visualization on the user's behalf.
|
| 3 |
+
You cannot run code directly, but may use the `Data`, `Plot`, and `Run` agents.
|
| 4 |
+
|
| 5 |
+
Only use the `Run` agent if the following conditions are both true:
|
| 6 |
+
|
| 7 |
+
- The operation is requested by the user ("calculate" or "run"), and
|
| 8 |
+
- The code does not make a plot, chart, graph, or any other visualization.
|
| 9 |
+
|
| 10 |
+
You may call a help tool before transferring control to an agent:
|
| 11 |
+
|
| 12 |
+
- If an R dataset ("dataset") is requested, use help_package('datasets') to find the correct dataset name.
|
| 13 |
+
- If the user requests documentation for specific datasets or functions, use the `help_topic` tool.
|
| 14 |
+
|
| 15 |
+
Examples:
|
| 16 |
+
|
| 17 |
+
- Query includes "?boxplot": The user is requesting documentation. Call help_topic('boxplot') then transfer to an agent.
|
| 18 |
+
- "Plot distance vs speed from the cars dataset": This is a plot request using an R dataset. Call help_package('datasets') then transfer to the `Data` agent.
|
| 19 |
+
- "Calculate x = cos(x) for x = 0 to 12 and make a plot": This is a plot that does not require data. Transfer to the `Plot` agent.
|
| 20 |
+
- "Run x <- 2": This is code execution without data or plot. Transfer to the `Run` agent.
|
| 21 |
+
- "Load the data": The user is asking to load data from an uploaded file. Transfer to the `Data` agent.
|
| 22 |
+
|
| 23 |
+
Important notes:
|
| 24 |
+
|
| 25 |
+
- Data may be provided directly by the user, in a URL, in an "Uploaded File" message, or an R dataset.
|
| 26 |
+
- You must not use the `Run` agent to make a plot or execute any other plotting commands.
|
| 27 |
+
- The only way to make a plot, chart, graph, or other visualization is to transfer to the `Data` or `Plot` agents.
|
| 28 |
+
- Do not use install.packages(), library(), or any other commands for package installation and loading.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
Run = """
|
| 32 |
+
You are an agent that runs R code using the `run_visible` and `run_hidden` tools.
|
| 33 |
+
You cannot make plots.
|
| 34 |
+
|
| 35 |
+
Perform the following actions:
|
| 36 |
+
- Interpret the user's request as R code.
|
| 37 |
+
- If the code makes a plot (including ggplot or any other type of graph or visualization), transfer to the `Plot` agent.
|
| 38 |
+
- If the code assigns the result to a variable, pass the code to the `run_hidden` tool.
|
| 39 |
+
- Otherwise, pass the code to the `run_visible` tool.
|
| 40 |
+
|
| 41 |
+
Important notes:
|
| 42 |
+
|
| 43 |
+
- The `run_hidden` tool runs R commands without returning the result. This is useful for reducing LLM token usage while working with large variables.
|
| 44 |
+
- You can use dplyr, tidyr, and other tidyverse packages.
|
| 45 |
+
- Your response should always be valid, self-contained R code.
|
| 46 |
+
- If the tool response is an error (isError: true), respond with the exact text of the error message and stop running code.
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
# Prompt text for the `Data` agent: it builds a `df` object from the first
# available data source, summarizes it with `data_summary(df)` via `run_visible`,
# and then hands off to the `Plot` agent when a plot was requested.
# NOTE: the R snippets below are examples the agent imitates, so they must be
# syntactically valid R (numeric vectors are built with c(...)).
Data = """
You are an agent that loads and summarizes data.
Your main task has three parts:

1. Generate R code to create a `df` object and summarize it with `data_summary(df)`.
2. Use the `run_visible` tool to execute the code.
3. Transfer to the `Plot` agent to make a plot.

Choose the first available data source:

1: Data provided directly by the user.
2: File provided in an "Uploaded File" message. Do not use other files.
3: URL provided by the user. Do not use other URLs.
4: Available R dataset that matches the user's request.

Examples of code for `run_visible`:

- User requests "plot 1,2,3 10,20,30": code is `df <- data.frame(x = c(1,2,3), y = c(10, 20, 30))
data_summary(df)`.
- User requests "plot cars data": code is `df <- data.frame(cars)
data_summary(df)`
- To read CSV data from a URL, use `df <- read.csv(csv_url)`, where csv_url is the exact URL provided by the user.
- To read CSV data from a file, use `df <- read.csv(file_path)`, where file_path is provided in an "Uploaded File" user message.

What to do after calling `run_visible`:

- If "Data Summary" exists and the user requested a plot, then pass control to the `Plot` agent.
- If "Data Summary" exists and the user did not request a plot, then stop the workflow.
- If the user provided data but "Data Summary" does not exist, then stop and report a problem.

Important notes:

- Do not use the `run_visible` tool to make a plot.
- Run `data_summary(df)` in your code. Do not run `summary(df)`.
- You can use dplyr, tidyr, and other tidyverse packages.
"""
|
| 85 |
+
|
| 86 |
+
Plot = """
|
| 87 |
+
You are an agent that makes plots with R code using the `make_plot` and `make_ggplot` tools.
|
| 88 |
+
|
| 89 |
+
Coding strategy:
|
| 90 |
+
|
| 91 |
+
- Use previously assigned variables (especially `df`) in your code.
|
| 92 |
+
- Do not load data yourself.
|
| 93 |
+
- Use a specific variable other than `df` if it is better for making the plot.
|
| 94 |
+
- Choose column names in `df` based on the user's request.
|
| 95 |
+
- Column names are case-sensitive, syntactically valid R names.
|
| 96 |
+
- Look in the Data Summary for details.
|
| 97 |
+
- No data are required for plotting functions and simulations.
|
| 98 |
+
|
| 99 |
+
Plot tools:
|
| 100 |
+
|
| 101 |
+
- For base R graphics use the `make_plot` tool.
|
| 102 |
+
- For ggplot/ggplot2 use the `make_ggplot` tool.
|
| 103 |
+
- Both of these tools save the plot as a conversation artifact that is visible to the user.
|
| 104 |
+
|
| 105 |
+
Examples:
|
| 106 |
+
- User requests to plot "dates", but the Data Summary lists a "Date" column. Answer: use `df$Date`.
|
| 107 |
+
- User requests to plot "volcano", but `df` also exists. Answer: The `volcano` matrix is better for images; use `image(volcano)`.
|
| 108 |
+
|
| 109 |
+
Important notes:
|
| 110 |
+
|
| 111 |
+
- Use base R graphics unless the user asks for ggplot or ggplot2.
|
| 112 |
+
- Pay attention to the user's request and use your knowledge of R to write code that gives the best-looking plot.
|
| 113 |
+
- Your response should always be valid, self-contained R code.
|
| 114 |
+
"""
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
google-adk==1.22.0
|
| 2 |
+
litellm==1.80.13
|
| 3 |
+
mcp==1.25.0
|
server.R
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 20251009 Added plot tool
|
| 2 |
+
# 20251023 Added help tools
|
| 3 |
+
|
| 4 |
+
# Load ellmer for tool() and type_*()
|
| 5 |
+
library(ellmer)
|
| 6 |
+
|
| 7 |
+
# Read prompts
|
| 8 |
+
source("prompts.R")
|
| 9 |
+
|
| 10 |
+
# Get help for a package
# package: name of the package, as a character string
# Returns the package information reported by help(package = ...) as one
# newline-separated string
help_package <- function(package) {
  # The parentheses make help() use the value of `package` rather than
  # treating the argument name itself as the package to look up
  pkg_help <- help(package = (package), help_type = "text")
  info_lines <- unlist(pkg_help$info)
  paste(info_lines, collapse = "\n")
}
|
| 15 |
+
|
| 16 |
+
# Get help for a topic
# Adapted from https://github.com/posit-dev/btw:::help_to_rd
# topic: name of the topic or function, as a character string
# Returns a character vector: one summary heading followed by one element per
# help file, or a single "No help found" message when nothing matches
help_topic <- function(topic) {
  # The parentheses make help() use the value of `topic` rather than the symbol
  matches <- help(topic = (topic), help_type = "text")
  if (length(matches) == 0) {
    return(paste0("No help found for '", topic, "'. Please check the name and try again."))
  }

  # A topic can resolve to more than one help file
  # e.g. help_topic(plot) returns the help for both base::plot and graphics::plot.default
  rd_paths <- as.character(matches)
  n_files <- length(rd_paths)

  # Read the Rd source behind one help path; the package name is taken from the
  # directory two levels above the file and the Rd name from the last component
  read_rd <- function(rd_path) {
    pkg <- basename(dirname(dirname(rd_path)))
    rd_file <- paste0(basename(rd_path), ".Rd")
    rd <- tools::Rd_db(pkg)[[rd_file]]
    paste(as.character(rd), collapse = "")
  }
  rd_text <- vapply(rd_paths, read_rd, character(1))

  # Heading before each help file (e.g. Help file 1, Help file 2)
  # so the LLM can tell multiple help files apart
  file_sections <- paste0("## Help file ", seq_len(n_files), ":\n", rd_text)

  # Heading at start of message (e.g. 2 help files were retrieved)
  path_list <- paste(rd_paths, collapse = ", ")
  header <- if (n_files == 1) {
    paste0("# ", n_files, " help file was retrieved: ", path_list, ":\n")
  } else {
    paste0("# ", n_files, " help files were retrieved: ", path_list, ":\n")
  }

  c(header, file_sections)
}
|
| 41 |
+
|
| 42 |
+
# Run R code and return the result
# https://github.com/posit-dev/mcptools/issues/71
# code: R code to run, as a character string
# The code is evaluated in the global environment, so variables it assigns are
# available to later calls; the value of the last expression is returned
run_visible <- function(code) {
  expressions <- parse(text = code)
  eval(expressions, envir = globalenv())
}
|
| 47 |
+
|
| 48 |
+
# Run R code without returning the result
# https://github.com/posit-dev/mcptools/issues/71
# code: R code to run, as a character string
# The code is evaluated in the global environment; its value is discarded and a
# fixed confirmation message is returned instead
run_hidden <- function(code) {
  expressions <- parse(text = code)
  eval(expressions, envir = globalenv())
  "The code executed successfully"
}
|
| 54 |
+
|
| 55 |
+
# Run R code to make a plot and return the image data
# code: R code that writes the plot to the file named by the local variable
#   `filename` (this should include e.g. png() and dev.off())
# Returns the contents of that file as raw bytes so ADK can save it as an artifact
make_plot <- function(code) {
  # png() cannot write to an in-memory raw connection (it fails with
  # "'filename' must be a non-empty character string"), so use a temporary file
  filename <- tempfile(fileext = ".dat")

  # Remember which graphics devices were already open before running the code
  .devices_before <- grDevices::dev.list()
  on.exit({
    # If the code failed between png() and dev.off(), close the devices it left
    # open; otherwise they stay open for the rest of the R session
    for (.device in setdiff(grDevices::dev.list(), .devices_before)) {
      grDevices::dev.off(.device)
    }
    unlink(filename)
  }, add = TRUE)

  # The code uses a local variable (filename), so don't use envir = globalenv() here
  eval(parse(text = code))

  readr::read_file_raw(filename)
}
|
| 75 |
+
|
| 76 |
+
# Same workflow as make_plot(); it is a separate function so that it can be
# registered with a different tool description
# code: R code that writes the plot to the file named by the local variable `filename`
# Returns the contents of that file as raw bytes
make_ggplot <- function(code) {
  filename <- tempfile(fileext = ".dat")

  # Remember which graphics devices were already open before running the code
  .devices_before <- grDevices::dev.list()
  on.exit({
    # If the code failed after opening a graphics device, close the devices it
    # left open; otherwise they stay open for the rest of the R session
    for (.device in setdiff(grDevices::dev.list(), .devices_before)) {
      grDevices::dev.off(.device)
    }
    unlink(filename)
  }, add = TRUE)

  # The code uses a local variable (filename), so it is evaluated in this
  # function's environment rather than in globalenv()
  eval(parse(text = code))

  readr::read_file_raw(filename)
}
|
| 83 |
+
|
| 84 |
+
# Start the MCP server with the six tools defined in this file
# Each tool() call pairs a function with its description (the *_prompt
# variables, presumably defined in prompts.R) and declares its one string argument
mcptools::mcp_server(
  tools = list(
    # Documentation lookup
    tool(
      help_package,
      help_package_prompt,
      arguments = list(package = type_string("Package to get help for."))
    ),
    tool(
      help_topic,
      help_topic_prompt,
      arguments = list(topic = type_string("Topic or function to get help for."))
    ),

    # Code execution, with and without returning the result
    tool(
      run_visible,
      run_visible_prompt,
      arguments = list(code = type_string("R code to run."))
    ),
    tool(
      run_hidden,
      run_hidden_prompt,
      arguments = list(code = type_string("R code to run."))
    ),

    # Plotting with base graphics and with ggplot2
    tool(
      make_plot,
      make_plot_prompt,
      arguments = list(code = type_string("R code to make the plot."))
    ),
    tool(
      make_ggplot,
      make_ggplot_prompt,
      arguments = list(code = type_string("R code to make the plot."))
    )
  )
)
|