jedick committed on
Commit
9e909f5
·
1 Parent(s): 93cd4db

Add app files

Browse files
Files changed (10) hide show
  1. Dockerfile +43 -0
  2. PlotMyData/__init__.py +9 -0
  3. PlotMyData/agent.py +320 -0
  4. README.md +2 -2
  5. data_summary.R +37 -0
  6. docker/entrypoint.sh +23 -0
  7. prompts.R +133 -0
  8. prompts.py +114 -0
  9. requirements.txt +3 -0
  10. server.R +134 -0
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Declare the base image
FROM rocker/r-ver:latest

# Considerations for local development: reduce Docker cache size and rebuild time
#   Single RUN directive and two COPY directives
#   Pre-RUN COPY for relatively stable files, post-RUN COPY for app files
#   Avoid other directives like USER and ENV
#   entrypoint.sh activates the virtual environment for running the app
# Considerations for remote development (HF Spaces Dev Mode)
#   Dev Mode requires useradd, chown and USER
#   Use CMD instead of ENTRYPOINT

# Set working directory and copy non-app files
# With more than one source, the destination is written as a directory ("./")
WORKDIR /app
COPY requirements.txt docker/entrypoint.sh ./

# Install Python and system tools
# Create and activate virtual environment for installing packages
# Install required Python and R packages
# Make startup script executable
# Add user for HF Spaces Dev Mode and chown /app directory for user
# NOTE: recursive chown is "-R" (capital); lowercase "-r" is not a valid option
RUN apt-get update && \
    apt-get install -y python3 python3-pip python3-venv screen vim && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m venv /opt/venv && \
    export PATH="/opt/venv/bin:$PATH" && \
    pip --no-cache-dir install -r requirements.txt && \
    R -q -e 'install.packages(c("ellmer", "mcptools", "readr", "ggplot2", "tidyverse"))' && \
    chmod +x entrypoint.sh && \
    chown -R 1000:1000 /app && \
    useradd -m -u 1000 user

# Copy app files with user permissions
# NOTE: because the repo has docker/entrypoint.sh, this does *not*
# overwrite the entrypoint.sh that we made executable above
COPY --chown=1000 . /app

# Set the user for Dev Mode
USER 1000

# Set default command (executable file in WORKDIR)
CMD [ "/app/entrypoint.sh" ]
PlotMyData/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from . import agent
4
+
5
# Directory that receives user-uploaded files; create it at import time if needed
upload_dir = "/tmp/uploads"
os.makedirs(upload_dir, exist_ok=True)
# Mode 0o755: owner may read/write/enter; group and others may read/enter
os.chmod(upload_dir, 0o755)
PlotMyData/agent.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.adk.plugins.save_files_as_artifacts_plugin import SaveFilesAsArtifactsPlugin
2
+ from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
3
+ from google.adk.tools.mcp_tool.mcp_session_manager import SseConnectionParams
4
+ from google.adk.tools.mcp_tool.mcp_toolset import McpToolset
5
+ from google.adk.tools.tool_context import ToolContext
6
+ from google.adk.tools.base_tool import BaseTool
7
+ from google.adk.agents.callback_context import CallbackContext
8
+ from google.adk.agents import LlmAgent
9
+ from google.adk.models import LlmResponse, LlmRequest
10
+ from google.adk.models.lite_llm import LiteLlm
11
+ from google.adk.apps import App
12
+ from google.genai import types
13
+ from mcp import ClientSession, StdioServerParameters
14
+ from mcp.types import CallToolResult, TextContent
15
+ from mcp.client.stdio import stdio_client
16
+ from typing import Dict, Any, Optional, Tuple
17
+ from prompts import Root, Run, Data, Plot
18
+ import base64
19
+ import os
20
+
21
# How to launch the R MCP server process
server_params = StdioServerParameters(
    command="Rscript",
    # --vanilla makes Rscript ignore .Rprofile, which is meant for the
    # R instance running mcp_session()
    args=["--vanilla", "server.R"],
)
# The agents talk to the local R MCP server over STDIO
connection_params = StdioConnectionParams(timeout=10, server_params=server_params)

# Model definition
# - OpenAI API: the model name comes from OPENAI_MODEL_NAME (set by entrypoint.sh)
# - OpenAI-compatible endpoint (Docker Model Runner): a fake API key is used
model = LiteLlm(
    api_key=os.environ.get("OPENAI_API_KEY", "fake-API-key"),
    model=os.environ.get("OPENAI_MODEL_NAME", ""),
)
40
+
41
+
42
async def select_r_session(
    callback_context: CallbackContext,
) -> Optional[types.Content]:
    """
    Callback function to select the first R session.

    Opens a STDIO connection to the R MCP server described by `server_params`,
    initializes a client session and calls the `select_r_session` tool with
    session number 1. `callback_context` is not used.

    Returns:
        Always None, so the LlmAgent continues with its normal execution.
    """
    async with stdio_client(server_params) as streams:
        reader, writer = streams
        async with ClientSession(reader, writer) as mcp_session:
            await mcp_session.initialize()
            await mcp_session.call_tool("select_r_session", {"session": 1})
            print("[select_r_session] R session selected!")
    return None
55
+
56
+
57
async def catch_tool_errors(tool: BaseTool, args: dict, tool_context: ToolContext):
    """
    Callback function to catch errors from tool calls and turn them into a message.
    Modified from https://github.com/google/adk-python/discussions/795#discussioncomment-13460659

    The callback runs the tool itself and returns whatever the tool returns.
    If the tool raises, the exception is converted into an MCP-style error
    response instead of propagating.

    Args:
        tool: Tool to run; must provide an async `run_async(args=, tool_context=)`.
        args: Arguments passed through to the tool.
        tool_context: Tool context passed through to the tool.

    Returns:
        The tool's result on success; otherwise a dict dumped from a
        CallToolResult with `isError=True` and the error text as content.
    """
    try:
        return await tool.run_async(args=args, tool_context=tool_context)
    except Exception as e:
        # Format the error as a tool response
        # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
        # McpError keeps its text in e.error.message. Other exception types have
        # no `.error` attribute, so fall back to str(e) (or the class name when
        # the message is empty) rather than failing inside the handler.
        error_data = getattr(e, "error", None)
        message = getattr(error_data, "message", None) or str(e) or type(e).__name__
        response = CallToolResult(
            content=[TextContent(type="text", text=message)],
            isError=True,
        )
        return response.model_dump(exclude_none=True, mode="json")
73
+
74
+
75
async def preprocess_artifact(
    callback_context: CallbackContext, llm_request: LlmRequest
) -> Optional[LlmResponse]:
    """
    Callback function to copy the latest artifact to a temporary file.

    When the last part of the last content entry contains "Uploaded Artifact:",
    the last artifact listed by `callback_context` is loaded and written to
    /tmp/uploads. If no artifact is available or saving fails, a text part
    describing the problem is appended to the request.

    Args:
        callback_context: Provides `list_artifacts()` and `load_artifact()`.
        llm_request: Request about to be sent to the model; may be modified in place.

    Returns:
        Always None, so the (possibly modified) request goes to the LLM.
    """

    # Callback and artifact handling code modified from:
    # https://google.github.io/adk-docs/callbacks/types-of-callbacks/#before-model-callback
    # https://github.com/google/adk-python/issues/2176#issuecomment-3395469070

    # Nothing to inspect when the request is empty or the last entry has no parts
    if not llm_request.contents or not llm_request.contents[-1].parts:
        return None

    # Get the last user message in the request contents
    # Function call events have no text part, so use "" for the string search below
    last_user_message = llm_request.contents[-1].parts[-1].text or ""

    # If a file was uploaded then SaveFilesAsArtifactsPlugin() adds
    # "[Uploaded Artifact: file_name.csv]" to the user message
    if "Uploaded Artifact:" not in last_user_message:
        return None

    # Add a text part only if there are any issues with accessing or saving the artifact
    added_text = ""
    artifacts = await callback_context.list_artifacts()
    if not artifacts:
        added_text = "No uploaded file is available"
    else:
        # NOTE(review): assumes the last listed artifact is the newest upload - confirm
        most_recent_file = artifacts[-1]
        try:
            # Get artifact and byte data
            artifact = await callback_context.load_artifact(filename=most_recent_file)
            byte_data = artifact.inline_data.data
            # Save artifact as temporary file. The name comes from the upload,
            # so keep only its final component to stay inside the upload directory.
            tmp_dir = "/tmp/uploads"
            tmp_file_path = os.path.join(tmp_dir, os.path.basename(most_recent_file))
            with open(tmp_file_path, "wb") as f:
                f.write(byte_data)
            # Readable by everyone, writable by owner only
            os.chmod(tmp_file_path, 0o644)
            print(f"[preprocess_artifact] Saved artifact as '{tmp_file_path}'")
        except Exception as e:
            # Best effort: report the problem to the model instead of failing the request
            added_text = f"Error processing artifact: {str(e)}"

    # If there were any issues, add a new part to the request
    if added_text:
        # NOTE(review): the text is appended to the FIRST content entry; an earlier
        # variant targeted contents[-1] - confirm which one is intended
        first_content = llm_request.contents[0]
        if first_content.parts is None:
            first_content.parts = []
        first_content.parts.append(types.Part(text=added_text))
        print(f"[preprocess_artifact] Added text part to user message: '{added_text}'")

    # Return None to allow the possibly modified request to go to the LLM
    return None
134
+
135
+
136
async def preprocess_messages(
    callback_context: CallbackContext, llm_request: LlmRequest
) -> Optional[LlmResponse]:
    """
    Callback function to modify user messages to point to temporary artifact file paths.

    For every content entry in the request, the text of its last part is
    rewritten so that the artifact placeholder points to the file under
    /tmp/uploads/:

        [Uploaded Artifact: "breast-cancer.csv"]            (inserted by SaveFilesAsArtifactsPlugin())
        [Uploaded File: "/tmp/uploads/breast-cancer.csv"]   (path used by preprocess_artifact())

    Args:
        callback_context: Not used.
        llm_request: Request about to be sent to the model; modified in place.

    Returns:
        Always None, so the (possibly modified) request goes to the LLM.
    """
    tmp_dir = "/tmp/uploads/"
    artifact_marker = '[Uploaded Artifact: "'
    file_marker = f'[Uploaded File: "{tmp_dir}'

    # Changes to session state made by callbacks are not preserved across events
    # See: https://github.com/google/adk-docs/issues/904
    # Therefore, every invocation loops over all contents, not just the most recent one
    for content in llm_request.contents:
        # Entries without parts carry no message to rewrite
        if not content.parts:
            continue
        last_part = content.parts[-1]
        user_message = last_part.text
        # Parts without text (e.g. function calls) are left alone
        if user_message and artifact_marker in user_message:
            user_message = user_message.replace(artifact_marker, file_marker)
            last_part.text = user_message
            print(f"[preprocess_messages] Modified user message: '{user_message}'")

    return None
164
+
165
+
166
def detect_file_type(byte_data: bytes) -> Tuple[str, str]:
    """
    Detect file type from magic number/bytes and return (mime_type, file_extension).

    Supports BMP, JPEG, PNG, TIFF, PDF and SVG. Data shorter than 8 bytes, or
    data with no recognized signature, is reported as PNG.

    Args:
        byte_data: Raw file contents.

    Returns:
        Tuple of MIME type and file extension (without the dot).
    """
    default = ("image/png", "png")
    if len(byte_data) < 8:
        # Default to PNG if we can't determine
        return default

    # Binary formats are identified by their leading magic bytes
    signatures = (
        (b"\x89PNG\r\n\x1a\n", ("image/png", "png")),
        (b"\xff\xd8\xff", ("image/jpeg", "jpg")),
        (b"BM", ("image/bmp", "bmp")),
        (b"II*\x00", ("image/tiff", "tiff")),
        (b"MM\x00*", ("image/tiff", "tiff")),
        (b"%PDF", ("application/pdf", "pdf")),
    )
    for magic, file_type in signatures:
        if byte_data.startswith(magic):
            return file_type

    # SVG is text: either the <svg> root element comes first, or an XML
    # declaration is followed shortly by the <svg> element
    head = bytes(byte_data[:1024]).lstrip()
    if head.startswith(b"<svg") or (head.startswith(b"<?xml") and b"<svg" in head):
        return "image/svg+xml", "svg"

    # Default to PNG if we can't determine
    return default
189
+
190
+
191
async def save_plot_artifact(
    tool: BaseTool, args: Dict[str, Any], tool_context: ToolContext, tool_response: Dict
) -> Optional[Dict]:
    """
    Callback function to save plot files as an ADK artifact.

    Args:
        tool: Tool that produced the response; only `make_plot` and `make_ggplot` are handled.
        args: Arguments the tool was called with (not used).
        tool_context: Used to set `skip_summarization` and to save the artifact.
        tool_response: Tool result as a dict; the first text content item is
            expected to hold the plot file as a hex string.

    Returns:
        A replacement tool response (dict) naming the saved artifact, or None to
        pass the original response through (other tools, tool errors, no text content).
    """
    # We just want to see the plot in the conversation;
    # no need for an extra LLM call to tell us it's there.
    # This also prevents the model from trying to rerun the code,
    # so we can directly show the error message.
    tool_context.actions.skip_summarization = True

    # Passthrough for other tools
    if tool.name not in ("make_plot", "make_ggplot"):
        return None

    # In ADK 1.17.0, tool_response is a dict (i.e. result of model_dump method invoked on MCP CallToolResult instance):
    # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
    # https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#parsing-tool-results
    # Passthrough for tool errors; a missing "isError" key is treated as "no error"
    if "content" not in tool_response or tool_response.get("isError"):
        return None

    for content in tool_response["content"]:
        if not ("type" in content and content["type"] == "text"):
            continue

        # Convert tool response (hex string) to bytes
        byte_data = bytes.fromhex(content["text"])

        # Detect file type from magic number
        mime_type, file_extension = detect_file_type(byte_data)

        # Encode binary data to Base64 format
        encoded = base64.b64encode(byte_data).decode("utf-8")
        artifact_part = types.Part(
            inline_data={
                "data": encoded,
                "mime_type": mime_type,
            }
        )
        # Use second part of tool name (e.g. make_ggplot -> ggplot.png).
        # Computed outside the f-string: reusing double quotes inside a
        # replacement field is a syntax error before Python 3.12.
        plot_kind = tool.name.split("_", 1)[1]
        filename = f"{plot_kind}.{file_extension}"
        await tool_context.save_artifact(filename=filename, artifact=artifact_part)

        # Format the success message as a tool response
        text = f"Plot created and saved as an artifact: {filename}"
        response = CallToolResult(
            content=[TextContent(type="text", text=text)],
        )
        return response.model_dump(exclude_none=True, mode="json")

    # Passthrough when there is no matching content
    return None
238
+
239
+
240
# Sub-agent: executes R code that neither loads data nor makes a plot
run_agent = LlmAgent(
    name="Run",
    model=model,
    instruction=Run,
    description="Runs R code without making plots. Use the `Run` agent for executing code that does not load data or make a plot.",
    tools=[
        McpToolset(
            tool_filter=["run_visible", "run_hidden"],
            connection_params=connection_params,
        )
    ],
    before_tool_callback=catch_tool_errors,
    before_model_callback=[preprocess_artifact, preprocess_messages],
)

# Sub-agent: loads data into R and summarizes it
data_agent = LlmAgent(
    name="Data",
    model=model,
    instruction=Data,
    description="Loads data into an R data frame and summarizes it. Use the `Data` agent for loading data from a file or URL before making a plot.",
    tools=[
        McpToolset(
            tool_filter=["run_visible"],
            connection_params=connection_params,
        )
    ],
    before_tool_callback=catch_tool_errors,
    before_model_callback=[preprocess_artifact, preprocess_messages],
)

# Sub-agent: makes plots; the after-tool callback stores each plot as an artifact
plot_agent = LlmAgent(
    name="Plot",
    model=model,
    instruction=Plot,
    description="Makes plots using R code. Use the `Plot` agent after loading any required data.",
    tools=[
        McpToolset(
            tool_filter=["make_plot", "make_ggplot"],
            connection_params=connection_params,
        )
    ],
    before_tool_callback=catch_tool_errors,
    after_tool_callback=save_plot_artifact,
    before_model_callback=[preprocess_artifact, preprocess_messages],
)

# Parent agent; the three agents above are attached through sub_agents
root_agent = LlmAgent(
    name="Coordinator",
    model=model,
    instruction=Root,
    # "Use the..." tells sub-agents to transfer to Coordinator for help requests
    description="Multi-agent system for performing actions in R. Use the `Coordinator` agent for getting help on packages, datasets, and functions.",
    # To pass control back to root, the help and run functions should be tools or a ToolAgent (not sub_agent)
    tools=[
        McpToolset(
            tool_filter=["help_package", "help_topic"],
            connection_params=connection_params,
        )
    ],
    sub_agents=[run_agent, data_agent, plot_agent],
    # Select R session before the agent runs
    before_agent_callback=select_r_session,
    before_tool_callback=catch_tool_errors,
    # Save user-uploaded artifact as a temporary file and modify messages to point to this file
    before_model_callback=[preprocess_artifact, preprocess_messages],
)

app = App(
    name="PlotMyData",
    # This plugin inserts user messages like '[Uploaded Artifact: "breast-cancer.csv"]'
    plugins=[SaveFilesAsArtifactsPlugin()],
    root_agent=root_agent,
)
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Plotmydata
3
  emoji: 👀
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
- short_description: Data analysis and plotting with ADK, MCP, and R
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: PlotMyData
3
  emoji: 👀
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
+ short_description: Data analysis and plotting with Google ADK, MCP, and R
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
data_summary.R ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Summarize a data frame, for example:
2
+ # Data frame dimensions: 10 rows x 3 columns
3
+ # Data Summary:
4
+ # col1: integer
5
+ # col2: numeric, missing=3
6
+ # col3: character
7
+
8
data_summary <- function(df) {
  # Name the type of one column. Numeric columns whose non-missing values are
  # all whole numbers (within tolerance) are reported as "integer".
  type_map <- function(x) {
    if (is.factor(x)) {
      "factor"
    } else if (is.character(x)) {
      "character"
    } else if (is.logical(x)) {
      "logical"
    } else if (inherits(x, "Date")) {
      "Date"
    } else if (is.numeric(x)) {
      vals <- x[!is.na(x)]
      whole <- length(vals) > 0 && all(abs(vals - round(vals)) < .Machine$double.eps^0.5)
      if (whole) "integer" else "numeric"
    } else {
      class(x)[1]
    }
  }

  # Two header lines: dimensions, then the section title
  header <- c(
    sprintf("Data frame dimensions: %d rows x %d columns", nrow(df), ncol(df)),
    "Data Summary:"
  )

  # One line per column: "name: type", plus the missing count when non-zero
  col_lines <- vapply(names(df), function(col) {
    dtype <- type_map(df[[col]])
    miss <- sum(is.na(df[[col]]))
    if (miss > 0) {
      sprintf("%s: %s, missing=%d", col, dtype, miss)
    } else {
      sprintf("%s: %s", col, dtype)
    }
  }, character(1), USE.NAMES = FALSE)

  paste(c(header, col_lines), collapse = "\n")
}
docker/entrypoint.sh ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh

# Exit immediately on errors
set -e

# MCP session setup for persistent R environment
# Create .Rprofile (in the current directory) to run mcp_session() when R starts
echo "library(tidyverse); source('data_summary.R'); mcptools::mcp_session()" > .Rprofile

# Start R in a detached screen session
# TODO: Look at using supervisord for another way to run multiple services
# https://docs.docker.com/engine/containers/multi-service_container/#use-a-process-manager
screen -d -m R

# Activate virtual environment
export PATH="/opt/venv/bin:$PATH"

# Set OpenAI model
# gpt-4o is the default; a value already present in the environment is kept
export OPENAI_MODEL_NAME="${OPENAI_MODEL_NAME:-gpt-4o}"
#export OPENAI_API_KEY=$(cat /run/secrets/openai-api-key)
echo "Using OpenAI with ${OPENAI_MODEL_NAME}"

# Replace the shell with the ADK web server so it receives signals directly
exec adk web --host 0.0.0.0 --port 8080 --reload_agents
prompts.R ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ make_plot_prompt <- '
2
+ Runs R code to make a plot with base R graphics.
3
+
4
+ Args:
5
+ code: R code to run
6
+
7
+ Returns:
8
+ Binary image data
9
+
10
+ Details:
11
+ `code` should be R code that begins with e.g. `png(filename)` and ends with `dev.off()`.
12
+ Always use the variable `filename` instead of an actual file name.
13
+
14
+ Example: User requests "Plot x (1,2,3) and y (10,20,30)", then `code` is:
15
+
16
+ png(filename)
17
+ x <- c(1, 2, 3)
18
+ y <- c(10, 20, 30)
19
+ plot(x, y)
20
+ dev.off()
21
+
22
+ Example: User requests "Give me a 8.5x11 inch PDF of y = x^2 from -1 to 1, large font, titled with the function", then `code` is:
23
+
24
+ pdf(filename, width = 8.5, height = 11)
25
+ par(cex = 2)
26
+ x <- seq(-1, 1, length.out = 100)
27
+ y <- x^2
28
+ plot(x, y, type = "l")
29
+ title(main = quote(y == x^2))
30
+ dev.off()
31
+
32
+ Example: User requests "Plot radius_worst (y) vs radius_mean (x) from https://zenodo.org/records/3608984/files/breastcancer.csv?download=1", then `code` is:
33
+
34
+ png(filename)
35
+ df <- read.csv("https://zenodo.org/records/3608984/files/breastcancer.csv?download=1")
36
+ plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
37
+ dev.off()
38
+
39
+ Example: User requests "Plot radius_worst (y) vs radius_mean (x)" and [Uploaded File: "/tmp/uploads/breast-cancer.csv"], then `code` is:
40
+
41
+ png(filename)
42
+ df <- read.csv("/tmp/uploads/breast-cancer.csv")
43
+ plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
44
+ dev.off()
45
+ '
46
+
47
+ make_ggplot_prompt <- '
48
+ Runs R code to make a plot with ggplot/ggplot2.
49
+
50
+ Args:
51
+ code: R code to run
52
+
53
+ Returns:
54
+ Binary image data
55
+
56
+ Details:
57
+ `code` should be R code that begins with `library(ggplot2)` and ends with `ggsave(filename, device = "png")`.
58
+
59
+ Example: User requests "ggplot wt vs mpg from mtcars", then `code` is:
60
+
61
+ library(ggplot2)
62
+ ggplot(mtcars, aes(mpg, wt)) +
63
+ geom_point()
64
+ ggsave(filename, device = "png")
65
+
66
+ Example: User requests "ggplot wt vs mpg from mtcars as pdf", then `code` is:
67
+
68
+ library(ggplot2)
69
+ ggplot(mtcars, aes(mpg, wt)) +
70
+ geom_point()
71
+ ggsave(filename, device = "pdf")
72
+
73
+ Important notes:
74
+
75
+ - `code` must end with ggsave(filename, device = ) with a specified device.
76
+ - Use `device = "png"` unless the user requests a different format.
77
+ - Always use the variable `filename` instead of an actual file name.
78
+ '
79
+
80
+ help_topic_prompt <- '
81
+ Gets documentation for a dataset, function, or other topic.
82
+
83
+ Args:
84
+ topic: Topic or function to get help for.
85
+
86
+ Returns:
87
+ Documentation text. May include runnable R examples.
88
+
89
+ Examples:
90
+ - Show the arguments of the `lm` function: help_topic("lm").
91
+ - Show the format of the `airquality` dataset: help_topic("airquality").
92
+ - Get variables in `Titanic`: help_topic("Titanic").
93
+ '
94
+
95
+ help_package_prompt <- '
96
+ Summarizes datasets and functions in an R package.
97
+
98
+ Args:
99
+ package: Package to get help for.
100
+
101
+ Returns:
102
+ Documentation text. Includes a package description and index of functions and datasets.
103
+
104
+ Examples:
105
+ - Get the names of R datasets: help_package("datasets").
106
+ - List graphics functions in base R: help_package("graphics").
107
+ '
108
+
109
+ run_visible_prompt <- '
110
+ Runs R code and returns the result.
111
+ Does not make plots.
112
+
113
+ Args:
114
+ code: R code to run.
115
+
116
+ Returns:
117
+ Result of R code execution.
118
+ '
119
+
120
+ run_hidden_prompt <- '
121
+ Runs R code without returning the result.
122
+ Does not make plots.
123
+
124
+ Args:
125
+ code: R code to run.
126
+
127
+ Returns:
128
+ Nothing.
129
+
130
+ NOTE: Choose this tool if:
131
+ - The user asks to save the result in a variable, or
132
+ - You are performing intermediate calculations before making a plot.
133
+ '
prompts.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Root = """
2
+ Your purpose is to interact with an R session to perform data analysis and visualization on the user's behalf.
3
+ You cannot run code directly, but may use the `Data`, `Plot`, and `Run` agents.
4
+
5
+ Only use the `Run` agent if the following conditions are both true:
6
+
7
+ - The operation is requested by the user ("calculate" or "run"), and
8
+ - The code does not make a plot, chart, graph, or any other visualization.
9
+
10
+ You may call a help tool before transferring control to an agent:
11
+
12
+ - If an R dataset ("dataset") is requested, use help_package('datasets') to find the correct dataset name.
13
+ - If the user requests documentation for specific datasets or functions, use the `help_topic` tool.
14
+
15
+ Examples:
16
+
17
+ - Query includes "?boxplot": The user is requesting documentation. Call help_topic('boxplot') then transfer to an agent.
18
+ - "Plot distance vs speed from the cars dataset": This is a plot request using an R dataset. Call help_package('datasets') then transfer to the `Data` agent.
19
+ - "Calculate x = cos(x) for x = 0 to 12 and make a plot": This is a plot that does not require data. Transfer to the `Plot` agent.
20
+ - "Run x <- 2": This is code execution without data or plot. Transfer to the `Run` agent.
21
+ - "Load the data": The user is asking to load data from an uploaded file. Transfer to the `Data` agent.
22
+
23
+ Important notes:
24
+
25
+ - Data may be provided directly by the user, in a URL, in an "Uploaded File" message, or an R dataset.
26
+ - You must not use the `Run` agent to make a plot or execute any other plotting commands.
27
+ - The only way to make a plot, chart, graph, or other visualization is to transfer to the `Data` or `Plot` agents.
28
+ - Do not use install.packages(), library(), or any other commands for package installation and loading.
29
+ """
30
+
31
+ Run = """
32
+ You are an agent that runs R code using the `run_visible` and `run_hidden` tools.
33
+ You cannot make plots.
34
+
35
+ Perform the following actions:
36
+ - Interpret the user's request as R code.
37
+ - If the code makes a plot (including ggplot or any other type of graph or visualization), transfer to the `Plot` agent.
38
+ - If the code assigns the result to a variable, pass the code to the `run_hidden` tool.
39
+ - Otherwise, pass the code to the `run_visible` tool.
40
+
41
+ Important notes:
42
+
43
+ - The `run_hidden` tool runs R commands without returning the result. This is useful for reducing LLM token usage while working with large variables.
44
+ - You can use dplyr, tidyr, and other tidyverse packages.
45
+ - Your response should always be valid, self-contained R code.
46
+ - If the tool response is an error (isError: true), respond with the exact text of the error message and stop running code.
47
+ """
48
+
49
+ Data = """
50
+ You are an agent that loads and summarizes data.
51
+ Your main task has three parts:
52
+
53
+ 1. Generate R code to create a `df` object and summarize it with `data_summary(df)`.
54
+ 2. Use the `run_visible` tool to execute the code.
55
+ 3. Transfer to the `Plot` agent to make a plot.
56
+
57
+ Choose the first available data source:
58
+
59
+ 1: Data provided directly by the user.
60
+ 2: File provided in an "Uploaded File" message. Do not use other files.
61
+ 3: URL provided by the user. Do not use other URLs.
62
+ 4: Available R dataset that matches the user's request.
63
+
64
+ Examples of code for `run_visible`:
65
+
66
+ - User requests "plot 1,2,3 10,20,30": code is `df <- data.frame(x = c(1,2,3), y = c(10, 20, 30))
67
+ data_summary(df)`.
68
+ - User requests "plot cars data": code is `df <- data.frame(cars)
69
+ data_summary(df)`
70
+ - To read CSV data from a URL, use `df <- read.csv(csv_url)`, where csv_url is the exact URL provided by the user.
71
+ - To read CSV data from a file, use `df <- read.csv(file_path)`, where file_path is provided in an "Uploaded File" user message.
72
+
73
+ What to do after calling `run_visible`:
74
+
75
+ - If "Data Summary" exists and the user requested a plot, then pass control to the `Plot` agent.
76
+ - If "Data Summary" exists and the user did not request a plot, then stop the workflow.
77
+ - If the user provided data but "Data Summary" does not exist, then stop and report a problem.
78
+
79
+ Important notes:
80
+
81
+ - Do not use the `run_visible` tool to make a plot.
82
+ - Run `data_summary(df)` in your code. Do not run `summary(df)`.
83
+ - You can use dplyr, tidyr, and other tidyverse packages.
84
+ """
85
+
86
+ Plot = """
87
+ You are an agent that makes plots with R code using the `make_plot` and `make_ggplot` tools.
88
+
89
+ Coding strategy:
90
+
91
+ - Use previously assigned variables (especially `df`) in your code.
92
+ - Do not load data yourself.
93
+ - Use a specific variable other than `df` if it is better for making the plot.
94
+ - Choose column names in `df` based on the user's request.
95
+ - Column names are case-sensitive, syntactically valid R names.
96
+ - Look in the Data Summary for details.
97
+ - No data are required for plotting functions and simulations.
98
+
99
+ Plot tools:
100
+
101
+ - For base R graphics use the `make_plot` tool.
102
+ - For ggplot/ggplot2 use the `make_ggplot` tool.
103
+ - Both of these tools save the plot as a conversation artifact that is visible to the user.
104
+
105
+ Examples:
106
+ - User requests to plot "dates", but the Data Summary lists a "Date" column. Answer: use `df$Date`.
107
+ - User requests to plot "volcano", but `df` also exists. Answer: The `volcano` matrix is better for images; use `image(volcano)`.
108
+
109
+ Important notes:
110
+
111
+ - Use base R graphics unless the user asks for ggplot or ggplot2.
112
+ - Pay attention to the user's request and use your knowledge of R to write code that gives the best-looking plot.
113
+ - Your response should always be valid, self-contained R code.
114
+ """
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ google-adk==1.22.0
2
+ litellm==1.80.13
3
+ mcp==1.25.0
server.R ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 20251009 Added plot tool
2
+ # 20251023 Added help tools
3
+
4
+ # Load ellmer for tool() and type_*()
5
+ library(ellmer)
6
+
7
+ # Read prompts
8
+ source("prompts.R")
9
+
10
+ # Get help for a package
11
help_package <- function(package) {
  # The parentheses make help() use the value of `package`, not the literal name
  pkg_help <- help(package = (package), help_type = "text")
  # Flatten the info sections into one newline-separated string
  info_lines <- unlist(pkg_help$info)
  paste(info_lines, collapse = "\n")
}
15
+
16
+ # Get help for a topic
17
+ # Adapted from https://github.com/posit-dev/btw:::help_to_rd
18
help_topic <- function(topic) {
  # The parentheses make help() use the value of `topic`, not the literal name
  help_page <- help(topic = (topic), help_type = "text")
  if (length(help_page) == 0) {
    return(paste0("No help found for '", topic, "'. Please check the name and try again."))
  }

  # A topic can match several help files
  # e.g. help_topic(plot) returns the help for both base::plot and graphics::plot.default
  help_paths <- as.character(help_page)
  rd_text <- sapply(help_paths, function(help_path) {
    # The package name is two directory levels above the help file
    rd_package <- basename(dirname(dirname(help_path)))
    rd_file <- paste0(basename(help_path), ".Rd")
    paste(as.character(tools::Rd_db(rd_package)[[rd_file]]), collapse = "")
  })

  # Heading before each help file (e.g. Help file 1, Help file 2) so the LLM
  # can tell multiple help files apart
  help_result <- paste0("## Help file ", seq_along(rd_text), ":\n", rd_text)

  # Heading at start of message (e.g. 2 help files were retrieved)
  n_files <- length(help_paths)
  verb <- if (n_files == 1) " help file was retrieved: " else " help files were retrieved: "
  help_info <- paste0("# ", n_files, verb, paste(help_paths, collapse = ", "), ":\n")

  c(help_info, help_result)
}
41
+
42
+ # Run R code and return the result
43
+ # https://github.com/posit-dev/mcptools/issues/71
44
run_visible <- function(code) {
  # Parse the code string, then evaluate it in the global environment;
  # the value of the last expression is the return value
  expressions <- parse(text = code)
  eval(expressions, envir = globalenv())
}
47
+
48
+ # Run R code without returning the result
49
+ # https://github.com/posit-dev/mcptools/issues/71
50
run_hidden <- function(code) {
  # Evaluate in the global environment, discarding the result
  expressions <- parse(text = code)
  eval(expressions, envir = globalenv())
  # Fixed confirmation message instead of the value
  "The code executed successfully"
}
54
+
55
+ # Run R code to make a plot and return the image data
56
make_plot <- function(code) {
  # Writing to an in-memory connection (png(filename = rawConnection(...))) is
  # often suggested, but fails with:
  #   Error in png(filename = raw_conn) :
  #     'filename' must be a non-empty character string
  # so a temporary file is used to save the plot instead.
  filename <- tempfile(fileext = ".dat")
  on.exit(unlink(filename), add = TRUE)

  # If the plotting code stops before dev.off(), the device it opened would stay
  # open. Close every device that was not open before the code ran.
  .devices_before <- grDevices::dev.list()
  on.exit(
    invisible(lapply(setdiff(grDevices::dev.list(), .devices_before), grDevices::dev.off)),
    add = TRUE
  )

  # Run the plotting code (this should include e.g. png() and dev.off())
  # The code uses a local variable (filename), so don't use envir = globalenv() here
  eval(parse(text = code))

  # Return the image file (PNG, PDF, ...) as raw bytes so ADK can save it as an artifact
  readr::read_file_raw(filename)
}
75
+
76
+ # This is the same code as make_plot() but has a different tool description
77
make_ggplot <- function(code) {
  # Temporary file that the plotting code writes to through the `filename` variable
  filename <- tempfile(fileext = ".dat")
  on.exit(unlink(filename), add = TRUE)

  # Close any graphics device the code opened but did not close (e.g. after an error)
  .devices_before <- grDevices::dev.list()
  on.exit(
    invisible(lapply(setdiff(grDevices::dev.list(), .devices_before), grDevices::dev.off)),
    add = TRUE
  )

  # The code uses the local variable `filename`, so evaluate it in this function's frame
  eval(parse(text = code))

  # Return the image file as raw bytes
  readr::read_file_raw(filename)
}
83
+
84
# Start the MCP server and register each R function as a tool.
# Every tool() call gives the function, its description (from prompts.R)
# and the type of its single string argument.
mcptools::mcp_server(tools = list(

  # Help tools
  tool(
    help_package,
    help_package_prompt,
    arguments = list(package = type_string("Package to get help for."))
  ),
  tool(
    help_topic,
    help_topic_prompt,
    arguments = list(topic = type_string("Topic or function to get help for."))
  ),

  # Code execution tools
  tool(
    run_visible,
    run_visible_prompt,
    arguments = list(code = type_string("R code to run."))
  ),
  tool(
    run_hidden,
    run_hidden_prompt,
    arguments = list(code = type_string("R code to run."))
  ),

  # Plotting tools
  tool(
    make_plot,
    make_plot_prompt,
    arguments = list(code = type_string("R code to make the plot."))
  ),
  tool(
    make_ggplot,
    make_ggplot_prompt,
    arguments = list(code = type_string("R code to make the plot."))
  )

))