Spaces:

jedick
/

plotmydata

Running

App Files Files Community

jedick committed 5 days ago

Commit

9e909f5

1 Parent(s): 93cd4db

Add app files

Browse files

Files changed (10) hide show

Dockerfile +43 -0
PlotMyData/__init__.py +9 -0
PlotMyData/agent.py +320 -0
README.md +2 -2
data_summary.R +37 -0
docker/entrypoint.sh +23 -0
prompts.R +133 -0
prompts.py +114 -0
requirements.txt +3 -0
server.R +134 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,43 @@

+# Declare the base image
+FROM rocker/r-ver:latest
+# Considerations for local development: reduce Docker cache size and rebuild time
+#   Single RUN directive and two COPY directives
+#     Pre-RUN COPY for relatively stable files, post-RUN COPY for app files
+#   Avoid other directives like USER and ENV
+#     entrypoint.sh activates the virtual environment for running the app
+# Considerations for remote development (HF Spaces Dev Mode)
+#   Dev Mode requires useradd, chown and USER
+#   Use CMD instead of ENTRYPOINT
+# Set working directory and copy non-app files
+# NOTE: with more than one source, the COPY destination must be a directory ending in "/"
+WORKDIR /app
+COPY requirements.txt docker/entrypoint.sh ./
+# Install Python and system tools
+# Create and activate virtual environment for installing packages
+# Install required Python and R packages
+# Make startup script executable
+# Add user for HF Spaces Dev Mode and chown /app directory for user
+# NOTE: recursive chown is -R (uppercase); lowercase -r is not a valid chown option
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip python3-venv screen vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    python3 -m venv /opt/venv && \
+    export PATH="/opt/venv/bin:$PATH" && \
+    pip --no-cache-dir install -r requirements.txt && \
+    R -q -e 'install.packages(c("ellmer", "mcptools", "readr", "ggplot2", "tidyverse"))' && \
+    chmod +x entrypoint.sh && \
+    chown -R 1000:1000 /app && \
+    useradd -m -u 1000 user
+# Copy app files with user permissions
+# NOTE: because the repo has docker/entrypoint.sh, this does *not*
+# overwrite the entrypoint.sh that we made executable above
+COPY --chown=1000 . /app
+# Set the user for Dev Mode
+USER 1000
+# Set default command (executable file in WORKDIR)
+CMD [ "/app/entrypoint.sh" ]

PlotMyData/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import os
+from pathlib import Path
+from . import agent
+# Ensure upload directory exists
+upload_dir = "/tmp/uploads"
+Path(upload_dir).mkdir(parents=True, exist_ok=True)
+# Read, write, execute for owner; read and execute for others
+os.chmod(upload_dir, 0o755)

PlotMyData/agent.py ADDED Viewed

	@@ -0,0 +1,320 @@

+from google.adk.plugins.save_files_as_artifacts_plugin import SaveFilesAsArtifactsPlugin
+from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
+from google.adk.tools.mcp_tool.mcp_session_manager import SseConnectionParams
+from google.adk.tools.mcp_tool.mcp_toolset import McpToolset
+from google.adk.tools.tool_context import ToolContext
+from google.adk.tools.base_tool import BaseTool
+from google.adk.agents.callback_context import CallbackContext
+from google.adk.agents import LlmAgent
+from google.adk.models import LlmResponse, LlmRequest
+from google.adk.models.lite_llm import LiteLlm
+from google.adk.apps import App
+from google.genai import types
+from mcp import ClientSession, StdioServerParameters
+from mcp.types import CallToolResult, TextContent
+from mcp.client.stdio import stdio_client
+from typing import Dict, Any, Optional, Tuple
+from prompts import Root, Run, Data, Plot
+import base64
+import os
+# Define MCP server parameters
+server_params = StdioServerParameters(
+    command="Rscript",
+    args=[
+        # Use --vanilla to ignore .Rprofile, which is meant for the R instance running mcp_session()
+        "--vanilla",
+        "server.R",
+    ],
+)
+# STDIO transport to local R MCP server
+connection_params = StdioConnectionParams(server_params=server_params, timeout=10)
+# Define model
+# If we're using the OpenAI API, get the value of OPENAI_MODEL_NAME set by entrypoint.sh
+# If we're using an OpenAI-compatible endpoint (Docker Model Runner), use a fake API key
+model = LiteLlm(
+    model=os.environ.get("OPENAI_MODEL_NAME", ""),
+    api_key=os.environ.get("OPENAI_API_KEY", "fake-API-key"),
+)
+async def select_r_session(
+    callback_context: CallbackContext,
+) -> Optional[types.Content]:
+    """
+    Callback function to select the first R session.
+    """
+    async with stdio_client(server_params) as (read, write):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+            await session.call_tool("select_r_session", {"session": 1})
+            print("[select_r_session] R session selected!")
+    # Return None to allow the LlmAgent's normal execution
+    return None
+async def catch_tool_errors(tool: BaseTool, args: dict, tool_context: ToolContext):
+    """
+    Callback function to catch errors from tool calls and turn them into a message.
+    Modified from https://github.com/google/adk-python/discussions/795#discussioncomment-13460659
+    """
+    try:
+        return await tool.run_async(args=args, tool_context=tool_context)
+    except Exception as e:
+        # Format the error as a tool response
+        # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
+        response = CallToolResult(
+            # The error has class McpError; use e.error.message to get the text
+            content=[TextContent(type="text", text=e.error.message)],
+            isError=True,
+        )
+        return response.model_dump(exclude_none=True, mode="json")
+async def preprocess_artifact(
+    callback_context: CallbackContext, llm_request: LlmRequest
+) -> Optional[LlmResponse]:
+    """
+    Callback function to copy the latest artifact to a temporary file.
+    """
+    # Callback and artifact handling code modified from:
+    # https://google.github.io/adk-docs/callbacks/types-of-callbacks/#before-model-callback
+    # https://github.com/google/adk-python/issues/2176#issuecomment-3395469070
+    # Get the last user message in the request contents
+    last_user_message = llm_request.contents[-1].parts[-1].text
+    # Function call events have no text part, so set this to "" for string search in the next step
+    if last_user_message is None:
+        last_user_message = ""
+    # If a file was uploaded then SaveFilesAsArtifactsPlugin() adds "[Uploaded Artifact: file_name.csv]" to the user message
+    # Check for "Uploaded Artifact:" in the last user message
+    if "Uploaded Artifact:" in last_user_message:
+        # Add a text part only if there are any issues with accessing or saving the artifact
+        added_text = ""
+        # List available artifacts
+        artifacts = await callback_context.list_artifacts()
+        if len(artifacts) == 0:
+            added_text = "No uploaded file is available"
+        else:
+            most_recent_file = artifacts[-1]
+            try:
+                # Get artifact and byte data
+                artifact = await callback_context.load_artifact(
+                    filename=most_recent_file
+                )
+                byte_data = artifact.inline_data.data
+                # Save artifact as temporary file
+                tmp_dir = "/tmp/uploads"
+                tmp_file_path = os.path.join(tmp_dir, most_recent_file)
+                # Write the file
+                with open(tmp_file_path, "wb") as f:
+                    f.write(byte_data)
+                # Set appropriate permissions
+                os.chmod(tmp_file_path, 0o644)
+                print(f"[preprocess_artifact] Saved artifact as '{tmp_file_path}'")
+            except Exception as e:
+                added_text = f"Error processing artifact: {str(e)}"
+        # If there were any issues, add a new part to the user message
+        if added_text:
+            # llm_request.contents[-1].parts.append(types.Part(text=added_text))
+            llm_request.contents[0].parts.append(types.Part(text=added_text))
+            print(
+                f"[preprocess_artifact] Added text part to user message: '{added_text}'"
+            )
+    # Return None to allow the possibly modified request to go to the LLM
+    return None
+async def preprocess_messages(
+    callback_context: CallbackContext, llm_request: LlmRequest
+) -> Optional[LlmResponse]:
+    """
+    Callback function to modify user messages to point to temporary artifact file paths.
+    """
+    # Changes to session state made by callbacks are not preserved across events
+    # See: https://github.com/google/adk-docs/issues/904
+    # Therefore, for every callback invocation we need to loop over all events, not just the most recent one
+    for i in range(len(llm_request.contents)):
+        # Inspect the user message in the request contents
+        user_message = llm_request.contents[i].parts[-1].text
+        if user_message:
+            # Modify file path in user message
+            # Original file path inserted by SaveFilesAsArtifactsPlugin():
+            #   [Uploaded Artifact: "breast-cancer.csv"]
+            # Modified file path used by preprocess_artifact():
+            #   [Uploaded File: "/tmp/uploads/breast-cancer.csv"]
+            tmp_dir = "/tmp/uploads/"
+            if '[Uploaded Artifact: "' in user_message:
+                user_message = user_message.replace(
+                    '[Uploaded Artifact: "', f'[Uploaded File: "{tmp_dir}'
+                )
+                llm_request.contents[i].parts[-1].text = user_message
+                print(f"[preprocess_messages] Modified user message: '{user_message}'")
+    return None
+def detect_file_type(byte_data: bytes) -> Tuple[str, str]:
+    """
+    Detect file type from magic number/bytes and return (mime_type, file_extension).
+    Supports BMP, JPEG, PNG, TIFF, and PDF.
+    """
+    if len(byte_data) < 8:
+        # Default to PNG if we can't determine
+        return "image/png", "png"
+    # Check magic numbers
+    if byte_data.startswith(b"\x89PNG\r\n\x1a\n"):
+        return "image/png", "png"
+    elif byte_data.startswith(b"\xff\xd8\xff"):
+        return "image/jpeg", "jpg"
+    elif byte_data.startswith(b"BM"):
+        return "image/bmp", "bmp"
+    elif byte_data.startswith(b"II*\x00") or byte_data.startswith(b"MM\x00*"):
+        return "image/tiff", "tiff"
+    elif byte_data.startswith(b"%PDF"):
+        return "application/pdf", "pdf"
+    else:
+        # Default to PNG if we can't determine
+        return "image/png", "png"
+async def save_plot_artifact(
+    tool: BaseTool, args: Dict[str, Any], tool_context: ToolContext, tool_response: Dict
+) -> Optional[Dict]:
+    """
+    Callback function to save plot files as an ADK artifact.
+    """
+    # We just want to see the plot in the conversation;
+    # no need for an extra LLM call to tell us it's there.
+    # This also prevents the model from trying to rerun the code,
+    # so we can directly show the error message.
+    tool_context.actions.skip_summarization = True
+    if tool.name in ["make_plot", "make_ggplot"]:
+        # In ADK 1.17.0, tool_response is a dict (i.e. result of model_dump method invoked on MCP CallToolResult instance):
+        # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
+        # https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#parsing-tool-results
+        if "content" in tool_response and not tool_response["isError"]:
+            for content in tool_response["content"]:
+                if "type" in content and content["type"] == "text":
+                    # Convert tool response (hex string) to bytes
+                    byte_data = bytes.fromhex(content["text"])
+                    # Detect file type from magic number
+                    mime_type, file_extension = detect_file_type(byte_data)
+                    # Encode binary data to Base64 format
+                    encoded = base64.b64encode(byte_data).decode("utf-8")
+                    artifact_part = types.Part(
+                        inline_data={
+                            "data": encoded,
+                            "mime_type": mime_type,
+                        }
+                    )
+                    # Use second part of tool name (e.g. make_ggplot -> ggplot.png)
+                    filename = f"{tool.name.split("_", 1)[1]}.{file_extension}"
+                    await tool_context.save_artifact(
+                        filename=filename, artifact=artifact_part
+                    )
+                    # Format the success message as a tool response
+                    text = f"Plot created and saved as an artifact: {filename}"
+                    response = CallToolResult(
+                        content=[TextContent(type="text", text=text)],
+                    )
+                    return response.model_dump(exclude_none=True, mode="json")
+    # Passthrough for other tools or no matching content (e.g. tool error)
+    return None
+# Create agent to run R code
+# Tools are limited to run_visible and run_hidden from the R MCP server
+run_agent = LlmAgent(
+    name="Run",
+    description="Runs R code without making plots. Use the `Run` agent for executing code that does not load data or make a plot.",
+    model=model,
+    instruction=Run,
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["run_visible", "run_hidden"],
+        )
+    ],
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+)
+# Create agent to load data
+# Tools are limited to run_visible from the R MCP server
+data_agent = LlmAgent(
+    name="Data",
+    description="Loads data into an R data frame and summarizes it. Use the `Data` agent for loading data from a file or URL before making a plot.",
+    model=model,
+    instruction=Data,
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["run_visible"],
+        )
+    ],
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+)
+# Create agent to make plots using R code
+# This is the only agent with an after_tool_callback: save_plot_artifact saves the plot as an artifact
+plot_agent = LlmAgent(
+    name="Plot",
+    description="Makes plots using R code. Use the `Plot` agent after loading any required data.",
+    model=model,
+    instruction=Plot,
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["make_plot", "make_ggplot"],
+        )
+    ],
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+    after_tool_callback=save_plot_artifact,
+)
+# Create parent agent and assign children via sub_agents
+root_agent = LlmAgent(
+    name="Coordinator",
+    # "Use the..." tells sub-agents to transfer to Coordinator for help requests
+    description="Multi-agent system for performing actions in R. Use the `Coordinator` agent for getting help on packages, datasets, and functions.",
+    model=model,
+    instruction=Root,
+    # To pass control back to root, the help and run functions should be tools or a ToolAgent (not sub_agent)
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["help_package", "help_topic"],
+        )
+    ],
+    sub_agents=[
+        run_agent,
+        data_agent,
+        plot_agent,
+    ],
+    # Select R session
+    before_agent_callback=select_r_session,
+    # Save user-uploaded artifact as a temporary file and modify messages to point to this file
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+)
+# Wrap the root agent in an ADK App so that plugins can be attached
+app = App(
+    name="PlotMyData",
+    root_agent=root_agent,
+    # This inserts user messages like '[Uploaded Artifact: "breast-cancer.csv"]'
+    plugins=[SaveFilesAsArtifactsPlugin()],
+)

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
-title: Plotmydata
 emoji: 👀
 colorFrom: indigo
 colorTo: red
 sdk: docker
 pinned: false
 license: mit
-short_description: Data analysis and plotting with ADK, MCP, and R
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: PlotMyData
 emoji: 👀
 colorFrom: indigo
 colorTo: red
 sdk: docker
 pinned: false
 license: mit
+short_description: Data analysis and plotting with Google ADK, MCP, and R
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

data_summary.R ADDED Viewed

	@@ -0,0 +1,37 @@

+# Summarize a data frame, for example:
+# Data frame dimensions: 10 rows x 3 columns
+# Data Summary:
+# col1: integer
+# col2: numeric, missing=3
+# col3: character
+# Args:
+#   df: Data frame to summarize
+# Returns:
+#   A single string with one line per column (type, plus count of missing values if any)
+data_summary <- function(df) {
+  header <- c(
+    sprintf("Data frame dimensions: %d rows x %d columns", nrow(df), ncol(df)),
+    "Data Summary:"
+  )
+  # Helper for R data type names
+  type_map <- function(x) {
+    if (is.factor(x)) return("factor")
+    if (is.character(x)) return("character")
+    if (is.logical(x)) return("logical")
+    if (inherits(x, "Date")) return("Date")
+    if (is.numeric(x)) {
+      # Whole-valued numeric columns are reported as "integer"
+      vals <- x[!is.na(x)]
+      # isTRUE() is needed because Inf - round(Inf) is NaN, which makes all() return NA
+      # and would otherwise raise "missing value where TRUE/FALSE needed"
+      whole <- length(vals) > 0 && isTRUE(all(abs(vals - round(vals)) < .Machine$double.eps^0.5))
+      return(if (whole) "integer" else "numeric")
+    }
+    return(class(x)[1])
+  }
+  # One line per column; vapply() avoids growing a vector inside a loop
+  col_lines <- vapply(names(df), function(col) {
+    dtype <- type_map(df[[col]])
+    miss <- sum(is.na(df[[col]]))
+    if (miss > 0) {
+      sprintf("%s: %s, missing=%d", col, dtype, miss)
+    } else {
+      sprintf("%s: %s", col, dtype)
+    }
+  }, character(1), USE.NAMES = FALSE)
+  paste(c(header, col_lines), collapse = "\n")
+}

docker/entrypoint.sh ADDED Viewed

	@@ -0,0 +1,23 @@

+#!/bin/sh
+# Exit immediately on errors
+set -e
+# MCP session setup for persistent R environment
+# Create .Rprofile to run mcp_session() when R starts
+echo "library(tidyverse); source('data_summary.R'); mcptools::mcp_session()" > .Rprofile
+# Start R in a detached screen session
+# TODO: Look at using supervisord for another way to run multiple services
+# https://docs.docker.com/engine/containers/multi-service_container/#use-a-process-manager
+screen -d -m R
+# Activate virtual environment
+export PATH="/opt/venv/bin:$PATH"
+# Set OpenAI model: keep a value provided by the environment, otherwise default to gpt-4o
+export OPENAI_MODEL_NAME="${OPENAI_MODEL_NAME:-gpt-4o}"
+#export OPENAI_API_KEY=$(cat /run/secrets/openai-api-key)
+echo "Using OpenAI with ${OPENAI_MODEL_NAME}"
+# Replace the shell with the ADK web server
+exec adk web --host 0.0.0.0 --port 8080 --reload_agents

prompts.R ADDED Viewed

	@@ -0,0 +1,133 @@

+# Tool description for make_plot (base R graphics); used in server.R
+# The axis labels in the examples match the plotted variables (x = radius_mean, y = radius_worst)
+make_plot_prompt <- '
+Runs R code to make a plot with base R graphics.
+Args:
+  code: R code to run
+Returns:
+  Binary image data
+Details:
+`code` should be R code that begins with e.g. `png(filename)` and ends with `dev.off()`.
+Always use the variable `filename` instead of an actual file name.
+Example: User requests "Plot x (1,2,3) and y (10,20,30)", then `code` is:
+png(filename)
+x <- c(1, 2, 3)
+y <- c(10, 20, 30)
+plot(x, y)
+dev.off()
+Example: User requests "Give me a 8.5x11 inch PDF of y = x^2 from -1 to 1, large font, titled with the function", then `code` is:
+pdf(filename, width = 8.5, height = 11)
+par(cex = 2)
+x <- seq(-1, 1, length.out = 100)
+y <- x^2
+plot(x, y, type = "l")
+title(main = quote(y == x^2))
+dev.off()
+Example: User requests "Plot radius_worst (y) vs radius_mean (x) from https://zenodo.org/records/3608984/files/breastcancer.csv?download=1", then `code` is:
+png(filename)
+df <- read.csv("https://zenodo.org/records/3608984/files/breastcancer.csv?download=1")
+plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
+dev.off()
+Example: User requests "Plot radius_worst (y) vs radius_mean (x)" and [Uploaded File: "/tmp/uploads/breast-cancer.csv"], then `code` is:
+png(filename)
+df <- read.csv("/tmp/uploads/breast-cancer.csv")
+plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
+dev.off()
+'
+# Tool description for make_ggplot (ggplot2 graphics saved with ggsave)
+make_ggplot_prompt <- '
+Runs R code to make a plot with ggplot/ggplot2.
+Args:
+  code: R code to run
+Returns:
+  Binary image data
+Details:
+`code` should be R code that begins with `library(ggplot2)` and ends with `ggsave(filename, device = "png")`.
+Example: User requests "ggplot wt vs mpg from mtcars", then `code` is:
+library(ggplot2)
+ggplot(mtcars, aes(mpg, wt)) +
+  geom_point()
+ggsave(filename, device = "png")
+Example: User requests "ggplot wt vs mpg from mtcars as pdf", then `code` is:
+library(ggplot2)
+ggplot(mtcars, aes(mpg, wt)) +
+  geom_point()
+ggsave(filename, device = "pdf")
+Important notes:
+- `code` must end with ggsave(filename, device = ) with a specified device.
+- Use `device = "png"` unless the user requests a different format.
+- Always use the variable `filename` instead of an actual file name.
+'
+# Tool description for help_topic (documentation for one topic)
+help_topic_prompt <- '
+Gets documentation for a dataset, function, or other topic.
+Args:
+  topic: Topic or function to get help for.
+Returns:
+  Documentation text. May include runnable R examples.
+Examples:
+- Show the arguments of the `lm` function: help_topic("lm").
+- Show the format of the `airquality` dataset: help_topic("airquality").
+- Get variables in `Titanic`: help_topic("Titanic").
+'
+# Tool description for help_package (package description and index)
+help_package_prompt <- '
+Summarizes datasets and functions in an R package.
+Args:
+  package: Package to get help for.
+Returns:
+  Documentation text. Includes a package description and index of functions and datasets.
+Examples:
+- Get the names of R datasets: help_package("datasets").
+- List graphics functions in base R: help_package("graphics").
+'
+# Tool description for run_visible (run code and return the result)
+run_visible_prompt <- '
+Runs R code and returns the result.
+Does not make plots.
+Args:
+  code: R code to run.
+Returns:
+  Result of R code execution.
+'
+# Tool description for run_hidden (run code without returning the result)
+run_hidden_prompt <- '
+Run R code without returning the result.
+Does not make plots.
+Args:
+  code: R code to run.
+Returns:
+  Nothing.
+NOTE: Choose this tool if:
+  - The user asks to save the result in a variable, or
+  - You are performing intermediate calculations before making a plot.
+'

prompts.py ADDED Viewed

	@@ -0,0 +1,114 @@

+Root = """
+Your purpose is to interact with an R session to perform data analysis and visualization on the user's behalf.
+You cannot run code directly, but may use the `Data`, `Plot`, and `Run` agents.
+Only use the `Run` agent if the following conditions are both true:
+- The operation is requested by the user ("calculate" or "run"), and
+- The code does not make a plot, chart, graph, or any other visualization.
+You may call a help tool before transfering control to an agent:
+- If an R dataset ("dataset") is requested, use help_package('datasets') to find the correct dataset name.
+- If the user requests documentation for specific datasets or functions, use the `help_topic` tool.
+Examples:
+- Query includes "?boxplot": The user is requesting documentation. Call help_topic('boxplot') then transfer to an agent.
+- "Plot distance vs speed from the cars dataset": This is a plot request using an R dataset. Call help_package('datasets') then transfer to the `Data` agent.
+- "Calculate x = cos(x) for x = 0 to 12 and make a plot": This is a plot that does not require data. Transfer to the `Plot` agent.
+- "Run x <- 2": This is code execution without data or plot. Transfer to the `Run` agent.
+- "Load the data": The user is asking to load data from an uploaded file. Transfer to the `Data` agent.
+Important notes:
+- Data may be provided directly by the user, in a URL, in an "Uploaded File" message, or an R dataset.
+- You must not use the `Run` agent to make a plot or execute any other plotting commands.
+- The only way to make a plot, chart, graph, or other visualization is to transfer to the `Data` or `Plot` agents.
+- Do not use install.packages(), library(), or any other commands for package installation and loading.
+"""
+# Instruction for the `Run` agent: choose between the run_visible and run_hidden tools
+Run = """
+You are an agent that runs R code using the `run_visible` and `run_hidden` tools.
+You cannot make plots.
+Perform the following actions:
+- Interpret the user's request as R code.
+- If the code makes a plot (including ggplot or any other type of graph or visualization), transfer to the `Plot` agent.
+- If the code assigns the result to a variable, pass the code to the `run_hidden` tool.
+- Otherwise, pass the code to the `run_visible` tool.
+Important notes:
+- The `run_hidden` tool runs R commands without returning the result. This is useful for reducing LLM token usage while working with large variables.
+- You can use dplyr, tidyr, and other tidyverse packages.
+- Your response should always be valid, self-contained R code.
+- If the tool response is an error (isError: true), respond with the exact text of the error message and stop running code.
+"""
+Data = """
+You are an agent that loads and summarizes data.
+Your main task has three parts:
+1. Generate R code to create a `df` object and summarize it with `data_summary(df)`.
+2. Use the `run_visible` tool to execute the code.
+3. Transfer to the `Plot` agent to make a plot.
+Choose the first available data source:
+1: Data provided directly by the user.
+2: File provided in an "Uploaded File" message. Do not use other files.
+3: URL provided by the user. Do not use other URLs.
+4: Available R dataset that matches the user's request.
+Examples of code for `run_visible`:
+- User requests "plot 1,2,3 10,20,30": code is `df <- data.frame(x = c(1,2,3), y = (10, 20, 30))
+data_summary(df)`.
+- User requests "plot cars data": code is `df <- data.frame(cars)
+data_summary(df)`
+- To read CSV data from a URL, use `df <- read.csv(csv_url)`, where csv_url is the exact URL provided by the user.
+- To read CSV data from a file, use `df <- read.csv(file_path)`, where file_path is provided in an "Uploaded File" user message.
+What to do after calling `run_visible`:
+- If "Data Summary" exists and the user requested a plot, then pass control to the `Plot` agent.
+- If "Data Summary" exists and the user did not request a plot, then stop the workflow.
+- If the user provided data but "Data Summary" does not exist, then stop and report a problem.
+Important notes:
+- Do not use the `run_visible` tool to make a plot.
+- Run `data_summary(df)` in your code. Do not run `summary(df)`.
+- You can use dplyr, tidyr, and other tidyverse packages.
+"""
+# Instruction for the `Plot` agent: write plotting code for the make_plot and make_ggplot tools
+Plot = """
+You are an agent that makes plots with R code using the `make_plot` and `make_ggplot` tools.
+Coding strategy:
+- Use previously assigned variables (especially `df`) in your code.
+    - Do not load data yourself.
+    - Use a specific variable other than `df` if it is better for making the plot.
+- Choose column names in `df` based on the user's request.
+    - Column names are case-sensitive, syntactically valid R names.
+    - Look in the Data Summary for details.
+- No data are required for plotting functions and simulations.
+Plot tools:
+- For base R graphics use the `make_plot` tool.
+- For ggplot/ggplot2 use the `make_ggplot` tool.
+- Both of these tools save the plot as a conversation artifact that is visible to the user.
+Examples:
+- User requests to plot "dates", but the Data Summary lists a "Date" column. Answer: use `df$Date`.
+- User requests to plot "volcano", but `df` also exists. Answer: The `volcano` matrix is better for images; use `image(volcano)`.
+Important notes:
+- Use base R graphics unless the user asks for ggplot or ggplot2.
+- Pay attention to the user's request and use your knowledge of R to write code that gives the best-looking plot.
+- Your response should always be valid, self-contained R code.
+"""

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+google-adk==1.22.0
+litellm==1.80.13
+mcp==1.25.0

server.R ADDED Viewed

	@@ -0,0 +1,134 @@

+# 20251009 Added plot tool
+# 20251023 Added help tools
+# Load ellmer for tool() and type_*()
+library(ellmer)
+# Read prompts
+source("prompts.R")
+# Return the help information for a package as one newline-separated string
+help_package <- function(package) {
+  # Parentheses make help() use the value of `package` rather than the literal name
+  pkg_help <- help(package = (package), help_type = "text")
+  info_lines <- unlist(pkg_help$info)
+  paste(info_lines, collapse = "\n")
+}
+# Return the Rd source of every help file that matches a topic
+# Adapted from https://github.com/posit-dev/btw:::help_to_rd
+help_topic <- function(topic) {
+  topic_help <- help(topic = (topic), help_type = "text")
+  if (length(topic_help) == 0) {
+    return(paste0("No help found for '", topic, "'. Please check the name and try again."))
+  }
+  # A topic can match several help files
+  # e.g. help_topic(plot) returns the help for both base::plot and graphics::plot.default
+  rd_paths <- as.character(topic_help)
+  rd_texts <- vapply(rd_paths, function(rd_path) {
+    # The package name is two directory levels above the help file
+    pkg <- basename(dirname(dirname(rd_path)))
+    rd_file <- paste0(basename(rd_path), ".Rd")
+    paste(as.character(tools::Rd_db(pkg)[[rd_file]]), collapse = "")
+  }, character(1), USE.NAMES = FALSE)
+  n_files <- length(rd_paths)
+  # Numbered heading before each help file so the LLM can tell them apart
+  sections <- paste0("## Help file ", seq_len(n_files), ":\n", rd_texts)
+  # Leading heading with the number of files and their paths
+  noun <- if (n_files == 1) "help file was" else "help files were"
+  heading <- paste0("# ", n_files, " ", noun, " retrieved: ", paste(rd_paths, collapse = ", "), ":\n")
+  c(heading, sections)
+}
+# Evaluate R code in the global environment and return its result
+# https://github.com/posit-dev/mcptools/issues/71
+run_visible <- function(code) {
+  parsed <- parse(text = code)
+  eval(parsed, envir = globalenv())
+}
+# Evaluate R code in the global environment, returning a fixed message instead of the result
+# https://github.com/posit-dev/mcptools/issues/71
+run_hidden <- function(code) {
+  parsed <- parse(text = code)
+  eval(parsed, envir = globalenv())
+  "The code executed successfully"
+}
+# Run R code to make a plot and return the image data
+# Args:
+#   code: R code that writes a plot to the file named by the local variable `filename`
+# Returns:
+#   Contents of the plot file as raw bytes
+make_plot <- function(code) {
+  # Cursor, Bing and Google AI all suggest writing to an in-memory rawConnection, but it causes an error:
+  # Error in png(filename = raw_conn) :
+  #   'filename' must be a non-empty character string
+  # Use a temporary file to save the plot
+  filename <- tempfile(fileext = ".dat")
+  # Remember which graphics devices were open before running the code
+  .devices_before <- grDevices::dev.list()
+  on.exit({
+    # Close any device the code opened but did not close (e.g. an error before dev.off()),
+    # so failed plots do not leak devices in a long-running R process
+    for (.dev in setdiff(grDevices::dev.list(), .devices_before)) grDevices::dev.off(.dev)
+    unlink(filename)
+  })
+  # Run the plotting code (this should include e.g. png() and dev.off())
+  # The code uses a local variable (filename), so don't use envir = globalenv() here
+  eval(parse(text = code))
+  # Return the image file as raw bytes so ADK can save it as an artifact
+  readr::read_file_raw(filename)
+}
+# This is the same code as make_plot() but has a different tool description
+# Args:
+#   code: R code that saves a plot to the file named by the local variable `filename`
+# Returns:
+#   Contents of the plot file as raw bytes
+make_ggplot <- function(code) {
+  filename <- tempfile(fileext = ".dat")
+  # Remember which graphics devices were open before running the code
+  .devices_before <- grDevices::dev.list()
+  on.exit({
+    # Close any device left open by failing code so devices do not leak between calls
+    for (.dev in setdiff(grDevices::dev.list(), .devices_before)) grDevices::dev.off(.dev)
+    unlink(filename)
+  })
+  # Evaluated locally (not in globalenv()) so that the code can see `filename`
+  eval(parse(text = code))
+  readr::read_file_raw(filename)
+}
+# Start the MCP server and register the tools
+# Each tool() pairs a function defined above with its description from prompts.R
+# and declares its single string argument
+mcptools::mcp_server(tools = list(
+  # Help tools
+  tool(
+    help_package,
+    help_package_prompt,
+    arguments = list(
+      package = type_string("Package to get help for.")
+    )
+  ),
+  tool(
+    help_topic,
+    help_topic_prompt,
+    arguments = list(
+      topic = type_string("Topic or function to get help for.")
+    )
+  ),
+  # Code execution tools
+  tool(
+    run_visible,
+    run_visible_prompt,
+    arguments = list(
+      code = type_string("R code to run.")
+    )
+  ),
+  tool(
+    run_hidden,
+    run_hidden_prompt,
+    arguments = list(
+      code = type_string("R code to run.")
+    )
+  ),
+  # Plotting tools
+  tool(
+    make_plot,
+    make_plot_prompt,
+    arguments = list(
+      code = type_string("R code to make the plot.")
+    )
+  ),
+  tool(
+    make_ggplot,
+    make_ggplot_prompt,
+    arguments = list(
+      code = type_string("R code to make the plot.")
+    )
+  )
+))