Spaces:
Running
Running
jedick
committed on
Commit
·
23e6380
1
Parent(s):
74d192d
Check if requested packages are already installed
Browse files- PlotMyData/__init__.py +5 -1
- PlotMyData/agent.py +1 -1
- data_summary.R +0 -37
- entrypoint.sh +4 -0
- functions.R +101 -0
- profile.R +1 -1
- prompts.py +13 -7
PlotMyData/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
-
import os
|
| 2 |
from pathlib import Path
|
|
|
|
|
|
|
| 3 |
from . import agent
|
| 4 |
|
| 5 |
# Ensure upload directory exists
|
|
@@ -7,3 +8,6 @@ upload_dir = "/tmp/uploads"
|
|
| 7 |
Path(upload_dir).mkdir(parents=True, exist_ok=True)
|
| 8 |
# Read, write, execute for owner; read and execute for others
|
| 9 |
os.chmod(upload_dir, 0o755)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
+
import warnings
|
| 3 |
+
import os
|
| 4 |
from . import agent
|
| 5 |
|
| 6 |
# Ensure upload directory exists
|
|
|
|
| 8 |
Path(upload_dir).mkdir(parents=True, exist_ok=True)
|
| 9 |
# Read, write, execute for owner; read and execute for others
|
| 10 |
os.chmod(upload_dir, 0o755)
|
| 11 |
+
|
| 12 |
+
# Suppress Pydantic serialization warnings
|
| 13 |
+
warnings.filterwarnings("ignore", message="Pydantic serializer warnings")
|
PlotMyData/agent.py
CHANGED
|
@@ -28,7 +28,7 @@ server_params = StdioServerParameters(
|
|
| 28 |
],
|
| 29 |
)
|
| 30 |
# STDIO transport to local R MCP server
|
| 31 |
-
connection_params = StdioConnectionParams(server_params=server_params, timeout=
|
| 32 |
|
| 33 |
# Define model
|
| 34 |
# If we're using the OpenAI API, get the value of OPENAI_MODEL_NAME set by entrypoint.sh
|
|
|
|
| 28 |
],
|
| 29 |
)
|
| 30 |
# STDIO transport to local R MCP server
|
| 31 |
+
connection_params = StdioConnectionParams(server_params=server_params, timeout=60)
|
| 32 |
|
| 33 |
# Define model
|
| 34 |
# If we're using the OpenAI API, get the value of OPENAI_MODEL_NAME set by entrypoint.sh
|
data_summary.R
DELETED
|
@@ -1,37 +0,0 @@
|
|
| 1 |
-
# Summarize a data frame, for example:
|
| 2 |
-
# Data frame dimensions: 10 rows x 3 columns
|
| 3 |
-
# Data Summary:
|
| 4 |
-
# col1: integer
|
| 5 |
-
# col2: numeric, missing=3
|
| 6 |
-
# col3: character
|
| 7 |
-
|
| 8 |
-
data_summary <- function(df) {
|
| 9 |
-
nrows <- nrow(df)
|
| 10 |
-
ncols <- ncol(df)
|
| 11 |
-
lines <- c(sprintf("Data frame dimensions: %d rows x %d columns", nrows, ncols), "Data Summary:")
|
| 12 |
-
|
| 13 |
-
# Helper for R data type names
|
| 14 |
-
type_map <- function(x) {
|
| 15 |
-
if (is.factor(x)) return("factor")
|
| 16 |
-
if (is.character(x)) return("character")
|
| 17 |
-
if (is.logical(x)) return("logical")
|
| 18 |
-
if (inherits(x, "Date")) return("Date")
|
| 19 |
-
if (is.numeric(x)) {
|
| 20 |
-
vals <- x[!is.na(x)]
|
| 21 |
-
if (length(vals) > 0 && all(abs(vals - round(vals)) < .Machine$double.eps^0.5)) return("integer")
|
| 22 |
-
return("numeric")
|
| 23 |
-
}
|
| 24 |
-
return(class(x)[1])
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
for (col in names(df)) {
|
| 28 |
-
dtype <- type_map(df[[col]])
|
| 29 |
-
miss <- sum(is.na(df[[col]]))
|
| 30 |
-
if (miss > 0) {
|
| 31 |
-
lines <- c(lines, sprintf("%s: %s, missing=%d", col, dtype, miss))
|
| 32 |
-
} else {
|
| 33 |
-
lines <- c(lines, sprintf("%s: %s", col, dtype))
|
| 34 |
-
}
|
| 35 |
-
}
|
| 36 |
-
paste(lines, collapse = "\n")
|
| 37 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
entrypoint.sh
CHANGED
|
@@ -18,6 +18,10 @@ export PATH="/opt/venv/bin:$PATH"
|
|
| 18 |
export OPENAI_MODEL_NAME=gpt-4o
|
| 19 |
echo "Using OpenAI with ${OPENAI_MODEL_NAME}"
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
# For local development, the API key is read from a file
|
| 22 |
# (not needed on HF Spaces, where secrets are injected into container's environment)
|
| 23 |
if [ -z "$OPENAI_API_KEY" ]; then
|
|
|
|
| 18 |
export OPENAI_MODEL_NAME=gpt-4o
|
| 19 |
echo "Using OpenAI with ${OPENAI_MODEL_NAME}"
|
| 20 |
|
| 21 |
+
# Suppress e.g. UserWarning: [EXPERIMENTAL] BaseAuthenticatedTool: This feature is experimental ...
|
| 22 |
+
# https://github.com/google/adk-python/commit/4afc9b2f33d63381583cea328f97c02213611529
|
| 23 |
+
export ADK_SUPPRESS_EXPERIMENTAL_FEATURE_WARNINGS=true
|
| 24 |
+
|
| 25 |
# For local development, the API key is read from a file
|
| 26 |
# (not needed on HF Spaces, where secrets are injected into container's environment)
|
| 27 |
if [ -z "$OPENAI_API_KEY" ]; then
|
functions.R
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Summarize a data frame, for example:
|
| 2 |
+
# Data frame dimensions: 10 rows x 3 columns
|
| 3 |
+
# Data Summary:
|
| 4 |
+
# col1: integer
|
| 5 |
+
# col2: numeric, missing=3
|
| 6 |
+
# col3: character
|
| 7 |
+
data_summary <- function(df) {
|
| 8 |
+
nrows <- nrow(df)
|
| 9 |
+
ncols <- ncol(df)
|
| 10 |
+
lines <- c(sprintf("Data frame dimensions: %d rows x %d columns", nrows, ncols), "Data Summary:")
|
| 11 |
+
|
| 12 |
+
# Helper for R data type names
|
| 13 |
+
type_map <- function(x) {
|
| 14 |
+
if (is.factor(x)) return("factor")
|
| 15 |
+
if (is.character(x)) return("character")
|
| 16 |
+
if (is.logical(x)) return("logical")
|
| 17 |
+
if (inherits(x, "Date")) return("Date")
|
| 18 |
+
if (is.numeric(x)) {
|
| 19 |
+
vals <- x[!is.na(x)]
|
| 20 |
+
if (length(vals) > 0 && all(abs(vals - round(vals)) < .Machine$double.eps^0.5)) return("integer")
|
| 21 |
+
return("numeric")
|
| 22 |
+
}
|
| 23 |
+
return(class(x)[1])
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
for (col in names(df)) {
|
| 27 |
+
dtype <- type_map(df[[col]])
|
| 28 |
+
miss <- sum(is.na(df[[col]]))
|
| 29 |
+
if (miss > 0) {
|
| 30 |
+
lines <- c(lines, sprintf("%s: %s, missing=%d", col, dtype, miss))
|
| 31 |
+
} else {
|
| 32 |
+
lines <- c(lines, sprintf("%s: %s", col, dtype))
|
| 33 |
+
}
|
| 34 |
+
}
|
| 35 |
+
paste(lines, collapse = "\n")
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
# Check if packages are installed and return status message
|
| 39 |
+
# Example: check_packages(c("nlme", "ggplot2", "scatterplot3d"))
|
| 40 |
+
# Returns: "nlme, ggplot2 and scatterplot3d are already installed" if all are installed
|
| 41 |
+
# Or: "nlme and ggplot2 are already installed ; scatterplot3d needs to be installed" if some are missing
|
| 42 |
+
# The message format makes it easy to determine if installation is needed:
|
| 43 |
+
# - If message contains "already installed" ("is"/"are") and does NOT contain "to be installed", all packages are installed
|
| 44 |
+
# - If message contains "to be installed" ("needs"/"need"), some packages need installation
|
| 45 |
+
check_packages <- function(packages) {
|
| 46 |
+
if (length(packages) == 0) {
|
| 47 |
+
return("No packages specified")
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
# Check which packages are installed
|
| 51 |
+
installed <- sapply(packages, function(pkg) {
|
| 52 |
+
requireNamespace(pkg, quietly = TRUE)
|
| 53 |
+
})
|
| 54 |
+
|
| 55 |
+
installed_pkgs <- packages[installed]
|
| 56 |
+
missing_pkgs <- packages[!installed]
|
| 57 |
+
|
| 58 |
+
if (length(installed_pkgs) == length(packages)) {
|
| 59 |
+
# All packages are installed
|
| 60 |
+
if (length(installed_pkgs) == 1) {
|
| 61 |
+
return(paste(installed_pkgs, "is already installed"))
|
| 62 |
+
} else if (length(installed_pkgs) == 2) {
|
| 63 |
+
return(paste(installed_pkgs[1], "and", installed_pkgs[2], "are already installed"))
|
| 64 |
+
} else {
|
| 65 |
+
# Format: "pkg1, pkg2 and pkg3 are already installed" (no comma before "and")
|
| 66 |
+
pkgs_list <- paste(installed_pkgs[-length(installed_pkgs)], collapse = ", ")
|
| 67 |
+
return(paste(pkgs_list, "and", installed_pkgs[length(installed_pkgs)], "are already installed"))
|
| 68 |
+
}
|
| 69 |
+
} else if (length(installed_pkgs) > 0) {
|
| 70 |
+
# Some packages are installed, some are missing
|
| 71 |
+
if (length(installed_pkgs) == 1) {
|
| 72 |
+
installed_msg <- paste(installed_pkgs, "is already installed")
|
| 73 |
+
} else if (length(installed_pkgs) == 2) {
|
| 74 |
+
installed_msg <- paste(installed_pkgs[1], "and", installed_pkgs[2], "are already installed")
|
| 75 |
+
} else {
|
| 76 |
+
pkgs_list <- paste(installed_pkgs[-length(installed_pkgs)], collapse = ", ")
|
| 77 |
+
installed_msg <- paste(pkgs_list, "and", installed_pkgs[length(installed_pkgs)], "are already installed")
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
if (length(missing_pkgs) == 1) {
|
| 81 |
+
missing_msg <- paste(missing_pkgs, "needs to be installed")
|
| 82 |
+
} else if (length(missing_pkgs) == 2) {
|
| 83 |
+
missing_msg <- paste(missing_pkgs[1], "and", missing_pkgs[2], "need to be installed")
|
| 84 |
+
} else {
|
| 85 |
+
pkgs_list <- paste(missing_pkgs[-length(missing_pkgs)], collapse = ", ")
|
| 86 |
+
missing_msg <- paste(pkgs_list, "and", missing_pkgs[length(missing_pkgs)], "need to be installed")
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
return(paste(installed_msg, ";", missing_msg))
|
| 90 |
+
} else {
|
| 91 |
+
# No packages are installed
|
| 92 |
+
if (length(missing_pkgs) == 1) {
|
| 93 |
+
return(paste(missing_pkgs, "needs to be installed"))
|
| 94 |
+
} else if (length(missing_pkgs) == 2) {
|
| 95 |
+
return(paste(missing_pkgs[1], "and", missing_pkgs[2], "need to be installed"))
|
| 96 |
+
} else {
|
| 97 |
+
pkgs_list <- paste(missing_pkgs[-length(missing_pkgs)], collapse = ", ")
|
| 98 |
+
return(paste(pkgs_list, "and", missing_pkgs[length(missing_pkgs)], "need to be installed"))
|
| 99 |
+
}
|
| 100 |
+
}
|
| 101 |
+
}
|
profile.R
CHANGED
|
@@ -5,7 +5,7 @@ options(repos = c(CRAN = "https://cloud.r-project.org"))
|
|
| 5 |
library(tidyverse)
|
| 6 |
|
| 7 |
# Use our own data summary function
|
| 8 |
-
source("data_summary.R")
|
| 9 |
|
| 10 |
# Make this R session visible to the mcptools MCP server
|
| 11 |
# NOTE: mcp_session() needs to be run in an *interactive* R session, so we can't put it in server.R
|
|
|
|
| 5 |
library(tidyverse)
|
| 6 |
|
| 7 |
# Use our own data summary function
|
| 8 |
+
source("functions.R")
|
| 9 |
|
| 10 |
# Make this R session visible to the mcptools MCP server
|
| 11 |
# NOTE: mcp_session() needs to be run in an *interactive* R session, so we can't put it in server.R
|
prompts.py
CHANGED
|
@@ -122,20 +122,26 @@ You are an agent that installs R packages using the `run_visible` tool.
|
|
| 122 |
Your workflow:
|
| 123 |
|
| 124 |
1. Identify which packages need to be installed.
|
| 125 |
-
2.
|
| 126 |
-
3.
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
Important notes:
|
| 132 |
|
| 133 |
-
- ALWAYS
|
|
|
|
|
|
|
| 134 |
- ALWAYS clearly state which packages will be installed.
|
| 135 |
- Use `run_visible` with `install.packages()` to install packages.
|
| 136 |
- For multiple packages, use: `install.packages(c("package1", "package2"))`.
|
| 137 |
- For a single package, use: `install.packages("package1")`.
|
| 138 |
- If installation fails, report the error to the user and do not transfer control.
|
| 139 |
- If installation succeeds, transfer control back to the calling agent to continue the original task.
|
| 140 |
-
- Do not install packages without explicit user confirmation.
|
| 141 |
"""
|
|
|
|
| 122 |
Your workflow:
|
| 123 |
|
| 124 |
1. Identify which packages need to be installed.
|
| 125 |
+
2. First, check package installation status by calling `check_packages()` function using the `run_visible` tool. For example: `check_packages(c("package1", "package2"))`.
|
| 126 |
+
3. Examine the result from `check_packages()`:
|
| 127 |
+
- If the result indicates all packages are already installed (contains "is already installed" or "are already installed" and does NOT contain "to be installed"), then immediately transfer control back to the agent that requested the installation WITHOUT asking for confirmation.
|
| 128 |
+
- If the result indicates some or all packages need to be installed (contains "needs to be installed" or "need to be installed"), proceed to step 4.
|
| 129 |
+
4. Clearly state which packages you will install (e.g., "I need to install the following packages: scatterplot3d, plotly").
|
| 130 |
+
5. Ask the user for confirmation before proceeding (e.g., "Should I proceed with installing these packages?").
|
| 131 |
+
6. Wait for the user to confirm before installing.
|
| 132 |
+
7. Once confirmed, use the `run_visible` tool with R code like: `install.packages(c("package1", "package2"))` to install only the packages that are missing.
|
| 133 |
+
8. After successful installation, transfer control back to the agent that requested the installation (e.g., transfer to the `Plot` agent if it was making a plot).
|
| 134 |
|
| 135 |
Important notes:
|
| 136 |
|
| 137 |
+
- ALWAYS call `check_packages()` first to check installation status before attempting to install.
|
| 138 |
+
- If all packages are already installed, return to the previous agent immediately without asking for confirmation.
|
| 139 |
+
- Only ask for user confirmation if some packages actually need to be installed.
|
| 140 |
- ALWAYS clearly state which packages will be installed.
|
| 141 |
- Use `run_visible` with `install.packages()` to install packages.
|
| 142 |
- For multiple packages, use: `install.packages(c("package1", "package2"))`.
|
| 143 |
- For a single package, use: `install.packages("package1")`.
|
| 144 |
- If installation fails, report the error to the user and do not transfer control.
|
| 145 |
- If installation succeeds, transfer control back to the calling agent to continue the original task.
|
| 146 |
+
- Do not install packages without explicit user confirmation (unless all packages are already installed).
|
| 147 |
"""
|