# BOTeome / app.py
# juan
# file process
# aa9ed5b
import os
# Patch gradio_client bug: _json_schema_to_python_type doesn't guard against
# `additionalProperties: false` (a bool) being passed as a schema dict.
import gradio_client.utils as _gcu
_orig_j2p = _gcu._json_schema_to_python_type


def _safe_j2p(schema, defs=None):
    """Convert a JSON schema to a Python type string, tolerating non-dict input.

    Dict schemas are delegated unchanged to the original gradio_client
    implementation.  Anything else (for example the boolean that comes from
    ``additionalProperties: false``) is reported as ``"Any"`` instead of being
    passed on to the original function.
    """
    if isinstance(schema, dict):
        return _orig_j2p(schema, defs)
    return "Any"


# Install the guarded wrapper in place of the library function.
_gcu._json_schema_to_python_type = _safe_j2p
import gradio as gr
from langchain_community.utilities import SQLDatabase
from langchain_anthropic import ChatAnthropic
from langgraph.checkpoint.memory import InMemorySaver
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import create_agent
from scripts.db import create_database
# Database URI passed both to create_database() and to SQLDatabase.from_uri().
DB_URI = "sqlite:///proteomics.db"
# Chat model shared by the SQL toolkit and the agent built in load_data().
model = ChatAnthropic(model="claude-haiku-4-5-20251001")
# Checkpointer given to the agent; load_data() swaps in a fresh InMemorySaver
# when a CSV is loaded.
memory = InMemorySaver()
# Assigned by load_data(); respond() treats None as "no data loaded yet".
agent = None
def _build_system_prompt(dialect, top_k=5):
    """Return the SQL-agent system prompt for *dialect*, capping results at *top_k*."""
    return """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.
You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the relevant columns given the question.
You MUST double check your query before executing it. If you get an error while
executing a query, rewrite the query and try again.
DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.
To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.
Then you should query the schema of the most relevant tables.
""".format(dialect=dialect, top_k=top_k)


def load_data(csv_file):
    """Load an uploaded CSV into the database and (re)build the SQL agent.

    This is a generator: every item is a ``(status_text, chat_history)`` pair.
    The chat history is always an empty list.

    Args:
        csv_file: Value received from the upload component; it is forwarded
            unchanged to ``create_database``.  ``None`` means nothing was
            uploaded.

    Yields:
        Progress messages, then either a "Ready!" message listing the usable
        tables or an "Error loading data: ..." message.

    The module-level ``agent`` and ``memory`` are replaced together, and only
    after every step has succeeded, so a failed load never leaves the two
    globals out of step with each other.
    """
    import logging

    global agent, memory

    if csv_file is None:
        yield "No file uploaded.", []
        return

    try:
        yield "Processing CSV file...", []
        create_database(DB_URI, csv_file)

        yield "Connecting to database...", []
        db = SQLDatabase.from_uri(DB_URI)
        system_prompt = _build_system_prompt(db.dialect)

        yield "Initializing agent...", []
        # A fresh checkpointer per load, so the new agent starts without the
        # conversation state kept for the previous one.
        new_memory = InMemorySaver()
        tools = SQLDatabaseToolkit(db=db, llm=model).get_tools()
        new_agent = create_agent(
            model,
            tools,
            system_prompt=system_prompt,
            checkpointer=new_memory,
        )
        tables = db.get_usable_table_names()
    except Exception as e:
        # UI boundary: report the failure in the status box, but keep the
        # traceback in the logs instead of discarding it.
        logging.getLogger(__name__).exception("Failed to load data from %r", csv_file)
        yield f"Error loading data: {e}", []
        return

    # Publish only once everything above has succeeded.
    memory, agent = new_memory, new_agent
    yield f"Ready! Loaded tables: {', '.join(tables)}", []
def _content_to_text(content):
    """Flatten a chat message's ``content`` into a plain string.

    ``content`` may be a string or a list of content blocks (plain strings or
    dicts such as ``{"type": "text", "text": ...}``).  Only textual blocks are
    kept; other block types are dropped.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, (list, tuple)):
        return "" if content is None else str(content)
    parts = []
    for block in content:
        if isinstance(block, str):
            parts.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            parts.append(block.get("text") or "")
    return "".join(parts)


def respond(question, history):
    """Answer *question* with the SQL agent and append the exchange to *history*.

    Args:
        question: Text typed by the user.
        history: Chat history as a list of ``[user, bot]`` pairs; ``None`` is
            treated as an empty history.

    Returns:
        A ``(new_history, "")`` tuple; the empty string is the new value for
        the input textbox.  The input list is never mutated.

    Failures while running the agent are shown as the bot's reply rather than
    raised, matching how ``load_data`` reports its errors.
    """
    import logging

    history = list(history or [])

    if agent is None:
        return history + [[question, "Please upload a CSV file first."]], ""

    # Nothing to ask: leave the conversation untouched instead of sending an
    # empty message to the model.
    if not question or not question.strip():
        return history, ""

    try:
        # With stream_mode="values" each item is a state snapshot; only the
        # last one is used, so drain the stream and keep that one.
        final_state = None
        for final_state in agent.stream(
            {"messages": [{"role": "user", "content": question}]},
            # NOTE(review): the thread id is a fixed string, so every caller
            # shares one conversation thread in the checkpointer — confirm
            # this is intended.
            config={"configurable": {"thread_id": "session_1"}},
            stream_mode="values",
        ):
            pass

        if final_state is None:
            answer = "The agent returned no answer."
        else:
            answer = _content_to_text(final_state["messages"][-1].content)
    except Exception as e:
        logging.getLogger(__name__).exception("Agent failed to answer the question")
        answer = f"Error answering question: {e}"

    return history + [[question, answer]], ""
# Custom stylesheet handed to gr.Blocks(css=...).
# Most selectors target `svelte-<hash>` class names.
# NOTE(review): these look like class names generated by a particular Gradio
# build — presumably they need re-checking after a Gradio upgrade; confirm.
# `.svelte-1ed2p3z` is also the class given to the gr.HTML div in the layout;
# this rule sets that div's background image and height.
css = """
.prose h1 {color: black}
.gradio-container {background-color: white; width: 100%;}
.bubble-wrap {background-color: white}
.svelte-cmf5ev {color: white; background-image: linear-gradient(to right bottom, rgb(91,76,251), rgb(91,76,251));}
.svelte-1f354aw {background-color: white; color: black}
.svelte-1b6s6s {background-color: white; color: black}
.flex-wrap.user.svelte-1ggj411 {background-color: #70b1fb; color: red;}
.flex-wrap.bot.svelte-1ggj411 {background-color: #ad3dfa; color: red;}
.flex-wrap.bot.svelte-1ggj411.dark {background-color: #ad3dfa; color: red;}
.message.pending.svelte-1gpwetz {background-color: #ad3dfa}
.contain.svelte-1rjryqp.svelte-1rjryqp.svelte-1rjryqp {background-color: white; color: black}
.svelte-1ed2p3z {background-image: url(https://huggingface.co/spaces/jugacostase/BOTeome/resolve/044cd65f416a2c91b464b06fbeef07e0e46bda50/static/img/BOTeome_logo.png); height:170px; background-size: 500px; background-repeat: no-repeat;}
.dark {--body-text-color: white; --input-background-fill: black}
.label.svelte-p5q82i {color: black;}
.bot.svelte-pcjl1g.svelte-pcjl1g {background-color: #B84BFE; color: white;}
.user.svelte-pcjl1g.svelte-pcjl1g {background-color: #93d0ff; color: white;}
.svelte-1viwdyg {background: #93d0ff;}
label.svelte-i3tvor {background: #93d0ff; color: white;}
.label.show_textbox_border.svelte-173056l {background-color: white; color: black; border-color: gray; border-style: solid; border-width: 1px}
.textarea.svelte-173056l {background-color: white; color: black; border-color: gray; border-style: solid; border-width: 1px}
"""
# Page layout and event wiring.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation — confirm which components belong inside the
# gr.Row() and that launch() sits outside the Blocks context.
with gr.Blocks(css=css) as demo:
    # Empty div whose class matches the `.svelte-1ed2p3z` rule in `css`.
    gr.HTML("<div class='svelte-1ed2p3z'></div>")
    with gr.Row():
        csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load Data", variant="primary")
    status_box = gr.Textbox(label="Status", interactive=False, value="Upload a CSV file to get started.")
    chatbot = gr.Chatbot(height=501)
    msg = gr.Textbox(placeholder="Ask about your proteomics data", container=False, scale=7)
    # load_data is a generator; each (status, history) pair it yields is
    # written to the status box and the chatbot.
    load_btn.click(
        fn=load_data,
        inputs=[csv_upload],
        outputs=[status_box, chatbot]
    )
    # respond returns (history, ""): the first value updates the chatbot and
    # the empty string is written back to the input textbox.
    msg.submit(
        fn=respond,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg]
    )
demo.launch(show_api=False)