Spaces:
Running
Running
File size: 11,430 Bytes
3cb103a 23cec4c 0487e2a 23cec4c 0487e2a 23cec4c 0487e2a 23cec4c 0487e2a 3cb103a 0487e2a 23cec4c 0487e2a 23cec4c 0487e2a 23cec4c 0487e2a 3cb103a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 |
import gradio as gr
import services
# --- Wrapper functions for Gradio (sync versions) ---
def search_relevant_indicators(query: str, top_k: int):
    """Search for a shortlist of relevant indicators from the World Development Indicators (WDI) Data360 API given the query.
    This tool is optimized for English language queries, so try to use English for your query. If the user's query is not in English, you may need to translate it to English first. This tool is used to find indicators and does not consider any geography or time period, so you should not include any in your query.
    The search ranking may not be optimal, so the LLM may use this as shortlist and pick the most relevant from the list (if any). You, as an LLM, must always get at least `top_k=20` for better recall.
    Args:
        query: The search query by the user or one formulated by an LLM based on the user's prompt. This query should be in English. If the user's query is not in English, you may need to translate it to English first. This tool is used to find indicators and does not consider any geography or time period, so you should not include any in your query.
        top_k: The number of shortlisted indicators that will be returned that are semantically related to the query. IMPORTANT: You, as an LLM, must ALWAYS set this argument to at least 20.
    Returns:
        A dictionary with keys `indicators` and `note`. The `indicators` key contains a list of indicator objects with keys indicator code/idno and name. The `note` key contains a note about the search.
    """
    # NOTE(review): the docstring above is presumably surfaced to MCP clients as
    # the tool description (the app is launched with mcp_server=True), so its
    # wording is kept verbatim -- confirm before rewording it.
    #
    # Plain synchronous pass-through: both arguments are forwarded untouched and
    # whatever the service layer returns is handed straight back.
    shortlist = services.search_relevant_indicators(top_k=top_k, query=query)
    return shortlist
def indicator_info(indicator_ids_str: str):
    """Provides definition information for the given indicator id (idno).
    Args:
        indicator_ids_str: An indicator id or a comma-separated list of indicator ids (idno) that additional information is being requested for.
    Returns:
        List of objects with keys indicator code/idno, name, and definition.
    """
    # NOTE(review): docstring wording kept verbatim because it is presumably
    # published as the MCP tool description -- confirm before rewording it.
    #
    # Remove every space first, then split on commas; blank fragments (for
    # example from a trailing comma) are discarded so only real ids are sent on.
    compact = indicator_ids_str.replace(" ", "")
    ids = []
    for fragment in compact.split(","):
        candidate = fragment.strip()
        if candidate:
            ids.append(candidate)
    return services.indicator_info(indicator_ids=ids)
def get_wdi_data(
indicator_ids: str | list[str], country_codes_str: str, date: str, per_page: int
):
"""After relevant data is identified by using the `search_relevant_indicators`, this tool fetches indicator data for a given indicator id(s) (idno) from the World Bank's World Development Indicators (WDI) Data360 API. The LLM must exclusively use this tool when the user asks for data. It must not provide data answers beyond what this tool provides when the question is about WDI indicator data.
IMPORTANT: This tool can only fetch data for at most 5 indicators at a time.
Args:
indicator_ids: The WDI indicator code (e.g., "WB_WDI_NY_GDP_MKTP_CD" for GDP in current US$). Comma separated if more than one.
country_codes_str: The 3-letter ISO country code (e.g., "USA", "CHN", "IND"), or "all" for all countries. Comma separated if more than one.
date: A year (e.g., "2022") or a range (e.g., "2000:2022") to filter the results.
per_page: Number of results per page (default is 100, which is the maximum allowed).
Returns:
A dictionary with keys `data` and `note`. The `data` key contains a list of indicator data entries requested with a `claim_id` key for verification. The `note` key contains a note about the data returned.
"""
# Parse country_codes_str:
cc_input = country_codes_str.strip()
if cc_input.lower() == "all":
country_codes = "all"
else:
# Split on commas, uppercase each, strip spaces
country_codes = [c.strip().upper() for c in cc_input.split(",") if c.strip()]
if isinstance(indicator_ids, str):
indicator_ids = indicator_ids.replace(" ", "").split(",")
if len(indicator_ids) > 5:
return dict(
data=[],
note=f"ERROR: This tool can only fetch data for at most 5 indicators at a time, but you requested {len(indicator_ids)}.",
)
# If user left date blank, pass None
date_filter = date.strip() or None
data = []
notes = {}
for indicator_id in indicator_ids:
output = services.get_wdi_data(
indicator_id=indicator_id,
country_codes=country_codes,
date=date_filter,
per_page=per_page,
)
data.extend(output["data"])
notes[output["indicator_id"]] = output["note"]
return dict(data=data, note=notes)
def used_indicators(indicator_ids: list[str] | str):
    """The LLM can use this tool to let the user know which indicators it has used in generating its response.
    Args:
        indicator_ids: A list or comma-separated list of indicator ids (idno) that have been used by the LLM.
    Returns:
        A list of indicator ids (idno) that have been used by the LLM. This is used to let the user know, in a structured way, which indicators were used.
    """
    # NOTE(review): docstring wording kept verbatim because it is presumably
    # published as the MCP tool description -- confirm before rewording it.
    #
    # No parsing happens here: the value is forwarded exactly as received
    # (list or comma-separated string) and the service's result is returned.
    reported = services.used_indicators(indicator_ids=indicator_ids)
    return reported
def get_data360_link(
    indicator_id: str,
    country_codes: list[str] | str | None = None,
    year: str | None = None,
) -> dict[str, str]:
    """The LLM can use this tool to get the link to the Data360 page for the given indicator id (idno). Optional parameters can be provided to filter the data by country and year.
    Args:
        indicator_id: The WDI indicator code (e.g., "WB_WDI_NY_GDP_MKTP_CD" for GDP in current US$).
        country_codes: The 3-letter ISO country code (e.g., "USA", "CHN", "IND"), or set to `None` for all countries. Comma separated if more than one.
        year: The year to view the data for. Set to `None` for the most recent year.
    Returns:
        A dictionary with keys `url` containing a link to the Data360 page for the given indicator id (idno) with the optional parameters.
    """
    # NOTE(review): docstring wording kept verbatim because it is presumably
    # published as the MCP tool description -- confirm before rewording it.
    #
    # All three arguments are handed to the service layer unchanged; no
    # normalisation of country codes or year is done in this wrapper.
    link_request = {
        "indicator_id": indicator_id,
        "country_codes": country_codes,
        "year": year,
    }
    return services.get_data360_link(**link_request)
def build_interface():
    """Assemble the Gradio Blocks UI with one tab per wrapper function.

    Each tab holds the input widgets for one wrapper, a button, and a JSON
    panel; clicking the button calls the wrapper and shows its return value.

    Returns:
        The constructed `gr.Blocks` object. It is not launched here.
    """
    # --- Build the Gradio interface ---
    with gr.Blocks(title="WDI MCP Gradio") as app:
        gr.Markdown("## WDI MCP: Gradio Interface")
        gr.Markdown(
            "Use the tabs below to call *search_relevant_indicators*, *indicator_info*, or *get_wdi_data*."
        )

        # Tab 1: search_relevant_indicators(query, top_k)
        with gr.Tab("Search Relevant Indicators"):
            gr.Markdown(
                "Search for a shortlist of relevant WDI indicators given a query. "
                "Remember: For best recall, set **Top K ≥ 20**."
            )
            search_query_box = gr.Textbox(
                lines=1,
                label="Query",
                placeholder="e.g. 'GDP of Asian countries'",
            )
            search_top_k_slider = gr.Slider(
                minimum=1,
                maximum=50,
                step=1,
                value=20,
                label="Top K",
                info="At least 20 recommended",
            )
            search_button = gr.Button("Search")
            search_result_panel = gr.JSON(label="Search Results (dict)")
            # Input order must match the wrapper's positional parameters.
            search_button.click(
                fn=search_relevant_indicators,
                inputs=[search_query_box, search_top_k_slider],
                outputs=search_result_panel,
            )

        # Tab 2: indicator_info(indicator_ids_str)
        with gr.Tab("Indicator Info"):
            gr.Markdown(
                "Provide one or more indicator IDs (comma-separated) to retrieve definitions."
            )
            info_ids_box = gr.Textbox(
                lines=1,
                label="Indicator IDs",
                placeholder="e.g. WB_WDI_NY_GDP_MKTP_CD, WB_WDI_SP_POP_TOTL",
            )
            info_button = gr.Button("Get Definitions")
            info_result_panel = gr.JSON(label="Indicator Info (list)")
            info_button.click(
                fn=indicator_info,
                inputs=info_ids_box,
                outputs=info_result_panel,
            )

        # Tab 3: get_wdi_data(indicator_ids, country_codes_str, date, per_page)
        with gr.Tab("Get WDI Data"):
            gr.Markdown(
                "Fetch actual WDI data for a given indicator and country set. "
                "Set **Country Codes** to ‘all’ or a comma-separated list of 3-letter codes."
            )
            data_indicator_box = gr.Textbox(
                lines=1,
                label="Indicator ID",
                placeholder="e.g. WB_WDI_NY_GDP_MKTP_CD",
            )
            data_countries_box = gr.Textbox(
                lines=1,
                label="Country Codes",
                placeholder="e.g. 'USA, CHN' or 'all'",
            )
            data_date_box = gr.Textbox(
                lines=1,
                label="Date Filter",
                placeholder="Year (e.g. '2022') or range (e.g. '2000:2022') – leave empty for no filter",
            )
            data_per_page_box = gr.Number(
                value=5,
                precision=0,
                label="Per Page",
                info="Max allowed is usually 100",
            )
            data_button = gr.Button("Fetch Data")
            data_result_panel = gr.JSON(label="WDI Data (dict)")
            # Input order must match the wrapper's positional parameters.
            data_button.click(
                fn=get_wdi_data,
                inputs=[
                    data_indicator_box,
                    data_countries_box,
                    data_date_box,
                    data_per_page_box,
                ],
                outputs=data_result_panel,
            )

        # Tab 4: used_indicators(indicator_ids)
        with gr.Tab("Used Indicators"):
            gr.Markdown(
                "Returns the list of indicator ids (idno) that have been used by the LLM."
            )
            used_ids_box = gr.Textbox(
                lines=1,
                label="Indicator IDs",
                placeholder="e.g. WB_WDI_NY_GDP_MKTP_CD, WB_WDI_SP_POP_TOTL",
            )
            used_button = gr.Button("Get Used Indicators")
            used_result_panel = gr.JSON(label="Used Indicators (list)")
            used_button.click(
                fn=used_indicators,
                inputs=used_ids_box,
                outputs=used_result_panel,
            )

        # Tab 5: get_data360_link(indicator_id, country_codes, year)
        with gr.Tab("Get Data360 Link"):
            gr.Markdown(
                "Returns the link to the Data360 page for the given indicator id (idno). Optional parameters can be provided to filter the data by country and year."
            )
            link_indicator_box = gr.Textbox(
                lines=1,
                label="Indicator ID",
                placeholder="e.g. WB_WDI_NY_GDP_MKTP_CD",
            )
            link_countries_box = gr.Textbox(
                lines=1,
                label="Country Codes",
                placeholder="e.g. 'USA, CHN' or 'all'",
            )
            link_year_box = gr.Textbox(
                lines=1,
                label="Year",
                placeholder="e.g. '2022'",
            )
            link_button = gr.Button("Get Data360 Link")
            link_result_panel = gr.JSON(label="Data360 Link (dict)")
            link_button.click(
                fn=get_data360_link,
                inputs=[link_indicator_box, link_countries_box, link_year_box],
                outputs=link_result_panel,
            )
    return app
if __name__ == "__main__":
    # Script entry point: build the UI only when this file is run directly,
    # so importing the module has no side effects beyond the definitions.
    demo = build_interface()
    # mcp_server=True asks Gradio to start its MCP server together with the
    # web UI when the app is launched.
    demo.launch(mcp_server=True)
|