|
|
import gradio as gr |
|
|
import requests |
|
|
import base64 |
|
|
import pandas as pd |
|
|
import io |
|
|
import time |
|
|
import os |
|
|
from io import BytesIO |
|
|
from fpdf import FPDF |
|
|
import tempfile |
|
|
from typing import Tuple, List, Optional, Dict, Any |
|
|
from PIL import Image |
|
|
|
|
|
API_URL = "https://sroy46--insightsphere-wrapper.modal.run" |
|
|
|
|
|
LANGUAGES = [ |
|
|
("en", "English"), |
|
|
("zh", "Chinese"), |
|
|
|
|
|
("ko", "Korean"), |
|
|
] |
|
|
|
|
|
def language_name_to_code(name: str) -> str: |
|
|
for code, lang_name in LANGUAGES: |
|
|
if lang_name == name: |
|
|
return code |
|
|
return "en" |
|
|
|
|
|
def make_api_request(endpoint: str, payload: Dict[str, Any]) -> requests.Response: |
|
|
response = requests.post( |
|
|
f"{API_URL}{endpoint}", |
|
|
json=payload, |
|
|
timeout=None |
|
|
) |
|
|
response.raise_for_status() |
|
|
return response |
|
|
|
|
|
def run_insightsphere(query: str, chart_types: List[str], tone: str, language_name: str) -> Tuple[str, List[Any], str, Optional[pd.DataFrame], Optional[Dict]]: |
|
|
try: |
|
|
language_code = language_name_to_code(language_name) |
|
|
response = make_api_request("/interpret", {"query": query}) |
|
|
sql_query = response.json()["sql"] |
|
|
|
|
|
response = make_api_request("/collect", {"sql": sql_query}) |
|
|
raw_data = pd.read_csv(io.StringIO(base64.b64decode(response.json()["data"]).decode())) |
|
|
|
|
|
response = make_api_request("/clean", { |
|
|
"data": base64.b64encode(raw_data.to_csv(index=False).encode()).decode() |
|
|
}) |
|
|
clean_data = pd.read_csv(io.StringIO(base64.b64decode(response.json()["data"]).decode())) |
|
|
|
|
|
response = make_api_request("/analyze", { |
|
|
"data": base64.b64encode(clean_data.to_csv(index=False).encode()).decode() |
|
|
}) |
|
|
insights = response.json()["insights"] |
|
|
|
|
|
charts = [] |
|
|
clean_data_encoded = base64.b64encode(clean_data.to_csv(index=False).encode()).decode() |
|
|
for chart_type in chart_types: |
|
|
response = make_api_request("/visualize", { |
|
|
"data": clean_data_encoded, |
|
|
"insights": insights, |
|
|
"chart_type": chart_type, |
|
|
"tone": tone |
|
|
}) |
|
|
img_bytes = base64.b64decode(response.json()["image"]) |
|
|
img = Image.open(BytesIO(img_bytes)) |
|
|
charts.append(img) |
|
|
|
|
|
response = make_api_request("/summarize", { |
|
|
"insights": insights, |
|
|
"tone": tone, |
|
|
"language_code": language_code |
|
|
}) |
|
|
summary = response.json()["summary"] |
|
|
|
|
|
response = make_api_request("/recommend", { |
|
|
"insights": insights, |
|
|
"tone": tone, |
|
|
"language_code": language_code |
|
|
}) |
|
|
recommendations = response.json()["recommendations"] |
|
|
|
|
|
return summary, charts, recommendations, clean_data, insights |
|
|
|
|
|
except Exception as e: |
|
|
error_msg = f"Error: {str(e)}" |
|
|
return error_msg, [], error_msg, None, None |
|
|
|
|
|
def save_pdf(summary: str, charts: List[Any], recommendations: str) -> str: |
|
|
pdf = FPDF() |
|
|
pdf.add_page() |
|
|
pdf.set_font("helvetica", size=12) |
|
|
|
|
|
def add_unicode_text(text): |
|
|
text = text.encode('latin-1', 'replace').decode('latin-1') |
|
|
pdf.multi_cell(0, 10, text) |
|
|
|
|
|
pdf.multi_cell(0, 10, "Summary:") |
|
|
add_unicode_text(summary) |
|
|
pdf.ln(10) |
|
|
|
|
|
for i, img in enumerate(charts): |
|
|
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: |
|
|
img.save(tmp.name, format="PNG") |
|
|
pdf.image(tmp.name, x=10, w=180) |
|
|
pdf.ln(10) |
|
|
try: |
|
|
os.unlink(tmp.name) |
|
|
except: |
|
|
pass |
|
|
|
|
|
pdf.multi_cell(0, 10, "Recommendations:") |
|
|
add_unicode_text(recommendations) |
|
|
|
|
|
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") |
|
|
pdf.output(tmp_file.name) |
|
|
return tmp_file.name |
|
|
|
|
|
def save_excel(data: pd.DataFrame, insights: Dict) -> str: |
|
|
try: |
|
|
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") |
|
|
with pd.ExcelWriter(tmp_file.name, engine='xlsxwriter') as writer: |
|
|
data.to_excel(writer, sheet_name='Clean Data', index=False) |
|
|
if isinstance(insights, dict): |
|
|
pd.DataFrame([insights]).to_excel(writer, sheet_name='Insights', index=False) |
|
|
return tmp_file.name |
|
|
except Exception as e: |
|
|
raise |
|
|
|
|
|
with gr.Blocks(title="InsightSphere", css=".gradio-container {max-width: 900px !important}") as demo: |
|
|
gr.Markdown("# Your AI Agent for Multilingual Query-to-Insight Data Workflows") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
query_input = gr.Textbox( |
|
|
label="Enter a query to explore your data with AI:", |
|
|
lines=2, |
|
|
value="Show the most popular baby names in the US from 1910 to 2013, ranked by total occurrences.", |
|
|
placeholder="Enter a natural language query" |
|
|
) |
|
|
|
|
|
gr.Examples( |
|
|
examples=[ |
|
|
"List the top 10 most popular male baby names by total count from 1910 to 2013 in California.", |
|
|
"Compare average income by education level across all states.", |
|
|
"Summarize total agricultural yield by crop type over the past decade.", |
|
|
"What are the top 5 counties with highest unemployment rates in 2019?" |
|
|
], |
|
|
inputs=query_input |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
tone_input = gr.Dropdown( |
|
|
label="Narrative Style for Reports", |
|
|
choices=["formal", "casual", "executive"], |
|
|
value="formal" |
|
|
) |
|
|
language_input = gr.Dropdown( |
|
|
label="Select Your Preferred Language", |
|
|
choices=[name for _, name in LANGUAGES], |
|
|
value="English" |
|
|
) |
|
|
|
|
|
chart_type_input = gr.CheckboxGroup( |
|
|
label="Select Graph Types to Visualize Insights", |
|
|
choices=["histogram", "scatter", "boxplot", "bar", "line"], |
|
|
value=["histogram"] |
|
|
) |
|
|
|
|
|
generate_btn = gr.Button("Generate Insights", variant="primary") |
|
|
|
|
|
with gr.Column(): |
|
|
summary_output = gr.Markdown(label="### Analysis Summary") |
|
|
rec_output = gr.Markdown(label="### Recommendations") |
|
|
chart_gallery = gr.Gallery( |
|
|
label="Generated Visualizations", |
|
|
columns=2, |
|
|
height="auto" |
|
|
) |
|
|
download_excel_btn = gr.Button("Download Data as Excel File") |
|
|
|
|
|
state = gr.State() |
|
|
|
|
|
def run_and_store(query: str, chart_types: List[str], tone: str, language: str): |
|
|
result = run_insightsphere(query, chart_types, tone, language) |
|
|
state.value = { |
|
|
"summary": result[0], |
|
|
"charts": result[1], |
|
|
"recommendations": result[2], |
|
|
"clean_data": result[3], |
|
|
"insights": result[4], |
|
|
} |
|
|
chart_names = { |
|
|
"histogram": "Distribution", |
|
|
"scatter": "Scatter Plot", |
|
|
"boxplot": "Box Plot", |
|
|
"bar": "Bar Chart", |
|
|
"line": "Line Chart" |
|
|
} |
|
|
display_images = [] |
|
|
for i, img in enumerate(result[1]): |
|
|
chart_type = chart_types[i] if i < len(chart_types) else "chart" |
|
|
display_images.append((img, f"{chart_names.get(chart_type, 'Chart')} {i+1}")) |
|
|
return result[0], display_images, result[2] |
|
|
|
|
|
generate_btn.click( |
|
|
run_and_store, |
|
|
inputs=[query_input, chart_type_input, tone_input, language_input], |
|
|
outputs=[summary_output, chart_gallery, rec_output] |
|
|
) |
|
|
|
|
|
download_excel_btn.click( |
|
|
lambda: save_excel(state.value["clean_data"], state.value["insights"]) if state.value else None, |
|
|
outputs=gr.File(label="Download Excel Report") |
|
|
) |
|
|
|
|
|
if __name__ == "__main__": |
|
|
demo.launch(server_name="0.0.0.0", server_port=7860, share=False) |
|
|
|