Spaces:

MicroHealth
/

autodata-visualizer

Paused

App Files Files Community

bluenevus committed on Apr 11

Commit

4fc79a4

verified ·

1 Parent(s): 1f5fb09

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -128

app.py CHANGED Viewed

@@ -1,133 +1,109 @@
-from fastapi import FastAPI, File, UploadFile, HTTPException, Form
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import HTMLResponse
-from fastapi.staticfiles import StaticFiles
 import pandas as pd
 import matplotlib.pyplot as plt
-import seaborn as sns
-import os
-import logging
-from huggingface_hub import InferenceClient
-from dotenv import load_dotenv
-import hashlib
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Load environment variables
-load_dotenv()
-app = FastAPI()
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-app.mount("/static", StaticFiles(directory="static"), name="static")
-API_TOKEN = os.getenv("HF_TOKEN")
-if not API_TOKEN:
-    raise ValueError("HF_TOKEN environment variable not set.")
-MODEL_NAME = "gemini-2.5-pro-preview-03-25"
-client = InferenceClient(model=MODEL_NAME, token=API_TOKEN)
-UPLOAD_DIR = "uploads"
-os.makedirs(UPLOAD_DIR, exist_ok=True)
-IMAGES_DIR = os.path.join("static", "images")
-os.makedirs(IMAGES_DIR, exist_ok=True)
-@app.post("/upload/")
-async def upload_file(file: UploadFile = File(...)):
-    if not file.filename.endswith((".xlsx", ".csv")):
-        raise HTTPException(status_code=400, detail="File must be an Excel (.xlsx) or CSV file")
-    file_path = os.path.join(UPLOAD_DIR, file.filename)
-    with open(file_path, "wb") as buffer:
-        buffer.write(await file.read())
-    logger.info(f"File uploaded: {file.filename}")
-    return {"filename": file.filename}
-@app.post("/generate-visualization/")
-async def generate_visualization(prompt: str = Form(...), filename: str = Form(...)):
-    file_path = os.path.join(UPLOAD_DIR, filename)
-    if not os.path.exists(file_path):
-        raise HTTPException(status_code=404, detail="File not found on server.")
-    try:
-        if filename.endswith('.csv'):
-            df = pd.read_csv(file_path)
-        else:
-            df = pd.read_excel(file_path)
-        if df.empty:
-            raise ValueError("File is empty.")
-    except Exception as e:
-        raise HTTPException(status_code=400, detail=f"Error reading file: {str(e)}")
-    input_text = f"""
-    Given the DataFrame 'df' with columns {', '.join(df.columns)} and preview:
-    {df.head().to_string()}
-    Write Python code to: {prompt}
-    - Use ONLY 'df' (no external data loading).
-    - Use pandas (pd), matplotlib.pyplot (plt), or seaborn (sns).
-    - Include axis labels and a title.
-    - Output ONLY executable code (no comments, functions, print, or triple quotes).
     """
-    try:
-        generated_code = client.text_generation(input_text, max_new_tokens=500)
-        logger.info(f"Generated code:\n{generated_code}")
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error querying model: {str(e)}")
-    if not generated_code.strip():
-        raise HTTPException(status_code=500, detail="No code generated by the AI model.")
-    generated_code = generated_code.strip()
-    if generated_code.startswith('"""') or generated_code.startswith("'''"):
-        generated_code = generated_code.split('"""')[1] if '"""' in generated_code else generated_code.split("'''")[1]
-    if generated_code.endswith('"""') or generated_code.endswith("'''"):
-        generated_code = generated_code.rsplit('"""')[0] if '"""' in generated_code else generated_code.rsplit("'''")[0]
-    generated_code = generated_code.strip()
-    lines = generated_code.splitlines()
-    executable_code = "\n".join(
-        line.strip() for line in lines
-        if line.strip() and not line.strip().startswith(('#', 'def', 'class', '"""', "'''"))
-        and not any(kw in line for kw in ["pd.read_csv", "pd.read_excel", "http", "raise", "print"])
-    ).strip()
-    executable_code = executable_code.replace("plt.show()", "").strip()
-    logger.info(f"Executable code:\n{executable_code}")
-    plot_hash = hashlib.md5(f"{filename}_{prompt}".encode()).hexdigest()[:8]
-    plot_filename = f"plot_{plot_hash}.png"
-    plot_path = os.path.join(IMAGES_DIR, plot_filename)
-    try:
-        exec_globals = {"pd": pd, "plt": plt, "sns": sns, "df": df}
-        exec(executable_code, exec_globals)
-        plt.savefig(plot_path, bbox_inches="tight")
         plt.close()
-    except Exception as e:
-        logger.error(f"Error executing code:\n{executable_code}\nException: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Error executing code: {str(e)}")
-    if not os.path.exists(plot_path):
-        raise HTTPException(status_code=500, detail="Plot file was not created.")
-    return {"plot_url": f"/static/images/{plot_filename}", "generated_code": generated_code}
-@app.get("/")
-async def serve_frontend():
-    with open("static/index.html", "r") as f:
-        return HTMLResponse(content=f.read())

+import gradio as gr
 import pandas as pd
 import matplotlib.pyplot as plt
+import io
+import base64
+import google.generativeai as genai
+def process_file(api_key, file, instructions):
+    """Ask Gemini for three visualizations of an uploaded dataset and render them.
+
+    Args:
+        api_key: Gemini API key handed to ``genai.configure``.
+        file: Upload coming from the ``gr.File`` input; either an object that
+            exposes the path as ``.name`` or the path string itself.
+        instructions: Optional free-text guidance that is embedded in the prompt.
+
+    Returns:
+        A flat 9-tuple ``(image1, description1, code1, ..., image3,
+        description3, code3)`` matching the nine output components wired to
+        the submit button. An image is a PNG data URI, or ``None`` when the
+        section had no code or rendering failed; missing sections are padded
+        with ``(None, "", "")``.
+
+    Raises:
+        gr.Error: If no file was uploaded.
+    """
+    import re  # local import: only this function needs it
+    if file is None:
+        raise gr.Error("Please upload an Excel or CSV file.")
+    # Depending on the Gradio version the upload is a tempfile-like object
+    # (path in .name) or already a plain path string.
+    path = str(getattr(file, "name", file))
+    # Set up Gemini API
+    genai.configure(api_key=api_key)
+    model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
+    # Read the file (extension check is case-insensitive so "DATA.CSV" works)
+    if path.lower().endswith('.csv'):
+        df = pd.read_csv(path)
+    else:
+        df = pd.read_excel(path)
+    # Analyze data and get visualization suggestions from Gemini
+    data_description = df.describe().to_string()
+    columns_info = "\n".join(f"{col}: {df[col].dtype}" for col in df.columns)
+    prompt = f"""
+    Given this dataset:
+    Columns and types:
+    {columns_info}
+    Data summary:
+    {data_description}
+    User instructions: {instructions if instructions else 'No specific instructions provided.'}
+    Suggest 3 ways to visualize this data. For each visualization:
+    1. Describe the visualization type and what it will show.
+    2. Provide Python code using matplotlib to create the visualization.
+    3. Explain why this visualization is useful for understanding the data.
+    Format your response as:
+    Visualization 1:
+    Description: ...
+    Code: ...
+    Explanation: ...
+    Visualization 2:
+    ...
+    Visualization 3:
+    ...
     """
+    response = model.generate_content(prompt)
+    # Split on the numbered headers only, so the word "Visualization" inside
+    # a description does not create bogus sections.
+    sections = re.split(r"Visualization\s+\d+\s*:", response.text)[1:4]
+    fence = re.compile(r"```(?:python|py)?\s*\n(.*?)```", re.DOTALL | re.IGNORECASE)
+    results = []
+    for i, section in enumerate(sections, 1):
+        description, found, rest = section.partition("Code:")
+        description = description.strip()
+        if not found:
+            # No code in this section: keep the text, leave image/code empty.
+            results.extend((None, description, ""))
+            continue
+        code = rest.split("Explanation:")[0]
+        # Models often wrap code in a markdown fence; exec needs the bare source.
+        fenced = fence.search(code)
+        code = (fenced.group(1) if fenced else code).strip()
+        image = None
+        plt.figure(figsize=(10, 6))
+        try:
+            # SECURITY: this executes model-generated code with the full
+            # privileges of the app process. Review before exposing publicly.
+            # A single namespace dict (instead of function locals) lets
+            # comprehensions/lambdas in the generated code see df/pd/plt;
+            # df is copied so one snippet cannot alter the data for the next.
+            exec(code, {"pd": pd, "plt": plt, "df": df.copy()})
+            plt.title(f"Visualization {i}")
+            # Save the plot to a BytesIO object
+            buf = io.BytesIO()
+            plt.savefig(buf, format='png')
+            img_str = base64.b64encode(buf.getvalue()).decode()
+            # NOTE(review): confirm gr.Image accepts a data URI in the
+            # installed Gradio version; kept as in the original.
+            image = f"data:image/png;base64,{img_str}"
+        except Exception as exc:
+            # One broken snippet should not discard the other visualizations;
+            # surface the failure in the description instead of hiding it.
+            description = f"{description}\n\n[Rendering failed: {exc}]"
+        finally:
+            # Close every figure, including ones the generated code opened.
+            plt.close("all")
+        results.extend((image, description, code))
+    # Pad so that exactly nine values are returned, one per output component.
+    results.extend([None, "", ""] * (3 - len(sections)))
+    return tuple(results)
+# Gradio interface: collects an API key, a data file and optional
+# instructions, and shows an image, a description and the code for each of
+# three visualizations.
+with gr.Blocks() as demo:
+    gr.Markdown("# Data Visualization with Gemini")
+    # Input components; they are passed to process_file in this order.
+    api_key = gr.Textbox(label="Enter Gemini API Key", type="password")
+    file = gr.File(label="Upload Excel or CSV file")
+    instructions = gr.Textbox(label="Optional visualization instructions")
+    submit = gr.Button("Generate Visualizations")
+    # Output components, grouped by kind: one row of images, one row of
+    # descriptions, one row of code viewers.
+    with gr.Row():
+        output1 = gr.Image(label="Visualization 1")
+        output2 = gr.Image(label="Visualization 2")
+        output3 = gr.Image(label="Visualization 3")
+    with gr.Row():
+        desc1 = gr.Textbox(label="Description 1")
+        desc2 = gr.Textbox(label="Description 2")
+        desc3 = gr.Textbox(label="Description 3")
+    with gr.Row():
+        code1 = gr.Code(language="python", label="Code 1")
+        code2 = gr.Code(language="python", label="Code 2")
+        code3 = gr.Code(language="python", label="Code 3")
+    # Outputs are listed per visualization as (image, description, code),
+    # i.e. nine components in total for the handler's result.
+    submit.click(
+        fn=process_file,
+        inputs=[api_key, file, instructions],
+        outputs=[
+            output1, desc1, code1,
+            output2, desc2, code2,
+            output3, desc3, code3
+        ],
+        show_progress=True,
+    )
+# Launched at module level (there is no __main__ guard).
+demo.launch()