# Hugging Face Space: Garvitj / data (status: Sleeping)
# File size: 5,518 Bytes

import streamlit as st
import pandas as pd
import json
import subprocess
try:
    import plotly.express as px
except ModuleNotFoundError:
    subprocess.run(["pip", "install", "plotly"])
    import plotly.express as px
import re
import io
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load DeepSeek Model
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"


@st.cache_resource(show_spinner=False)
def _load_deepseek(model_id=MODEL_ID):
    """Load the tokenizer and causal-LM weights for ``model_id`` once.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded objects as a resource avoids reloading
    the model on each rerun. The spinner is disabled so that no UI element
    is emitted before ``st.set_page_config`` runs further down the script.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)
    loaded_model = AutoModelForCausalLM.from_pretrained(model_id)
    return loaded_tokenizer, loaded_model


# Module-level names are kept because the helper functions below use them.
tokenizer, model = _load_deepseek()

def query_deepseek(prompt, max_new_tokens=150):
    """
    Query the DeepSeek model and return only the newly generated text.

    Args:
        prompt: Text sent to the model.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Defaults to 150, the value previously hard-coded.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not included.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # For a causal LM the generated sequence starts with the prompt tokens;
    # drop them so callers do not parse their own prompt (which contains a
    # JSON template and quoted text) as if it were the model's answer.
    completion_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return response.strip()

def extract_json(text):
    """
    Extract a JSON object from the DeepSeek response.

    The text is scanned left to right for top-level JSON objects, and the
    last one that decodes successfully is returned. This tolerates stray
    braces, surrounding prose, and earlier draft/template objects, all of
    which made the previous greedy first-"{"-to-last-"}" match fail.

    Args:
        text: Raw model output.

    Returns:
        The decoded object, or None when the text holds no "{" ... "}" pair
        or none of the candidates is valid JSON (in which case an error is
        also shown through ``st.error``).
    """
    start = text.find("{")
    # Nothing that even looks like an object: stay silent, as before.
    if start == -1 or text.rfind("}") < start:
        return None

    decoder = json.JSONDecoder()
    decoded_objects = []
    while start != -1:
        try:
            candidate, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not an object at this brace; try the next opening brace.
            start = text.find("{", start + 1)
        else:
            decoded_objects.append(candidate)
            # Resume after this object so nested objects are not re-reported.
            start = text.find("{", end)

    if not decoded_objects:
        st.error("JSON Decode Error!")
        return None
    return decoded_objects[-1]

def get_visualization_suggestion(data):
    """
    Send dataset columns to DeepSeek and get suggestions for visualization.

    Args:
        data: Object exposing a ``columns`` iterable (the UI passes the
            uploaded DataFrame).

    Returns:
        Whatever ``extract_json`` yields for the model's reply: the parsed
        suggestion with "x", "y" and "chart_type" keys when the model
        complies, otherwise None.
    """
    # Column labels are not guaranteed to be strings (e.g. integer headers);
    # str.join would raise TypeError on them, so stringify first.
    column_names = ', '.join(map(str, data.columns))
    prompt = f"""
    I have the following dataset columns: {column_names}.
    Suggest the best type of visualization for this dataset.
    Return only a valid JSON response in the following format:
    {{
        "x": "column_name",
        "y": "column_name",
        "chart_type": "bar/line/scatter/pie"
    }}
    """
    response = query_deepseek(prompt)
    return extract_json(response)

def extract_csv_from_response(response):
    """
    Pull the CSV-looking lines out of a free-form response string.

    A line is kept only if it contains both a double quote and a comma.
    Kept lines are stripped of surrounding whitespace and joined with
    newlines. Returns None when no line qualifies.
    """
    kept_rows = []
    for raw_line in response.splitlines():
        if '"' not in raw_line or ',' not in raw_line:
            continue
        kept_rows.append(raw_line.strip())

    if not kept_rows:
        return None
    return '\n'.join(kept_rows)

def generate_demo_data_csv(user_input, num_rows=10):
    """Ask the LLM for a demo dataset and save it as a CSV file.

    The model is prompted for ``num_rows`` rows matching ``user_input``.
    CSV-looking lines are extracted from the reply, parsed with pandas and
    written to ``generated_data.csv`` in the working directory.

    Returns:
        A ``(message, file_path)`` tuple. ``file_path`` is None when no CSV
        lines were found or when parsing/writing raised an exception.
    """
    prompt = f"""
    Generate a structured dataset with {num_rows} rows based on the following request:
    "{user_input}"
    Ensure the response is in valid CSV format, with column headers and quoted text values.
    """
    llm_reply = query_deepseek(prompt).strip()
    csv_text = extract_csv_from_response(llm_reply)

    # Guard clause: nothing usable came back from the model.
    if not csv_text:
        return "Error: No valid CSV data found in the response.", None

    output_path = "generated_data.csv"
    try:
        frame = pd.read_csv(io.StringIO(csv_text))
        frame.to_csv(output_path, index=False)
    except Exception as exc:
        return f"Error: Invalid CSV format. {str(exc)}", None
    return "Demo data generated as CSV.", output_path

def query_sql_generator(user_query):
    """Turn a plain-English request into SQL by prompting the DeepSeek helper.

    The prompt asks for the query alone, without explanation; the text
    returned by ``query_deepseek`` is handed back unchanged.
    """
    return query_deepseek(
        f"I just want a SQL Query corresponding to: {user_query} and no explanation."
    )

# Streamlit UI
st.set_page_config(page_title="AI-Powered Dashboard", layout="wide")
st.title("🤖 AI-Powered Multi-Feature Dashboard")

# Sidebar for navigation
st.sidebar.title("Navigation")
option = st.sidebar.radio("Select Feature", ["📊 Data Visualization", "🧠 SQL Query Generator", "📄 Demo Data Generator"])

if option == "📊 Data Visualization":
    # Upload a CSV, ask the model which chart fits it, then render that chart.
    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.write("### Preview of Data")
        st.dataframe(df.head())

        with st.spinner("Getting visualization suggestions from DeepSeek..."):
            suggestion = get_visualization_suggestion(df)

        if suggestion:
            x_col, y_col = suggestion.get("x"), suggestion.get("y")
            # The model may omit "chart_type" or vary its casing/spacing;
            # normalise it so a missing value cannot crash the heading and
            # "Bar" is treated the same as "bar".
            raw_chart_type = suggestion.get("chart_type")
            chart_type = raw_chart_type.strip().lower() if isinstance(raw_chart_type, str) else None

            if x_col not in df.columns or y_col not in df.columns:
                st.error("DeepSeek suggested invalid column names.")
            else:
                if chart_type:
                    st.write(f"### Suggested Chart: {chart_type.capitalize()} Chart")
                chart_map = {
                    "bar": px.bar,
                    "line": px.line,
                    "scatter": px.scatter,
                    # px.pie takes names/values instead of x/y; extra keyword
                    # arguments (the title) must be forwarded, otherwise the
                    # shared call below raises TypeError for pie charts.
                    "pie": lambda frame, x, y, **kwargs: px.pie(frame, names=x, values=y, **kwargs),
                }
                if chart_type in chart_map:
                    fig = chart_map[chart_type](df, x=x_col, y=y_col, title=f"{x_col} vs {y_col}")
                    st.plotly_chart(fig)
                else:
                    st.error("Unsupported chart type suggested.")

elif option == "🧠 SQL Query Generator":
    # Free-text request in, generated SQL text out.
    text_input = st.text_area("Enter your Query here in Plain English:")
    if st.button("Generate SQL Query"):
        with st.spinner("Generating SQL Query..."):
            st.write(query_sql_generator(text_input))

elif option == "📄 Demo Data Generator":
    # Describe a dataset, let the model produce it, and offer it for download.
    user_input = st.text_area("Describe the dataset you want:")
    num_rows = st.number_input("Number of rows", min_value=1, max_value=1000, value=10)
    if st.button("Generate Dataset"):
        with st.spinner("Generating Demo Data..."):
            message, file_path = generate_demo_data_csv(user_input, num_rows)
        st.write(message)
        if file_path:
            # Read the bytes inside a context manager so the file handle is
            # closed instead of being leaked on every rerun.
            with open(file_path, "rb") as csv_file:
                csv_bytes = csv_file.read()
            st.download_button("Download CSV", csv_bytes, file_name="generated_data.csv", mime="text/csv")