import streamlit as st
import pandas as pd
import os
from utils import save_json, load_json
from markdown import markdown
from autoviz import AutoViz_Class
from google.cloud import aiplatform
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part
import json

# Initialize the Vertex AI client with the project and region
aiplatform.init(
    project="geminsights",
    location="us-central1",
)

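# Write the service-account credentials stored in Streamlit secrets to disk so
# the Google Cloud client libraries can authenticate via
# GOOGLE_APPLICATION_CREDENTIALS.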
credentials = json.loads(st.secrets["credentials"], strict=False)
with open("credentials.json", "w") as f:
    json.dump(credentials, f, indent=2)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"


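# Streamlit UI: app header and a single CSV/XLSX file uploader.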
dataframe = None
st.title("GemInsights πŸ“Š")
st.caption("A Gemini-powered data analysis tool to get insights from your data πŸ”₯")
file = st.file_uploader(
    "Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
)

if file is not None:
    _, extension = os.path.splitext(file.name)
    if extension == ".csv":
        dataframe = pd.read_csv(file)
    else:
        dataframe = pd.read_excel(file)
    st.write(dataframe.head())
    st.write(f"updated a dataframe with shape {dataframe.shape}")

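# Once a file is uploaded, ask for a short description of the data and the
# target column to analyze against.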
if file is not None:
    text_input = st.text_input(
        "Enter something about the data πŸ‘‡",
        label_visibility="visible",
        disabled=False,
        placeholder="e.g. This is a sales dataframe",
    )

    option = st.selectbox(
        "Which is the target column? 🎯",
        tuple(dataframe.columns),
        index=None,
        placeholder="Select one column here",
    )

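# Generate exploratory charts with AutoViz; the JPG plots are written to
# plots/<target column>.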
def plot(dataframe, target):
    AV = AutoViz_Class()
    AV.AutoViz(
        "",
        sep=",",
        depVar=target,
        dfte=dataframe,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="jpg",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots",
    )

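# Build the multimodal prompt: read every chart produced by AutoViz as an
# image Part and combine the instruction text from prompt.txt with the
# user-supplied description of the dataset.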
def prompt_make(dataframe, target, info):
    images = []
    image_dir = f"plots/{target}"
    image_files = os.listdir(image_dir)
    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)
        with open(image_path, "rb") as img_file:
            img = img_file.read()
        images.append(Part.from_data(img, mime_type="image/jpeg"))
    with open("prompt.txt", "r") as prompt_file:
        data = prompt_file.read()
    prompt = f"{data}\n Here is some information related to the dataset - '{info}'"
    return prompt, images

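# Send the prompt and charts to Gemini Pro Vision through Vertex AI and
# return the generated text.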
def generate_res(prompt, images):
    print("prompting ...")
    model = GenerativeModel("gemini-pro-vision")
    response = model.generate_content(
        [prompt] + images,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.4,
            "top_p": 1,
            "top_k": 32,
        },
    )
    return response.text



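# End-to-end pipeline: plot the data, assemble the prompt, and query Gemini.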
def generate(dataframe, text_input, option):
    plot(dataframe, option)
    prompt, images = prompt_make(dataframe, option, text_input)
    res = generate_res(prompt, images)
    return res

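# On click, run the pipeline and render Gemini's Markdown response as HTML.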
if st.button("Get Insights", type="primary"):
    if dataframe is None or option is None:
        st.warning("Please upload a dataset and choose a target column first.")
    else:
        st.write("Generating insights ⏳ ...")
        # running the pipeline
        response = generate(dataframe, text_input, option)
        res = markdown(response)
        st.markdown(res, unsafe_allow_html=True)