import streamlit as st
import pandas as pd
import os
from utils import save_json, load_json
from markdown import markdown
from autoviz import AutoViz_Class
from google.cloud import aiplatform
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part
import json

# Initialize the Vertex AI client with the project and region
aiplatform.init(
    project="geminsights",
    location="us-central1",
)

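# Write the service-account credentials stored in Streamlit secrets to disk so
# the Google Cloud client libraries can authenticate via
# GOOGLE_APPLICATION_CREDENTIALS.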
credentials = json.loads(st.secrets["credentials"], strict=False)
with open("credentials.json", "w") as f:
    json.dump(credentials, f, indent=2)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"


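# Streamlit UI: app header and a single CSV/XLSX file uploader.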
dataframe = None
st.title("GemInsights πŸ“Š")
st.caption("A Gemini-powered data analysis tool to get insights from your data πŸ”₯")
file = st.file_uploader(
    "Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
)

if file is not None:
    _, extension = os.path.splitext(file.name)
    if extension == ".csv":
        dataframe = pd.read_csv(file)
    else:
        dataframe = pd.read_excel(file)
    st.write(dataframe.head())
    st.write(f"updated a dataframe with shape {dataframe.shape}")

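# Once a file is uploaded, ask for a short description of the data and the
# target column to analyze against.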
if file is not None:
    text_input = st.text_input(
        "Enter something about the data πŸ‘‡",
        label_visibility="visible",
        disabled=False,
        placeholder="e.g. This is a sales dataframe",
    )

    option = st.selectbox(
        "Which is the target column? 🎯",
        tuple(dataframe.columns),
        index=None,
        placeholder="Select one column here",
    )

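# Generate exploratory charts with AutoViz; the JPG plots are written to
# plots/<target column>.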
def plot(dataframe, target):
    AV = AutoViz_Class()
    AV.AutoViz(
        "",
        sep=",",
        depVar=target,
        dfte=dataframe,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="jpg",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots",
    )

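# Build the multimodal prompt: read every chart produced by AutoViz as an
# image Part and combine the instruction text from prompt.txt with the
# user-supplied description of the dataset.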
def prompt_make(dataframe, target, info):
    images = []
    image_dir = f"plots/{target}"
    image_files = os.listdir(image_dir)
    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)
        with open(image_path, "rb") as img_file:
            img = img_file.read()
        images.append(Part.from_data(img, mime_type="image/jpeg"))
    with open("prompt.txt", "r") as prompt_file:
        data = prompt_file.read()
    prompt = f"{data}\n Here is some information related to the dataset - '{info}'"
    return prompt, images

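# Send the prompt and charts to Gemini Pro Vision through Vertex AI and
# return the generated text.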
def generate_res(prompt, images):
    print("prompting ...")
    model = GenerativeModel("gemini-pro-vision")
    response = model.generate_content(
        [prompt] + images,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.4,
            "top_p": 1,
            "top_k": 32,
        },
    )
    return response.text



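# End-to-end pipeline: plot the data, assemble the prompt, and query Gemini.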
def generate(dataframe, text_input, option):
    plot(dataframe, option)
    prompt, images = prompt_make(dataframe, option, text_input)
    res = generate_res(prompt, images)
    return res

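# On click, run the pipeline and render Gemini's Markdown response as HTML.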
if st.button("Get Insights", type="primary"):
    if dataframe is None or option is None:
        st.warning("Please upload a dataset and choose a target column first.")
    else:
        st.write("Generating insights ⏳ ...")
        # running the pipeline
        response = generate(dataframe, text_input, option)
        res = markdown(response)
        st.markdown(res, unsafe_allow_html=True)