# geminsights / app.py
import base64
import json
import os

import pandas as pd
import streamlit as st
import vertexai
from autoviz import AutoViz_Class
from google.cloud import aiplatform
from markdown import markdown
from vertexai.preview.generative_models import GenerativeModel, Part

from utils import save_json, load_json
# set up Google Cloud: write the service-account credentials from Streamlit
# secrets to disk and point the client libraries at them
json_file = json.loads(st.secrets["credentials"], strict=False)
with open("credentials.json", "w") as f:
    json.dump(json_file, f, indent=2)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"

# initialise Vertex AI in the GemInsights project
aiplatform.init(project="geminsights", location="us-central1")
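# Note: the Vertex AI / google-cloud clients pick these credentials up via
# GOOGLE_APPLICATION_CREDENTIALS when the first request is made.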
dataframe = None

st.title("GemInsights 📊")
st.caption("A Gemini-powered data analysis tool to get insights from data 🔥")

file = st.file_uploader(
    "Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
)
if file is not None:
    _, extension = os.path.splitext(file.name)
    if extension == ".csv":
        dataframe = pd.read_csv(file)
    else:
        dataframe = pd.read_excel(file)
    st.write(dataframe.head())
    st.write(f"uploaded a dataframe with shape {dataframe.shape}")
if file is not None:
    text_input = st.text_input(
        "Enter something about the data 👇",
        label_visibility="visible",
        disabled=False,
        placeholder="e.g. this is a sales dataframe",
    )
    option = st.selectbox(
        "Which is the target column? 🎯",
        tuple(dataframe.columns),
        index=None,
        placeholder="Select one column in here",
    )
def plot(dataframe, target):
    """Run AutoViz on the dataframe and save the charts to the plots/ folder."""
    AV = AutoViz_Class()
    AV.AutoViz(
        "",
        sep=",",
        depVar=target,
        dfte=dataframe,
        header=0,
        verbose=2,
        lowess=False,
        chart_format="jpg",
        max_rows_analyzed=500,
        max_cols_analyzed=20,
        save_plot_dir="plots",
    )
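# With chart_format="jpg" and save_plot_dir="plots", AutoViz writes its charts
# into plots/<target>/ as .jpg files; prompt_make() below reads them back to
# build the multimodal prompt.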
def prompt_make(dataframe, target, info):
    """Build the text prompt and the list of chart images for Gemini."""
    images = []
    image_dir = f"plots/{target}"
    for image_file in os.listdir(image_dir):
        image_path = os.path.join(image_dir, image_file)
        with open(image_path, "rb") as img_file:
            img = img_file.read()
        # wrap the raw JPEG bytes as a multimodal Part
        images.append(Part.from_data(img, mime_type="image/jpeg"))
    with open("prompt.txt", "r") as prompt_file:
        data = prompt_file.read()
    prompt = f"{data}\n Here is some of the information related to the dataset - '{info}'"
    return prompt, images
def generate_res(prompt, images):
    """Send the prompt plus chart images to Gemini and return the response text."""
    print("prompting ...")
    model = GenerativeModel("gemini-pro-vision")
    responses = model.generate_content(
        [prompt] + images,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.4,
            "top_p": 1,
            "top_k": 32,
        },
    )
    return responses.text
def generate(dataframe, text_input, option):
    """Full pipeline: plot the data, build the prompt, and query Gemini."""
    plot(dataframe, option)
    prompt, images = prompt_make(dataframe, option, text_input)
    res = generate_res(prompt, images)
    return res
if st.button("Get Insights", type="primary"):
    st.write("generating insights ⏳ ...")
    # run the full pipeline on the uploaded dataframe
    response = generate(dataframe, text_input, option)
    res = markdown(response)
    st.markdown(res, unsafe_allow_html=True)
else:
    st.write("")
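# To run locally (a sketch, assuming the listed packages are installed and a
# `credentials` key holding the service-account JSON string exists in
# .streamlit/secrets.toml):
#   pip install streamlit pandas autoviz markdown google-cloud-aiplatform
#   streamlit run app.py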