Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- app.py +102 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import asyncio
|
4 |
+
import random
|
5 |
+
|
6 |
+
# Create a dedicated event loop and register it as the current one before the
# imports that follow.
# NOTE(review): presumably needed because `sketch` expects an event loop to be
# available in the thread Streamlit runs this script in — confirm.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
|
8 |
+
|
9 |
+
import sketch
|
10 |
+
import streamlit.components.v1 as components
|
11 |
+
from IPython.display import HTML, display
|
12 |
+
import uuid
|
13 |
+
import base64
|
14 |
+
import json
|
15 |
+
|
16 |
+
# ---- Main page heading ----
st.title("Ask Questions to Data")
st.markdown("##### Demo Application powered by sketch package")

# ---- Sidebar: static description of the sketch package ----
with st.sidebar:
    st.image("https://avatars.githubusercontent.com/u/106505054?s=200&v=4", width=100)

    st.title("About the Package used")
    st.markdown("##### Sketch is an AI code-writing assistant for pandas users that understands the context of the data, greatly improving the relevance of suggestions. Sketch is usable in seconds and doesn't require adding a plugin to IDE.")

    st.title("How it works:")
    st.markdown("##### Sketch uses efficient approximation algorithms (data sketches) to quickly summarize the data, and feed that information into language models. Right now, it does this by summarizing the columns and writing these summary statistics as additional context to be used by the code-writing prompt. In the future, the dev team hopes to feed these sketches directly into custom made data + language foundation models to get more accurate results.")

    st.title("Usecases:")
    # One markdown entry per use case, rendered in the listed order.
    for _usecase in (
        "##### Data Catalogging: General tagging (eg. PII identification), Metadata generation (names and descriptions)",
        "##### Data Engineering: Data cleaning and masking (compliance), Derived feature creation and extraction",
        "##### Data Analysis: Data questions, Data Visualizations",
    ):
        st.markdown(_usecase)

    st.caption("Github Repository: https://github.com/approximatelabs/sketch")
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
|
36 |
+
def upload_data_file():
    """Clear the stored upload state, show the uploader, and load any upload.

    Resets ``st.session_state.file`` and ``st.session_state.df`` to ``None``,
    renders the file-uploader widget, and passes the uploaded file to
    ``load_data`` when one has been provided.
    """
    st.session_state.file = None
    st.session_state.df = None

    uploaded = st.file_uploader(label='Upload Data File', type=["csv","xlsx","xls"])
    if uploaded is None:
        # Nothing uploaded yet on this run.
        return
    load_data(uploaded)
|
45 |
+
|
46 |
+
|
47 |
+
def load_data(file):
    """Parse the uploaded file into a DataFrame and cache both in session state.

    The uploader accepts CSV and Excel files, so the parser is chosen from the
    extension of the file's name instead of always assuming CSV.

    Args:
        file: Uploaded file object. It is read as a file-like object; when it
            has a ``name`` attribute, that name selects the parser. Anything
            that is not ``.xlsx``/``.xls`` is read as CSV.

    Raises:
        Whatever ``pd.read_csv`` / ``pd.read_excel`` raise for unreadable
        content; in that case session state is left unchanged.
    """
    filename = (getattr(file, "name", "") or "").lower()
    if filename.endswith((".xlsx", ".xls")):
        # NOTE(review): pandas needs an Excel engine here (xlrd for .xls,
        # openpyxl for .xlsx) — confirm both are installed in the deployment.
        df = pd.read_excel(file)
    else:
        df = pd.read_csv(file)

    # Record the upload only after parsing succeeded, so a failed parse never
    # leaves a file stored without a matching DataFrame.
    st.session_state.file = file
    st.session_state.df = df
|
51 |
+
|
52 |
+
|
53 |
+
# Make sure both session-state slots exist before anything below reads them.
for _slot in ("file", "df"):
    if _slot not in st.session_state:
        st.session_state[_slot] = None

# No file stored yet: render the uploader (which loads the file once provided).
if st.session_state.file is None:
    upload_data_file()
|
62 |
+
|
63 |
+
|
64 |
+
def to_b64(data):
    """Serialize ``data`` to JSON and return it as a Base64 text string.

    Args:
        data: Any value accepted by ``json.dumps``.

    Returns:
        The Base64 encoding (as ``str``) of the UTF-8 encoded JSON text.
    """
    json_bytes = json.dumps(data).encode("utf-8")
    encoded = base64.b64encode(json_bytes)
    return encoded.decode("utf-8")
|
66 |
+
|
67 |
+
# Main view: once a file is stored, show its data and a form for sending a
# request to sketch; otherwise prompt the user to upload a file.
if st.session_state.file is not None:
    # Reuse the DataFrame parsed at upload time instead of re-parsing the file
    # on every rerun. Fall back to reading the stored file as CSV only when no
    # parsed DataFrame is available.
    df = st.session_state.df
    if df is None:
        st.session_state.file.seek(0)
        df = pd.read_csv(st.session_state.file)

    st.header("Uploaded Data:")
    st.dataframe(df)

    with st.form("my_form"):
        request_type = st.radio(
            label="Selection Panel",
            options=['Ask question about the data', 'Generate codes for new analysis'],
            index=0
        )

        request = st.text_area(
            label="Input your request",
            value="",
            height=50,
            max_chars=500
        )

        submitted = st.form_submit_button("Submit")

    if submitted:
        if not request.strip():
            # An empty submission used to be ignored silently; tell the user.
            st.warning("Please enter a request before submitting.")
        elif request_type == 'Ask question about the data':
            # call_display=False: use the returned text rather than letting
            # sketch display it itself.
            answer = df.sketch.ask(request, call_display=False)
            st.code(answer)
        else:
            answer = df.sketch.howto(request, call_display=False)
            st.code(answer)
else:
    st.write('Please upload data file in order to ask questions to it.')
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.18.1
|
2 |
+
ydata-profiling[notebook,unicode,pyspark]
|
3 |
+
xlrd==2.0.1
|
4 |
+
streamlit-option-menu==0.3.2
|
5 |
+
sketch
|
6 |
+
sweetviz==2.1.4
|