Spaces:
Sleeping
Sleeping
Atharva Thakur
committed on
Commit
•
d9628eb
1
Parent(s):
7d0a5c7
Experiment #2
Browse files
- Experiments.py +41 -47
- code.py +0 -10
- data.csv +0 -101
Experiments.py
CHANGED
@@ -8,59 +8,53 @@ from data_code_run import DataCodeRun
|
|
8 |
load_dotenv() # take environment variables from .env.
|
9 |
os.environ['GEMINI_API_KEY'] = os.getenv("GOOGLE_API_KEY")
|
10 |
|
11 |
-
file_path = './test_data.csv'
|
12 |
-
df = pd.read_csv(file_path)
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
# Get column names
|
17 |
-
column_names = ", ".join(df.columns.tolist())
|
18 |
|
19 |
-
# Get data types
|
20 |
-
data_types = ", ".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
|
21 |
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
# Construct the dataset information string
|
26 |
-
info_string = f"Dataset Information:\n"
|
27 |
-
info_string += f"Columns: {column_names}\n"
|
28 |
-
info_string += f"Data Types: {data_types}\n"
|
29 |
-
info_string += f"Number of Rows: {num_rows}\n"
|
30 |
-
info_string += f"Number of Columns: {num_cols}\n"
|
31 |
|
32 |
-
|
33 |
-
request = "total number of null values in all columns"
|
34 |
-
message = f'''
|
35 |
-
You are a data analyser agent working with a given dataset.
|
36 |
-
Below is the info about the dataset -
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
43 |
-
write a proper prompt to tell another agent to generate code to fulfill the below request by the user.
|
44 |
-
You have to give all the details about the columns involved and only the required info about the dataset needed to fulfil the request.
|
45 |
-
failues are given as 0 and 1 in target column. Also tell about the file location that is 'test_data.csv'.
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
{request}
|
50 |
-
=======
|
51 |
-
Do not infer any data based on previous training, strictly use only source text given below as input.
|
52 |
-
|
53 |
-
'''
|
54 |
-
output = completion(
|
55 |
-
model="gemini/gemini-pro",
|
56 |
-
messages=[
|
57 |
-
{"role": "user", "content": message}
|
58 |
-
]
|
59 |
-
)
|
60 |
-
|
61 |
-
print(output.choices[0].message.content)
|
62 |
-
|
63 |
-
message = output.choices[0].message.content
|
64 |
-
runner = DataCodeRun()
|
65 |
-
|
66 |
-
runner.run_code(message)
|
|
|
8 |
load_dotenv() # take environment variables from .env.
|
9 |
os.environ['GEMINI_API_KEY'] = os.getenv("GOOGLE_API_KEY")
|
10 |
|
|
|
|
|
11 |
|
12 |
+
def LLM_summary():
|
13 |
+
file_path = './test_data.csv'
|
14 |
+
df = pd.read_csv(file_path)
|
15 |
+
|
16 |
+
string_data= df.to_string(index=False)
|
17 |
+
|
18 |
+
# Get column names
|
19 |
+
column_names = ", ".join(df.columns.tolist())
|
20 |
+
|
21 |
+
# Get data types
|
22 |
+
data_types = ", ".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
|
23 |
+
|
24 |
+
# Get number of rows and columns
|
25 |
+
num_rows, num_cols = df.shape
|
26 |
+
|
27 |
+
# Construct the dataset information string
|
28 |
+
info_string = f"Dataset Information:\n"
|
29 |
+
info_string += f"Columns: {column_names}\n"
|
30 |
+
info_string += f"Data Types: {data_types}\n"
|
31 |
+
info_string += f"Number of Rows: {num_rows}\n"
|
32 |
+
info_string += f"Number of Columns: {num_cols}\n"
|
33 |
|
|
|
|
|
34 |
|
|
|
|
|
35 |
|
36 |
+
message = f'''
|
37 |
+
You are a data analyser agent working with a given dataset.
|
38 |
+
Below is the info about the dataset -
|
39 |
+
========
|
40 |
+
{info_string}
|
41 |
+
========
|
42 |
+
|
43 |
+
Your task -
|
44 |
+
Write a summary report of the dataset. You have to explain what the dataset is about and what kind of information could be gained from the dataset.
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
Do not infer any data based on previous training, strictly use only source text given below as input.
|
|
|
|
|
|
|
|
|
48 |
|
49 |
+
'''
|
50 |
+
output = completion(
|
51 |
+
model="gemini/gemini-pro",
|
52 |
+
messages=[
|
53 |
+
{"role": "user", "content": message}
|
54 |
+
]
|
55 |
+
)
|
56 |
|
57 |
+
print(output.choices[0].message.content)
|
|
|
|
|
|
|
58 |
|
59 |
+
|
60 |
+
LLM_summary()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
code.py
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
|
3 |
-
# Load the dataset
|
4 |
-
df = pd.read_csv('test_data.csv')
|
5 |
-
|
6 |
-
# Count the number of null values in each column
|
7 |
-
null_counts = df.isnull().sum()
|
8 |
-
|
9 |
-
# Print the total number of null values
|
10 |
-
print(null_counts.sum())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data.csv
CHANGED
@@ -1,101 +0,0 @@
|
|
1 |
-
X,Y
|
2 |
-
3.745401188,16.4102977
|
3 |
-
9.507143064,32.92341449
|
4 |
-
7.319939418,27.14333981
|
5 |
-
5.986584842,18.9846167
|
6 |
-
1.560186404,9.241215438
|
7 |
-
1.559945203,10.39406075
|
8 |
-
0.580836122,9.698296455
|
9 |
-
8.661761458,29.94874394
|
10 |
-
6.011150117,21.41646315
|
11 |
-
7.080725778,25.23866325
|
12 |
-
0.205844943,7.448339064
|
13 |
-
9.699098522,34.75479778
|
14 |
-
8.324426408,28.91375882
|
15 |
-
2.123391107,12.39670819
|
16 |
-
1.818249672,10.64890411
|
17 |
-
1.834045099,12.43942528
|
18 |
-
3.04242243,12.7231611
|
19 |
-
5.247564316,20.08736866
|
20 |
-
4.319450186,17.17413425
|
21 |
-
2.912291402,10.80984431
|
22 |
-
6.118528947,23.9478274
|
23 |
-
1.394938607,9.706926364
|
24 |
-
2.921446485,13.77456637
|
25 |
-
3.663618433,15.52168103
|
26 |
-
4.560699842,15.85135804
|
27 |
-
7.851759614,27.7139882
|
28 |
-
1.996737822,10.30478443
|
29 |
-
5.142344384,18.82247861
|
30 |
-
4.626727484443299,22.44986564
|
31 |
-
0.464504127,7.201614095
|
32 |
-
6.075448519,26.99871736
|
33 |
-
1.705241237,10.46487934
|
34 |
-
0.65051593,7.466648571
|
35 |
-
9.488855373,33.31767429
|
36 |
-
9.656320331,30.13141856
|
37 |
-
8.083973481,19.07749914296875
|
38 |
-
3.046137692,14.2588735
|
39 |
-
0.97672114,12.85664765
|
40 |
-
6.842330265,25.14226887
|
41 |
-
4.401524937,18.8076695
|
42 |
-
1.220382348,8.591723506
|
43 |
-
4.951769101,17.51795123
|
44 |
-
0.343885211,8.317301262
|
45 |
-
9.093204021,19.07749914296875
|
46 |
-
2.587799816,14.34546334
|
47 |
-
6.625222844,23.05689362
|
48 |
-
3.117110761,17.1569209
|
49 |
-
5.200680212,17.79833851
|
50 |
-
5.467102793,22.57502257
|
51 |
-
1.848544555,14.92654492
|
52 |
-
9.695846278,32.10646618
|
53 |
-
4.626727484443299,27.12138924
|
54 |
-
9.394989416,33.38427098
|
55 |
-
8.948273504,30.8378692
|
56 |
-
5.978999788,19.8356725
|
57 |
-
9.21874235,32.793353
|
58 |
-
0.884925021,5.530167634
|
59 |
-
1.959828624,11.82667073
|
60 |
-
0.452272889,4.517970199
|
61 |
-
3.253303308,17.85977873
|
62 |
-
3.886772897,15.09381211
|
63 |
-
2.713490318,12.49634792
|
64 |
-
8.287375092,31.48915971
|
65 |
-
3.567533267,13.24087117
|
66 |
-
2.809345097,13.88295516
|
67 |
-
5.426960832,23.895168
|
68 |
-
1.40924225,19.07749914296875
|
69 |
-
8.021969808,29.43517714
|
70 |
-
0.745506437,7.756284899
|
71 |
-
9.868869366,36.17025384
|
72 |
-
7.722447693,25.69344166
|
73 |
-
1.987156815,8.32055722
|
74 |
-
0.055221171,6.209546645
|
75 |
-
8.154614285,30.0578122
|
76 |
-
7.068573438,26.70670602
|
77 |
-
7.29007168,27.56311146
|
78 |
-
4.626727484443299,26.77806096
|
79 |
-
0.740446517,7.685846946
|
80 |
-
3.584657285,16.3401168
|
81 |
-
1.158690595,7.04736895
|
82 |
-
8.631034259,34.6246518
|
83 |
-
6.232981268,24.64660965
|
84 |
-
3.308980249,12.54433375
|
85 |
-
0.635583503,8.219857726
|
86 |
-
3.109823217,12.38010631
|
87 |
-
3.25183322,16.32966887
|
88 |
-
7.296061783,29.20537651
|
89 |
-
6.375574714,22.4853595
|
90 |
-
8.872127426,33.54313454
|
91 |
-
4.722149252,19.99200961
|
92 |
-
1.195942459,10.2319477
|
93 |
-
7.132447872,30.19092958
|
94 |
-
7.607850486,27.33277523
|
95 |
-
5.612771976,20.3308436
|
96 |
-
7.7096718,26.34998654
|
97 |
-
4.937955964,18.18224732
|
98 |
-
5.227328294,20.52778146
|
99 |
-
4.275410184,18.5085345
|
100 |
-
0.254191267,6.315955401
|
101 |
-
1.07891427,19.07749914296875
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|