Atharva Thakur committed
Commit: d9628eb
Parent: 7d0a5c7

Experiment #2

Files changed (3):
  1. Experiments.py +41 -47
  2. code.py +0 -10
  3. data.csv +0 -101
Experiments.py CHANGED
@@ -8,59 +8,53 @@ from data_code_run import DataCodeRun
 load_dotenv() # take environment variables from .env.
 os.environ['GEMINI_API_KEY'] = os.getenv("GOOGLE_API_KEY")

-file_path = './test_data.csv'
-df = pd.read_csv(file_path)
-
-string_data= df.to_string(index=False)
-
-# Get column names
-column_names = ", ".join(df.columns.tolist())
-
-# Get data types
-data_types = ", ".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
-
-# Get number of rows and columns
-num_rows, num_cols = df.shape
-
-# Construct the dataset information string
-info_string = f"Dataset Information:\n"
-info_string += f"Columns: {column_names}\n"
-info_string += f"Data Types: {data_types}\n"
-info_string += f"Number of Rows: {num_rows}\n"
-info_string += f"Number of Columns: {num_cols}\n"
-
-# print(string_data)
-request = "total number of null values in all columns"
-message = f'''
-You are a data analyser agent working with a given dataset.
-Below is the info about the dataset -
-
-========
-{info_string}
-========
-
-Your task -
-write a proper prompt to tell another agent to generate code to fulfill the below request by the user.
-You have to give all the details about the columns involved and only the required info about the dataset needed to fulfil the request.
-failues are given as 0 and 1 in target column. Also tell about the file location that is 'test_data.csv'.
-
-Request :
-=======
-{request}
-=======
-Do not infer any data based on previous training, strictly use only source text given below as input.
-
-'''
-output = completion(
-    model="gemini/gemini-pro",
-    messages=[
-        {"role": "user", "content": message}
-    ]
-)
-
-print(output.choices[0].message.content)
-
-message = output.choices[0].message.content
-runner = DataCodeRun()
-
-runner.run_code(message)
+
+def LLM_summary():
+    file_path = './test_data.csv'
+    df = pd.read_csv(file_path)
+
+    string_data= df.to_string(index=False)
+
+    # Get column names
+    column_names = ", ".join(df.columns.tolist())
+
+    # Get data types
+    data_types = ", ".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
+
+    # Get number of rows and columns
+    num_rows, num_cols = df.shape
+
+    # Construct the dataset information string
+    info_string = f"Dataset Information:\n"
+    info_string += f"Columns: {column_names}\n"
+    info_string += f"Data Types: {data_types}\n"
+    info_string += f"Number of Rows: {num_rows}\n"
+    info_string += f"Number of Columns: {num_cols}\n"
+
+
+
+    message = f'''
+    You are a data analyser agent working with a given dataset.
+    Below is the info about the dataset -
+    ========
+    {info_string}
+    ========
+
+    Your task -
+    Write a summary report of the dataset. You have to explain what the dataset is about and what kind of information could be gained from the dataset.
+
+
+    Do not infer any data based on previous training, strictly use only source text given below as input.
+
+    '''
+    output = completion(
+        model="gemini/gemini-pro",
+        messages=[
+            {"role": "user", "content": message}
+        ]
+    )
+
+    print(output.choices[0].message.content)
+
+
+LLM_summary()
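
Note: the hunk starts at line 8 of Experiments.py, so the file's import block is not part of the diff. For context, here is a minimal sketch of what those first lines presumably contain, inferred only from the names used in the hunk (pd, os, load_dotenv, completion, and the "from data_code_run import DataCodeRun" fragment in the hunk header); treating completion as litellm's is an assumption based on the "gemini/gemini-pro" model string and the GEMINI_API_KEY variable, not something the commit shows.

# Presumed top of Experiments.py (outside the hunk; an assumption, not part of the commit)
import os

import pandas as pd
from dotenv import load_dotenv
from litellm import completion          # assumed source of completion(); matches the gemini/gemini-pro model id
from data_code_run import DataCodeRun   # appears in the hunk header; only the pre-refactor code path used it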
code.py DELETED
@@ -1,10 +0,0 @@
-import pandas as pd
-
-# Load the dataset
-df = pd.read_csv('test_data.csv')
-
-# Count the number of null values in each column
-null_counts = df.isnull().sum()
-
-# Print the total number of null values
-print(null_counts.sum())
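
The deleted code.py appears to be the artifact of the previous experiment: it answered the old "total number of null values in all columns" request that the pre-refactor Experiments.py handed to the code-generation agent. If that check is still wanted without the agent round-trip, a minimal pandas sketch along the same lines (the test_data.csv path is taken from the diff; printing the per-column breakdown is an addition, not something the deleted script did):

import pandas as pd

# Load the same dataset the deleted script used
df = pd.read_csv('test_data.csv')

# Null count per column, then the grand total (the deleted script printed only the total)
null_counts = df.isnull().sum()
print(null_counts)
print(null_counts.sum())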
data.csv CHANGED
@@ -1,101 +0,0 @@
-X,Y
-3.745401188,16.4102977
-9.507143064,32.92341449
-7.319939418,27.14333981
-5.986584842,18.9846167
-1.560186404,9.241215438
-1.559945203,10.39406075
-0.580836122,9.698296455
-8.661761458,29.94874394
-6.011150117,21.41646315
-7.080725778,25.23866325
-0.205844943,7.448339064
-9.699098522,34.75479778
-8.324426408,28.91375882
-2.123391107,12.39670819
-1.818249672,10.64890411
-1.834045099,12.43942528
-3.04242243,12.7231611
-5.247564316,20.08736866
-4.319450186,17.17413425
-2.912291402,10.80984431
-6.118528947,23.9478274
-1.394938607,9.706926364
-2.921446485,13.77456637
-3.663618433,15.52168103
-4.560699842,15.85135804
-7.851759614,27.7139882
-1.996737822,10.30478443
-5.142344384,18.82247861
-4.626727484443299,22.44986564
-0.464504127,7.201614095
-6.075448519,26.99871736
-1.705241237,10.46487934
-0.65051593,7.466648571
-9.488855373,33.31767429
-9.656320331,30.13141856
-8.083973481,19.07749914296875
-3.046137692,14.2588735
-0.97672114,12.85664765
-6.842330265,25.14226887
-4.401524937,18.8076695
-1.220382348,8.591723506
-4.951769101,17.51795123
-0.343885211,8.317301262
-9.093204021,19.07749914296875
-2.587799816,14.34546334
-6.625222844,23.05689362
-3.117110761,17.1569209
-5.200680212,17.79833851
-5.467102793,22.57502257
-1.848544555,14.92654492
-9.695846278,32.10646618
-4.626727484443299,27.12138924
-9.394989416,33.38427098
-8.948273504,30.8378692
-5.978999788,19.8356725
-9.21874235,32.793353
-0.884925021,5.530167634
-1.959828624,11.82667073
-0.452272889,4.517970199
-3.253303308,17.85977873
-3.886772897,15.09381211
-2.713490318,12.49634792
-8.287375092,31.48915971
-3.567533267,13.24087117
-2.809345097,13.88295516
-5.426960832,23.895168
-1.40924225,19.07749914296875
-8.021969808,29.43517714
-0.745506437,7.756284899
-9.868869366,36.17025384
-7.722447693,25.69344166
-1.987156815,8.32055722
-0.055221171,6.209546645
-8.154614285,30.0578122
-7.068573438,26.70670602
-7.29007168,27.56311146
-4.626727484443299,26.77806096
-0.740446517,7.685846946
-3.584657285,16.3401168
-1.158690595,7.04736895
-8.631034259,34.6246518
-6.232981268,24.64660965
-3.308980249,12.54433375
-0.635583503,8.219857726
-3.109823217,12.38010631
-3.25183322,16.32966887
-7.296061783,29.20537651
-6.375574714,22.4853595
-8.872127426,33.54313454
-4.722149252,19.99200961
-1.195942459,10.2319477
-7.132447872,30.19092958
-7.607850486,27.33277523
-5.612771976,20.3308436
-7.7096718,26.34998654
-4.937955964,18.18224732
-5.227328294,20.52778146
-4.275410184,18.5085345
-0.254191267,6.315955401
-1.07891427,19.07749914296875