Spaces:
Sleeping
Sleeping
Atharva Thakur
committed on
Commit
•
e3fe4bf
1
Parent(s):
b42e2e7
Some more experiments with dataparty
Browse files
- code.py +10 -18
- data_code_run.py +4 -3
- test.py +30 -24
- test2.py +2 -2
- test3.py +4 -0
code.py
CHANGED
@@ -1,24 +1,16 @@
|
|
1 |
-
import
|
2 |
import seaborn as sns
|
3 |
-
|
4 |
|
5 |
-
# Load the
|
6 |
-
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
-
plt.xlabel('Sepal Length')
|
11 |
-
plt.ylabel('Count')
|
12 |
-
plt.title('Distribution of Sepal Length')
|
13 |
-
plt.show()
|
14 |
|
15 |
-
#
|
16 |
-
sns.scatterplot(
|
17 |
-
plt.xlabel('Sepal Length')
|
18 |
-
plt.ylabel('Sepal Width')
|
19 |
-
plt.title('Relationship between Sepal Length and Sepal Width')
|
20 |
plt.show()
|
21 |
|
22 |
-
#
|
23 |
-
|
24 |
-
plt.show()
|
|
|
1 |
+
import pandas as pd
|
2 |
import seaborn as sns
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
|
5 |
+
# Load the dataset
|
6 |
+
df = pd.read_csv('test_data.csv')
|
7 |
|
8 |
+
# Check the correlation between 'Air temperature [K]' and 'Target'
|
9 |
+
corr = df['Air temperature [K]'].corr(df['Target'])
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
# Plot the scatter plot
|
12 |
+
sns.scatterplot(x='Air temperature [K]', y='Target', data=df)
|
|
|
|
|
|
|
13 |
plt.show()
|
14 |
|
15 |
+
# Print the correlation coefficient
|
16 |
+
print('Correlation coefficient:', corr)
|
|
data_code_run.py
CHANGED
@@ -8,13 +8,14 @@ from python_interpreter import PythonInterpreter, run_interpreter
|
|
8 |
load_dotenv() # take environment variables from .env.
|
9 |
|
10 |
class DataCodeRun:
|
11 |
-
def __init__(self
|
12 |
-
|
13 |
|
14 |
def run_code(self):
|
15 |
os.environ['GEMINI_API_KEY'] = os.getenv("GOOGLE_API_KEY")
|
16 |
|
17 |
-
message =
|
|
|
18 |
output = completion(
|
19 |
model="gemini/gemini-pro",
|
20 |
messages=[
|
|
|
8 |
load_dotenv() # take environment variables from .env.
|
9 |
|
10 |
class DataCodeRun:
|
11 |
+
def __init__(self):
|
12 |
+
pass
|
13 |
|
14 |
def run_code(self):
|
15 |
os.environ['GEMINI_API_KEY'] = os.getenv("GOOGLE_API_KEY")
|
16 |
|
17 |
+
message = '''generate the code to find the relation between 'Air temperature [K]' and 'Target' columns of the given dataset. The 'Target' column holds failure prediction values as 0 (no failure) and 1 (failure). the name of the dataset is test_data.csv .
|
18 |
+
'''
|
19 |
output = completion(
|
20 |
model="gemini/gemini-pro",
|
21 |
messages=[
|
test.py
CHANGED
@@ -9,41 +9,47 @@ load_dotenv() # take environment variables from .env.
|
|
9 |
os.environ['GEMINI_API_KEY'] = os.getenv("GOOGLE_API_KEY")
|
10 |
|
11 |
file_path = './test_data.csv'
|
12 |
-
|
13 |
-
|
14 |
-
string_data=
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
29 |
|
30 |
# print(string_data)
|
31 |
-
|
32 |
message = f'''
|
33 |
You are a data analyser agent working with a given dataset.
|
34 |
Below is the info about the dataset -
|
35 |
|
36 |
========
|
37 |
-
{
|
38 |
========
|
39 |
|
40 |
Your task -
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
Do not infer any data based on previous training, strictly use only source text given below as input.
|
44 |
-
|
45 |
-
{string_data}
|
46 |
-
========
|
47 |
'''
|
48 |
output = completion(
|
49 |
model="gemini/gemini-pro",
|
|
|
9 |
os.environ['GEMINI_API_KEY'] = os.getenv("GOOGLE_API_KEY")
|
10 |
|
11 |
file_path = './test_data.csv'
|
12 |
+
df = pd.read_csv(file_path)
|
13 |
+
|
14 |
+
string_data= df.to_string(index=False)
|
15 |
+
|
16 |
+
# Get column names
|
17 |
+
column_names = ", ".join(df.columns.tolist())
|
18 |
+
|
19 |
+
# Get data types
|
20 |
+
data_types = ", ".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
|
21 |
+
|
22 |
+
# Get number of rows and columns
|
23 |
+
num_rows, num_cols = df.shape
|
24 |
+
|
25 |
+
# Construct the dataset information string
|
26 |
+
info_string = f"Dataset Information:\n"
|
27 |
+
info_string += f"Columns: {column_names}\n"
|
28 |
+
info_string += f"Data Types: {data_types}\n"
|
29 |
+
info_string += f"Number of Rows: {num_rows}\n"
|
30 |
+
info_string += f"Number of Columns: {num_cols}\n"
|
31 |
|
32 |
# print(string_data)
|
33 |
+
request = "I want find relation between Air Temperature and Target"
|
34 |
message = f'''
|
35 |
You are a data analyser agent working with a given dataset.
|
36 |
Below is the info about the dataset -
|
37 |
|
38 |
========
|
39 |
+
{info_string}
|
40 |
========
|
41 |
|
42 |
Your task -
|
43 |
+
write a proper prompt to tell another agent to generate code to fulfill the below request by the user.
|
44 |
+
You have to give all the details about the columns involved and only the required info about the dataset needed to fulfil the request.
|
45 |
+
failues are given as 0 and 1 in target column.
|
46 |
+
|
47 |
+
Request :
|
48 |
+
=======
|
49 |
+
{request}
|
50 |
+
=======
|
51 |
Do not infer any data based on previous training, strictly use only source text given below as input.
|
52 |
+
|
|
|
|
|
53 |
'''
|
54 |
output = completion(
|
55 |
model="gemini/gemini-pro",
|
test2.py
CHANGED
@@ -10,8 +10,8 @@ load_dotenv() # take environment variables from .env.
|
|
10 |
|
11 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
12 |
llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
|
13 |
-
csv_agent = create_csv_agent(llm,"
|
14 |
-
question = "
|
15 |
if question:
|
16 |
response = csv_agent.run(question)
|
17 |
print(response)
|
|
|
10 |
|
11 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
12 |
llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
|
13 |
+
csv_agent = create_csv_agent(llm,"test_data.csv", verbose=True)
|
14 |
+
question = "what is the relation between air temperature and target"
|
15 |
if question:
|
16 |
response = csv_agent.run(question)
|
17 |
print(response)
|
test3.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from data_code_run import DataCodeRun
|
2 |
+
|
3 |
+
d = DataCodeRun()
|
4 |
+
d.run_code()
|