"""Ask an LLM to write a summary report for a CSV dataset.

The script loads a CSV file with pandas, builds a short structural
description of it (column names, dtypes, shape) and sends that description
to a Gemini model through ``litellm``. The model's reply is printed.
"""

import os

import pandas as pd
from dotenv import load_dotenv
from litellm import completion

from data_code_run import DataCodeRun
from python_interpreter import PythonInterpreter, run_interpreter

load_dotenv()  # take environment variables from .env.

# The key is stored as GOOGLE_API_KEY but the "gemini/..." models are called
# with GEMINI_API_KEY, so mirror it. os.environ only accepts str values:
# assigning None (variable missing) would raise TypeError, so guard for that.
_google_api_key = os.getenv("GOOGLE_API_KEY")
if _google_api_key is not None:
    os.environ['GEMINI_API_KEY'] = _google_api_key


def _describe_dataset(df):
    """Return a text block describing the columns, dtypes and shape of *df*."""
    # Column labels are not guaranteed to be strings (e.g. integer headers),
    # so convert them explicitly before joining.
    column_names = ", ".join(str(col) for col in df.columns)
    data_types = ", ".join(f"{col}: {dtype}" for col, dtype in df.dtypes.items())
    num_rows, num_cols = df.shape

    lines = [
        "Dataset Information:",
        f"Columns: {column_names}",
        f"Data Types: {data_types}",
        f"Number of Rows: {num_rows}",
        f"Number of Columns: {num_cols}",
    ]
    # Every line, including the last one, is newline-terminated.
    return "\n".join(lines) + "\n"


def LLM_summary(file_path='./test_data.csv', model="gemini/gemini-pro"):
    """Print an LLM-written summary report of the dataset at *file_path*.

    Only structural metadata (column names, dtypes, row/column counts) is
    sent to the model; the row values themselves are not included in the
    prompt.

    Args:
        file_path: Path of the CSV file to describe.
        model: litellm model identifier used for the completion call.

    Returns:
        None. The model's reply is written to standard output.
    """
    df = pd.read_csv(file_path)
    info_string = _describe_dataset(df)

    message = f'''
    You are a data analyser agent working with a given dataset.
    Below is the info about the dataset -
    ========
    {info_string}
    ========

    Your task - Write a summary report of the dataset. You have to explain what the dataset is about and what kind of information could be gained from the dataset.

    Do not infer any data based on previous training, strictly use only source text given below as input.
    '''

    output = completion(
        model=model,
        messages=[
            {"role": "user", "content": message}
        ]
    )

    print(output.choices[0].message.content)


# Runs at module level (also on import), matching the original script.
LLM_summary()