import pandas as pd
import os
import gradio as gr
import threading
import time
from groq import Groq

# Initialize Groq client
# NOTE(review): no credentials are passed here, so the client presumably picks
# them up from the environment (e.g. GROQ_API_KEY) — confirm against the SDK.
client = Groq()

# Constants
# Size threshold at which the generator rolls over to a new CSV file. Note that
# this is a float; it is compared against the integer byte count of the file.
MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # 1.1GB in bytes
DATA_DIRECTORY = 'data'  # Relative path, so it resolves against the working directory
UPDATE_INTERVAL = 1  # Pause after each successful generation cycle, in seconds

# Ensure the data directory exists
os.makedirs(DATA_DIRECTORY, exist_ok=True)

# Initialize variables
# These are module-level globals shared between the background generator
# (which mutates them) and the UI callbacks (which only read them).
file_index = 1  # Numeric suffix of the file being written; always starts at 1 on launch
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]  # Data files tracked during this run; source of the dropdown choices
combined_tokens = 0  # Running total of the approximate (whitespace-split) token counts

# Helper function to get file size
def get_file_size(filename):
    """Return the size of *filename* in bytes.

    Anything that is not an existing regular file (a missing path, a
    directory, ...) is reported as size 0 instead of raising.
    """
    if not os.path.isfile(filename):
        return 0
    return os.path.getsize(filename)

# Data generation and saving

# Instruction sent to the model at the start of every cycle to obtain one
# synthetic "user" prompt.
_PROMPT_REQUEST = "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that"


def _stream_chat_completion(user_message, max_tokens):
    """Send one user message to the model and collect the streamed reply.

    Args:
        user_message: Text sent as the single "user" message.
        max_tokens: Completion length limit passed through to the API.

    Returns:
        A ``(text, approx_tokens)`` tuple. ``approx_tokens`` is the sum of the
        whitespace-separated word counts of the streamed chunks; it is an
        approximation, not the tokenizer's real token count.
    """
    stream = client.chat.completions.create(
        model="gemma2-9b-it",
        messages=[{"role": "user", "content": user_message}],
        temperature=1,
        max_tokens=max_tokens,
        top_p=1,
        stream=True,
        stop=None,
    )

    pieces = []
    approx_tokens = 0
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            pieces.append(content)
            approx_tokens += len(content.split())
    return "".join(pieces), approx_tokens


def _rotate_if_full():
    """Advance ``current_file`` until it names a file below ``MAX_SIZE``.

    Looping (instead of stepping once) matters after a restart: ``file_index``
    begins at 1 again, so files left by an earlier run that are already full
    are skipped rather than being reused. Every file stepped onto is recorded
    in ``file_paths`` so it shows up in the UI.
    """
    global file_index, current_file

    while get_file_size(current_file) >= MAX_SIZE:
        file_index += 1
        current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
        file_paths.append(current_file)


def _append_row(path, frame):
    """Append *frame* to the CSV at *path*, writing the header only if needed.

    The file is always opened in append mode so existing rows are never
    truncated. The path (not an open handle) is given to pandas so that it
    manages newline translation itself, and UTF-8 is requested explicitly:
    model output frequently contains non-ASCII text, and ``pd.read_csv`` reads
    UTF-8 by default.
    """
    needs_header = get_file_size(path) == 0  # missing or zero-byte file
    frame.to_csv(path, mode='a', header=needs_header, index=False, encoding='utf-8')


def generate_and_save_data():
    """Continuously generate prompt/response pairs and append them to CSV files.

    Each cycle asks the model for a synthetic prompt, feeds that prompt back to
    the model, adds both approximate token counts to ``combined_tokens`` and
    appends the pair as one row to ``current_file``, rolling over to the next
    ``data<N>.csv`` once the current file reaches ``MAX_SIZE``.

    This function never returns; it is meant to be run in a background thread.
    Any exception raised during a cycle is printed and the cycle is retried
    after 5 seconds.
    """
    global combined_tokens

    # Create the initial file if it doesn't exist, so it is listed immediately
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)

    while True:
        try:
            # Generate a prompt
            prompt, prompt_tokens = _stream_chat_completion(_PROMPT_REQUEST, max_tokens=1024)
            if not prompt.strip():
                # Nothing usable to send back or to store; let the retry path handle it.
                raise ValueError("model returned an empty prompt")

            # Use the generated prompt to query the model again
            response, response_tokens = _stream_chat_completion(prompt, max_tokens=5000)

            # Update the combined token count
            combined_tokens += (prompt_tokens + response_tokens)

            # Print the generated prompt and the response
            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)

            # Persist the pair as a single CSV row
            data = pd.DataFrame({"prompt": [prompt], "response": [response]})
            _rotate_if_full()
            _append_row(current_file, data)

            # Wait for the next update interval
            time.sleep(UPDATE_INTERVAL)

        except Exception as e:
            # Worker-loop boundary: report and keep the generator alive.
            print(f"An error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)

# Get available files
def get_available_files():
    """Return the tracked data-file paths that currently exist as files on disk."""
    return list(filter(os.path.isfile, file_paths))

# Update file list
def update_file_list():
    """Build a Gradio update that sets the dropdown choices to the files available now."""
    available = get_available_files()
    return gr.update(choices=available)

# Update token count
def update_token_count():
    """Return the current value of the module-level ``combined_tokens`` counter."""
    current_total = combined_tokens
    return current_total

# Display file content
def display_file_content(selected_file):
    """Load the selected CSV file for display.

    Args:
        selected_file: Path of the CSV file to show, or a falsy value
            (``None`` / ``""``) when nothing is selected.

    Returns:
        A DataFrame with the file's rows. An empty DataFrame is returned when
        nothing is selected, when the file does not exist (any more), or when
        the file is completely empty (zero bytes, no header yet).
    """
    if not selected_file:
        return pd.DataFrame()

    try:
        return pd.read_csv(selected_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Treat "nothing to read" the same way as "nothing selected" instead
        # of surfacing an exception to the caller.
        return pd.DataFrame()

# Run the generator in the background; as a daemon thread it does not keep
# the interpreter alive once the main thread exits.
thread = threading.Thread(target=generate_and_save_data, daemon=True)
thread.start()

# Create Gradio interface
# Components are created in the order they should appear, so statement order
# inside this block matters.
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")
    
    # The choices are computed once, when the UI is built; use the
    # "Refresh File List" button below to pick up files created later.
    file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
    file_viewer = gr.DataFrame(label="CSV File Content")
    # Despite its name this is a gr.File component, not a button.
    download_button = gr.File(label="Download Selected File")
    
    def download_file(selected_file):
        """Pass the selected path straight through to the file component."""
        return selected_file

    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)
    # Changing the selection updates both the table preview and the download component.
    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)
    
    # The initial value is a snapshot of the counter taken when the UI is built.
    token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)
    
    def update_token_display():
        """Return the current token counter as a string for the textbox."""
        return str(update_token_count())

    # The token count is NOT refreshed automatically: no timer is set up in
    # this block, so it only updates when this button is clicked.
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)

# Start serving the interface.
app.launch()