"""Continuously generate AI prompt/response pairs and browse them in Gradio.

A background thread asks a Groq-hosted model to invent a prompt, feeds that
prompt back to the same model, and appends the (prompt, response) pair to a
CSV file under ``DATA_DIRECTORY``.  When the current CSV reaches ``MAX_SIZE``
bytes a new numbered file is started.  A Gradio UI lets the user pick a file,
preview it, download it and see a running word count.
"""
import os
import threading
import time

import gradio as gr
import pandas as pd
from groq import Groq

# Initialize Groq client
client = Groq()

# Constants
MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # 1.1GB in bytes
DATA_DIRECTORY = 'data'
UPDATE_INTERVAL = 1  # Update interval in seconds
MODEL_NAME = "gemma2-9b-it"

# Instruction sent to the model to obtain one synthetic user prompt.
PROMPT_REQUEST = (
    "give me a single prompt to prompt an ai model, simulating what users "
    "could want from you. ensure that it is diverse and high quality. "
    "for each, choose a random writing style (though it has to be a common "
    "one), random length and random clarity of the prompt. ensure that it is "
    "a single prompt, and just the prompt itself, nothing else. \n"
    "eg, don't close the prompt in quotation marks or say Here is a single "
    "prompt that meets your requirements or anything similar to that"
)

# Ensure the data directory exists
os.makedirs(DATA_DIRECTORY, exist_ok=True)


def _data_file_path(index):
    """Return the path of the CSV file with the given 1-based index."""
    return os.path.join(DATA_DIRECTORY, f'data{index}.csv')


def _discover_data_files():
    """Return ``(index, path)`` pairs for existing ``data<N>.csv`` files.

    The result is sorted by index.  Only names that round-trip exactly
    through ``_data_file_path`` are accepted, so files such as
    ``data01.csv`` or ``data.csv`` are ignored.
    """
    prefix, suffix = 'data', '.csv'
    found = []
    for name in os.listdir(DATA_DIRECTORY):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        stem = name[len(prefix):-len(suffix)]
        if not stem.isdecimal():
            continue
        index = int(stem)
        path = _data_file_path(index)
        if os.path.basename(path) == name and os.path.isfile(path):
            found.append((index, path))
    return sorted(found)


# Initialize variables.
# Resume from files left by a previous run instead of always starting at
# index 1; otherwise a restart would overwrite data written earlier.
_existing_files = _discover_data_files()
if _existing_files:
    file_index = _existing_files[-1][0]
    file_paths = [path for _, path in _existing_files]
else:
    file_index = 1
    file_paths = [_data_file_path(file_index)]
current_file = file_paths[-1]
combined_tokens = 0


# Helper function to get file size
def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 if it is not a file."""
    return os.path.getsize(filename) if os.path.isfile(filename) else 0


def _stream_completion(content, max_tokens):
    """Send *content* as a single user message and return the full reply.

    The reply is requested as a stream and the text deltas are joined into
    one string.
    """
    stream = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {
                "role": "user",
                "content": content,
            }
        ],
        temperature=1,
        max_tokens=max_tokens,
        top_p=1,
        stream=True,
        stop=None,
    )
    pieces = []
    for chunk in stream:
        # Defensive: skip chunks that carry no choices instead of failing
        # the whole generation with an IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text:
            pieces.append(delta_text)
    return "".join(pieces)


def _count_words(text):
    """Return the number of whitespace-separated words in *text*.

    This is only an approximation of the model's token count; it is
    computed on the complete text so that words split across streaming
    chunks are not counted twice.
    """
    return len(text.split())


def _append_row(path, row):
    """Append the DataFrame *row* to the CSV at *path*.

    The header is written only when the file is missing or empty, and the
    file is never truncated.  The path (not an open handle) is given to
    pandas so it controls newline handling, and UTF-8 is forced so
    non-ASCII model output is written consistently on every platform.
    """
    needs_header = get_file_size(path) == 0
    row.to_csv(
        path,
        mode='a',
        header=needs_header,
        index=False,
        encoding='utf-8',
    )


# Data generation and saving function
def generate_and_save_data():
    """Generate prompt/response pairs forever and append them to CSV files.

    Runs an endless loop (intended for a daemon thread).  Each iteration
    obtains a prompt from the model, obtains the model's response to that
    prompt, adds their word counts to ``combined_tokens``, prints both
    texts and appends them as one row to ``current_file``.  When the
    current file has reached ``MAX_SIZE`` bytes the next numbered file is
    used.  Any exception is printed and the loop retries after 5 seconds.
    """
    global file_index, current_file, file_paths, combined_tokens

    # Create the initial file if it doesn't exist so that it is listed in
    # the UI before the first row arrives.
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)

    while True:
        try:
            # Generate a prompt
            prompt = _stream_completion(PROMPT_REQUEST, max_tokens=1024)

            # Use the generated prompt to query the model again
            response = _stream_completion(prompt, max_tokens=5000)

            # Update the combined token count
            combined_tokens += _count_words(prompt) + _count_words(response)

            # Print the generated prompt and the response
            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)

            # Create a DataFrame with the prompt and response
            data = pd.DataFrame({"prompt": [prompt], "response": [response]})

            # Roll over to the next file(s) while the current one is full.
            while get_file_size(current_file) >= MAX_SIZE:
                file_index += 1
                current_file = _data_file_path(file_index)
                if current_file not in file_paths:
                    file_paths.append(current_file)

            _append_row(current_file, data)

            # Wait for the next update interval
            time.sleep(UPDATE_INTERVAL)

        except Exception as e:
            # Top-level boundary of the worker thread: report and retry so
            # a transient API or I/O failure does not stop generation.
            print(f"An error occurred: {e}. \nRetrying in 5 seconds...")
            time.sleep(5)


# Get available files
def get_available_files():
    """Return the known data file paths that currently exist on disk."""
    return [f for f in file_paths if os.path.isfile(f)]


# Update file list
def update_file_list():
    """Return a Gradio update that refreshes the dropdown choices."""
    return gr.update(choices=get_available_files())


# Update token count
def update_token_count():
    """Return the combined word count accumulated by the worker so far."""
    return combined_tokens


# Display file content
def display_file_content(selected_file):
    """Return the selected CSV as a DataFrame, or an empty one if none."""
    if selected_file:
        return pd.read_csv(selected_file)
    return pd.DataFrame()


# Start the data generation in a separate thread
thread = threading.Thread(target=generate_and_save_data)
thread.daemon = True
thread.start()

# Create Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")

    file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
    file_viewer = gr.DataFrame(label="CSV File Content")
    download_button = gr.File(label="Download Selected File")

    def download_file(selected_file):
        """Return the selected path so the File component can serve it."""
        return selected_file

    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)
    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)

    token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)

    def update_token_display():
        """Return the current combined count as text for the textbox."""
        return str(update_token_count())

    # The count is refreshed on demand via this button (not on a timer).
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)

app.launch()