"""Continuously generate AI prompt/response pairs via the Groq API.

A background daemon thread asks the model for a prompt, feeds that prompt
back to the model for a response, and appends both to rotating CSV files
under ``data/``.  A Gradio UI lets the user browse, download, and watch a
running word count of the generated data.
"""
import os
import threading
import time

import gradio as gr
import pandas as pd
from groq import Groq

# Initialize Groq client (credentials come from the environment).
client = Groq()

# Constants
MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # 1.1GB in bytes; rotate to a new CSV past this
DATA_DIRECTORY = 'data'
UPDATE_INTERVAL = 1  # Update interval in seconds between generation rounds

# Ensure the data directory exists
os.makedirs(DATA_DIRECTORY, exist_ok=True)

# Mutable state shared between the generator thread and the Gradio callbacks.
file_index = 1
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]
# NOTE: despite the name, this counts whitespace-separated words of streamed
# text, not true model tokens.
combined_tokens = 0
# Guards file_index/current_file/file_paths/combined_tokens, which are written
# by the worker thread and read by the UI callbacks.
_state_lock = threading.Lock()

# The instruction sent to the model to elicit a fresh user-style prompt.
# (The embedded newline is intentional and preserved from the original.)
_PROMPT_REQUEST = (
    "give me a single prompt to prompt an ai model, simulating what users "
    "could want from you, either everyday conversations or MMLU styled "
    "questions. ensure that it is diverse and high quality. for each, choose "
    "a random writing style (though it has to be a common one), random length "
    "and random clarity of the prompt. ensure that it is a single prompt, and "
    "just the prompt itself, nothing else. \n"
    "eg, don't close the prompt in quotation marks or say Here is a single "
    "prompt that meets your requirements or anything similar to that"
)

_MODEL = "llama3-groq-70b-8192-tool-use-preview"


def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 if it does not exist."""
    return os.path.getsize(filename) if os.path.isfile(filename) else 0


def _stream_chat(user_content, max_tokens):
    """Stream one chat completion and return ``(text, word_count)``.

    ``word_count`` is the number of whitespace-separated words seen in the
    streamed chunks — a rough proxy for tokens, kept for parity with the
    original accounting.
    """
    completion = client.chat.completions.create(
        model=_MODEL,
        messages=[{"role": "user", "content": user_content}],
        temperature=1,
        max_tokens=max_tokens,
        top_p=1,
        stream=True,
        stop=None,
    )
    text = ""
    words = 0
    for chunk in completion:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            words += len(content.split())
    return text, words


def generate_and_save_data():
    """Worker loop: generate prompt/response pairs forever and append to CSV.

    Rotates to a new ``data{N}.csv`` once the current file reaches MAX_SIZE.
    Any exception (network error, API error, disk error) is logged and the
    loop retries after 5 seconds — deliberate best-effort behavior.
    """
    global file_index, current_file, file_paths, combined_tokens

    # Create the initial file with headers if it doesn't exist yet.
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)

    while True:
        try:
            # Step 1: ask the model to invent a user-style prompt.
            prompt, prompt_tokens = _stream_chat(_PROMPT_REQUEST, max_tokens=1024)

            # Step 2: feed the generated prompt back for a full response.
            response, response_tokens = _stream_chat(prompt, max_tokens=5000)

            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)

            data = pd.DataFrame({"prompt": [prompt], "response": [response]})

            with _state_lock:
                combined_tokens += (prompt_tokens + response_tokens)

                # Rotate to a fresh file once the current one is too large.
                if get_file_size(current_file) >= MAX_SIZE:
                    file_index += 1
                    current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                    file_paths.append(current_file)
                    # New file gets a header row.
                    with open(current_file, 'w') as f:
                        data.to_csv(f, header=True, index=False)
                else:
                    # Existing file already has headers; append rows only.
                    with open(current_file, 'a') as f:
                        data.to_csv(f, header=False, index=False)

            # Wait for the next update interval.
            time.sleep(UPDATE_INTERVAL)
        except Exception as e:
            # Best-effort loop: report and retry rather than die.
            print(f"An error occurred: {e}. \nRetrying in 5 seconds...")
            time.sleep(5)


def get_available_files():
    """Return the known data files that currently exist on disk."""
    with _state_lock:
        paths = list(file_paths)
    return [f for f in paths if os.path.isfile(f)]


def update_file_list():
    """Gradio callback: refresh the file-selector dropdown choices."""
    return gr.update(choices=get_available_files())


def update_token_count():
    """Return the running combined word count (see note on combined_tokens)."""
    with _state_lock:
        return combined_tokens


def display_file_content(selected_file):
    """Load *selected_file* as a DataFrame for display.

    Returns an empty DataFrame when nothing is selected or the file has
    since been removed, instead of letting read_csv raise.
    """
    if selected_file and os.path.isfile(selected_file):
        return pd.read_csv(selected_file)
    return pd.DataFrame()


# Build the Gradio interface (definition only; launched under __main__).
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")

    file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
    file_viewer = gr.DataFrame(label="CSV File Content")
    download_button = gr.File(label="Download Selected File")

    def download_file(selected_file):
        # The File component serves whatever path it is handed.
        return selected_file

    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)

    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)

    token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)

    def update_token_display():
        return str(update_token_count())

    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)


if __name__ == "__main__":
    # Start generation in the background, then serve the UI. Both were
    # import-time side effects in the original; guarded here so the module
    # can be imported without launching anything.
    thread = threading.Thread(target=generate_and_save_data, daemon=True)
    thread.start()
    app.launch()