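# app.py -- captured from the Hugging Face file viewer
# (uploader: oscarwang2, commit ad19bf8 "Update app.py", verified, 6 kB).
# The viewer's page chrome is kept as this comment so the module parses.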
import pandas as pd
import os
import gradio as gr
import threading
import time
from groq import Groq
# Groq API client, created once at import time and used by the generator.
client = Groq()

# --- Configuration ---------------------------------------------------------
DATA_DIRECTORY = 'data'
MAX_SIZE = 1.1 * 1024 ** 3  # size at which a new CSV is started: 1.1GB in bytes
UPDATE_INTERVAL = 1  # seconds to pause after each generated row

# The output directory has to exist before the first CSV is written.
os.makedirs(DATA_DIRECTORY, exist_ok=True)

# --- Mutable module state (rebound by generate_and_save_data) ---------------
file_index = 1
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]
combined_tokens = 0
def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 if it is not a regular file."""
    if not os.path.isfile(filename):
        return 0
    return os.path.getsize(filename)
def _stream_completion(user_content, max_tokens):
    """Send one user message to the model and collect the streamed reply.

    Args:
        user_content: Text sent as the single ``user`` message.
        max_tokens: Value forwarded as the request's ``max_tokens``.

    Returns:
        A ``(text, word_count)`` tuple. ``word_count`` is the sum of the
        whitespace-separated pieces in each streamed chunk, so it is only a
        rough stand-in for a real token count.
    """
    stream = client.chat.completions.create(
        model="gemma2-9b-it",
        messages=[
            {
                "role": "user",
                "content": user_content,
            }
        ],
        temperature=1,
        max_tokens=max_tokens,
        top_p=1,
        stream=True,
        stop=None,
    )
    pieces = []
    word_count = 0
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:  # skip chunks that carry no text
            pieces.append(content)
            word_count += len(content.split())
    # Join once at the end instead of growing a string chunk by chunk.
    return "".join(pieces), word_count


def generate_and_save_data():
    """Endlessly generate prompt/response pairs and append them to CSV files.

    Each iteration asks the model for a prompt, feeds that prompt back to the
    model, adds both word counts to ``combined_tokens`` and writes one
    ``prompt,response`` row to ``current_file``. Once the current file has
    reached ``MAX_SIZE`` bytes, a new ``data{N}.csv`` is started and recorded
    in ``file_paths``.

    The loop never returns. Any exception is printed and the iteration is
    retried after five seconds.
    """
    global file_index, current_file, file_paths, combined_tokens

    seed_prompt = "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that"

    # Create the initial file (header only) if it doesn't exist
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)

    while True:
        try:
            # Generate a prompt, then use it to query the model again
            prompt, prompt_tokens = _stream_completion(seed_prompt, 1024)
            response, response_tokens = _stream_completion(prompt, 5000)

            # Update the combined token count
            combined_tokens += (prompt_tokens + response_tokens)

            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)

            data = pd.DataFrame({"prompt": [prompt], "response": [response]})

            # Hand pandas the path rather than a handle from open(): pandas
            # then controls newline handling and encoding itself, so rows are
            # written the same way as the header file created above.
            if get_file_size(current_file) >= MAX_SIZE:
                # Current file is full: roll over to the next numbered file
                file_index += 1
                current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                file_paths.append(current_file)
                data.to_csv(current_file, mode='w', header=True, index=False)
            else:
                data.to_csv(current_file, mode='a', header=False, index=False)

            # Wait for the next update interval
            time.sleep(UPDATE_INTERVAL)
        except Exception as e:
            # Loop boundary of the worker: report the failure and keep going.
            print(f"An error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)
def get_available_files():
    """Return the entries of ``file_paths`` that exist on disk, in list order."""
    return list(filter(os.path.isfile, file_paths))
def update_file_list():
    """Build a Gradio update whose ``choices`` are the files available now."""
    existing_files = get_available_files()
    return gr.update(choices=existing_files)
def update_token_count():
    """Return the current value of the module-level ``combined_tokens`` total."""
    total = combined_tokens
    return total
def display_file_content(selected_file):
    """Read the selected CSV into a DataFrame.

    Returns an empty DataFrame when ``selected_file`` is falsy (nothing
    selected); otherwise returns ``pd.read_csv(selected_file)``.
    """
    if not selected_file:
        return pd.DataFrame()
    return pd.read_csv(selected_file)
# Run the generator in a background daemon thread so it does not keep the
# process alive on its own.
thread = threading.Thread(target=generate_and_save_data, daemon=True)
thread.start()
# Callbacks used by the Gradio interface
def download_file(selected_file):
    """Pass the selected path straight through to the download component."""
    return selected_file


def update_token_display():
    """Return the current combined token count as text for the textbox."""
    return str(update_token_count())


# Build the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")

    # File browsing: pick a CSV, preview it, and offer it for download
    file_selector = gr.Dropdown(
        label="Select a data file to view and download",
        choices=get_available_files(),
    )
    file_viewer = gr.DataFrame(label="CSV File Content")
    download_button = gr.File(label="Download Selected File")

    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)

    # Selecting a file updates both the preview and the download widget
    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)

    # Token counter: shows the value at build time and is refreshed manually
    # with the button below (there is no automatic timer).
    token_display = gr.Textbox(
        label="Combined Tokens",
        value=str(update_token_count()),
        interactive=False,
    )
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)

app.launch()