oscarwang2's picture
Update app.py
6663d52 verified
raw
history blame contribute delete
No virus
6.02 kB
import pandas as pd
import os
import gradio as gr
import threading
import time
from groq import Groq
# Initialize Groq client
# NOTE(review): Groq() takes no explicit key here — presumably it reads
# GROQ_API_KEY from the environment; confirm deployment config.
client = Groq()

# Constants
MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # 1.1GB in bytes; CSV rotation threshold
DATA_DIRECTORY = 'data'
UPDATE_INTERVAL = 1  # Update interval in seconds between generation cycles

# Ensure the data directory exists
os.makedirs(DATA_DIRECTORY, exist_ok=True)

# Initialize variables (module-level state shared with the worker thread and UI)
file_index = 1  # numeric suffix of the CSV currently being written
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]  # every CSV produced so far, in creation order
combined_tokens = 0  # running count of whitespace-separated words streamed so far
# Helper function to get file size
def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 when it is not a regular file."""
    if not os.path.isfile(filename):
        return 0
    return os.path.getsize(filename)
# Data generation and saving function
def generate_and_save_data():
    """Endless worker loop: ask the model to invent a user-style prompt,
    answer that prompt, and append the pair to size-rotated CSV files.

    Intended to run in a daemon thread (see module bottom). Mutates the
    module globals ``file_index``, ``current_file``, ``file_paths`` and
    ``combined_tokens``. Never returns; on any error it prints the message
    and retries after 5 seconds.
    """
    global file_index, current_file, file_paths, combined_tokens

    # Create the initial file (header row only) if it doesn't exist,
    # so later appends with header=False stay well-formed.
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)

    while True:
        try:
            # Step 1: generate a prompt (streamed response).
            completion = client.chat.completions.create(
                model="mixtral-8x7b-32768",
                messages=[
                    {
                        "role": "user",
                        "content": "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that"
                    }
                ],
                temperature=1,
                max_tokens=1024,
                top_p=1,
                stream=True,
                stop=None,
            )
            prompt = ""
            # NOTE: "tokens" here are whitespace-separated words, not true
            # model tokens — an approximation used only for the UI counter.
            prompt_tokens = 0
            for chunk in completion:
                content = chunk.choices[0].delta.content
                if content:  # delta.content may be None on some stream chunks
                    prompt += content
                    prompt_tokens += len(content.split())

            # Step 2: use the generated prompt to query the model again.
            second_completion = client.chat.completions.create(
                model="mixtral-8x7b-32768",
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                temperature=1,
                max_tokens=5000,
                top_p=1,
                stream=True,
                stop=None,
            )
            response = ""
            response_tokens = 0
            for chunk in second_completion:
                content = chunk.choices[0].delta.content
                if content:
                    response += content
                    response_tokens += len(content.split())

            # Update the combined token count shown in the UI.
            combined_tokens += (prompt_tokens + response_tokens)

            # Print the generated prompt and the response (server-side log).
            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)

            # One-row DataFrame for this prompt/response pair.
            data = pd.DataFrame({"prompt": [prompt], "response": [response]})

            # Rotate to a fresh CSV once the current one reaches MAX_SIZE;
            # otherwise append (header was written when the file was created).
            if get_file_size(current_file) >= MAX_SIZE:
                file_index += 1
                current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                file_paths.append(current_file)
                # Create the new file with headers
                with open(current_file, 'w') as f:
                    data.to_csv(f, header=True, index=False)
            else:
                # Append data to the current file
                with open(current_file, 'a') as f:
                    data.to_csv(f, header=False, index=False)

            # Wait for the next update interval.
            time.sleep(UPDATE_INTERVAL)
        except Exception as e:
            # Best-effort worker: log and retry rather than killing the thread.
            print(f"An error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)
# Get available files
def get_available_files():
    """Return the subset of tracked CSV paths that currently exist on disk."""
    existing = []
    for path in file_paths:
        if os.path.isfile(path):
            existing.append(path)
    return existing
# Update file list
def update_file_list():
    """Build a Gradio update that refreshes the dropdown with on-disk CSV files."""
    available = get_available_files()
    return gr.update(choices=available)
# Update token count
def update_token_count():
    """Expose the module-level running word count to the UI layer."""
    total = combined_tokens
    return total
# Display file content
def display_file_content(selected_file):
    """Load the chosen CSV into a DataFrame; empty frame when nothing is selected."""
    if not selected_file:
        return pd.DataFrame()
    return pd.read_csv(selected_file)
# Start the data generation in a separate thread.
# daemon=True so this endless loop does not block interpreter shutdown
# when the Gradio app process exits.
thread = threading.Thread(target=generate_and_save_data)
thread.daemon = True
thread.start()
# Create Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")

    # Dropdown seeded with whatever CSVs exist at page build time.
    file_selector = gr.Dropdown(label="Select a data file to view and download", choices=get_available_files())
    file_viewer = gr.DataFrame(label="CSV File Content")
    download_button = gr.File(label="Download Selected File")

    def download_file(selected_file):
        # Identity mapping: the selected path itself is handed to gr.File.
        return selected_file

    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)

    # Selecting a file both renders its contents and exposes it for download.
    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(download_file, inputs=file_selector, outputs=download_button)

    token_display = gr.Textbox(label="Combined Tokens", value=str(update_token_count()), interactive=False)

    def update_token_display():
        # Textbox expects a string, so stringify the running count.
        return str(update_token_count())

    # NOTE(review): the count refreshes only when the user clicks this button —
    # no periodic timer is wired up, despite the original "every second" comment.
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(update_token_display, outputs=token_display)

app.launch()