# yourbench / app.py
# Last commit by sumuks: "feat: push" (09c48e8)
import os
import shutil
from datetime import datetime
import gradio as gr
import pandas as pd
import time
import random
import uuid
def save_uploaded_files(files, session_id):
    """Save uploaded files to telemetry directory with session ID.

    Each upload is copied (with metadata, via ``shutil.copy2``) into
    ``telemetry_files/<session_id>/`` under its original base name.

    Args:
        files: Iterable of uploads, or ``None``. Each item may be a path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name`` (e.g. a temp-file wrapper). ``None`` items are skipped.
        session_id: Name of the sub-directory that receives the copies.

    Returns:
        List of destination paths, in upload order.
    """
    save_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(save_dir, exist_ok=True)

    saved_paths = []
    used_names = set()
    for file in files or []:
        if file is None:
            continue
        # Accept both plain paths and file-like wrappers that carry `.name`.
        if isinstance(file, (str, os.PathLike)):
            source_path = os.fspath(file)
        else:
            source_path = file.name
        filename = os.path.basename(source_path)

        # Two uploads from different directories can share a base name;
        # suffix later ones so they do not overwrite the earlier copy.
        stem, ext = os.path.splitext(filename)
        candidate = filename
        counter = 1
        while candidate in used_names:
            candidate = f"{stem}_{counter}{ext}"
            counter += 1
        used_names.add(candidate)

        save_path = os.path.join(save_dir, candidate)
        shutil.copy2(source_path, save_path)
        saved_paths.append(save_path)
    return saved_paths
def _new_session_id():
    """Return a session ID: local timestamp (to the second) plus 8 random hex characters."""
    return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"


def mock_process_documents(files, chunk_size, num_questions, question_types, complexity_types,
                           difficulty, selected_models, session_id=None):
    """Mock processing function that simulates document processing.

    Sleeps for five seconds, copies the uploads into the session's telemetry
    directory, then fabricates ``num_questions`` placeholder question rows.

    Args:
        files: Uploaded files, forwarded to ``save_uploaded_files``.
        chunk_size: Accepted for interface parity; the mock does not use it.
        num_questions: Number of rows to generate. Coerced with ``int()`` so
            a float such as ``5.0`` is accepted.
        question_types: Non-empty sequence to draw each row's type from.
        complexity_types: Non-empty sequence to draw each row's complexity from.
        difficulty: Number shown with one decimal in the question text and
            stored unchanged in the ``difficulty`` column.
        selected_models: Non-empty sequence to draw each row's model from.
        session_id: Telemetry session the uploads are saved under. A fresh
            ID is generated when omitted, so existing callers keep working.

    Returns:
        ``pandas.DataFrame`` with one row per question and the columns
        ``question_type``, ``complexity``, ``question``, ``answer``,
        ``model`` and ``difficulty`` (no columns when zero rows are requested).
    """
    time.sleep(5)  # Simulate 5 seconds of processing

    if session_id is None:
        session_id = _new_session_id()
    saved_files = save_uploaded_files(files, session_id)
    files_summary = ", ".join(saved_files)

    data = []
    for number in range(1, int(num_questions) + 1):
        question_type = random.choice(question_types)
        complexity = random.choice(complexity_types)
        model = random.choice(selected_models)
        data.append({
            "question_type": question_type,
            "complexity": complexity,
            "question": (
                f"[{complexity}] Sample {question_type} question {number} "
                f"(Difficulty: {difficulty:.1f}, Model: {model})"
            ),
            "answer": (
                f"This is a sample answer for question {number}. "
                f"Files processed: {files_summary}"
            ),
            "model": model,
            "difficulty": difficulty,
        })
    return pd.DataFrame(data)


def generate_csv_file(df, session_id):
    """Generate and save CSV file for the results.

    Args:
        df: Results frame; ``None`` or an empty frame produces no file.
        session_id: Name of the ``telemetry_files`` sub-directory to write into.

    Returns:
        Path of the written ``results.csv``, or ``None`` when there was
        nothing to write.
    """
    if df is None or df.empty:
        return None

    session_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(session_dir, exist_ok=True)

    csv_path = os.path.join(session_dir, "results.csv")
    df.to_csv(csv_path, index=False)
    return csv_path


def process_files(
    input_files, chunk_size, num_questions,
    question_types_dict, complexity_types_dict,
    difficulty_level, model_selection_dict
):
    """Process files with the given configuration.

    Validates the selections, runs the (mock) generation, and writes the
    results CSV into the same telemetry session directory as the uploads.

    Args:
        input_files: Uploaded files; falsy means nothing was uploaded.
        chunk_size: Forwarded to ``mock_process_documents``.
        num_questions: Forwarded to ``mock_process_documents``.
        question_types_dict: Selected question types (a list of selected
            values, despite the ``_dict`` suffix).
        complexity_types_dict: Selected complexity types (a list).
        difficulty_level: Forwarded to ``mock_process_documents``.
        model_selection_dict: Selected model names (a list).

    Returns:
        Tuple ``(results DataFrame, status message, CSV path or None)``.
        On a validation error the frame is empty and the path is ``None``.
    """
    if not input_files:
        return pd.DataFrame(), "Error: No files uploaded", None

    if not question_types_dict or not complexity_types_dict or not model_selection_dict:
        return pd.DataFrame(), "Error: Please select at least one option from each category", None

    # One ID for the whole request, so the uploads and results.csv end up in
    # the same telemetry_files/<session_id>/ directory.
    session_id = _new_session_id()

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    results_df = mock_process_documents(
        input_files, chunk_size, num_questions,
        question_types_dict, complexity_types_dict,
        difficulty_level, model_selection_dict,
        session_id=session_id,
    )
    processing_time = time.perf_counter() - start_time

    csv_path = generate_csv_file(results_df, session_id)
    return (
        results_df,
        f"Processing completed in {processing_time:.2f} seconds",
        csv_path,
    )
# Custom theme: Gradio's Base theme with project hues, then per-variable overrides.
def _build_theme():
    """Return the app theme: a Base theme (blue/indigo/slate, Inter font, small radius) with overrides applied."""
    base_theme = gr.themes.Base(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        radius_size=gr.themes.sizes.radius_sm,
    )
    # Values starting with "*" refer to other theme variables by name.
    overrides = {
        # Page background
        "body_background_fill": "*neutral_50",
        "body_background_fill_dark": "*neutral_950",
        # Primary button
        "button_primary_background_fill": "*primary_600",
        "button_primary_background_fill_hover": "*primary_700",
        "button_primary_text_color": "white",
        "button_primary_text_color_dark": "white",
        # Block labels and titles
        "block_label_text_weight": "600",
        "block_title_text_weight": "600",
        # Input fields
        "input_background_fill": "white",
        "input_background_fill_dark": "*neutral_800",
        "input_border_color": "*neutral_200",
        "input_border_color_dark": "*neutral_700",
    }
    return base_theme.set(**overrides)


theme = _build_theme()
# Create the Gradio interface.
# Layout: a header, then a row with a configuration column (upload, sliders,
# checkbox groups, generate button) and an output column (status, table, CSV
# download), followed by instructions, citation and API usage sections.
# NOTE(review): the nesting below was reconstructed from context; multi-line
# string contents are deliberately left at column 0 so their text is unchanged.
with gr.Blocks(
    title="Yourbench - Dynamic Question Generation",
    theme=theme,
    css="""
.gradio-container {max-width: 1400px !important; margin-left: auto; margin-right: auto}
.contain { display: flex; flex-direction: column; }
.contain > * { flex: 1}
.gap { margin-top: 1rem !important }
footer {display: none !important}
.citation-box {
background-color: #f8fafc;
border: 1px solid #e2e8f0;
border-radius: 0.5rem;
padding: 1rem;
margin-top: 2rem;
font-family: monospace;
}
.citation-box pre {
margin: 0;
white-space: pre-wrap;
}
.main-panel { min-height: 600px }
.output-panel { min-height: 400px }
.checkbox-group { max-height: 200px; overflow-y: auto }
.model-select { max-height: 150px }
.download-btn { margin-top: 1rem !important }
"""
) as demo:
    # Header with description
    gr.Markdown("""
# 📚 Yourbench: Dynamic Question Generation Tool
Generate high-quality questions and answers from your documents using state-of-the-art language models.
This tool helps create diverse question types with varying complexity levels, perfect for educational
assessment and content understanding.
""")
    with gr.Row():
        # Left column for configuration
        with gr.Column(scale=2, elem_classes="main-panel"):
            # Document Upload Section
            with gr.Group():
                gr.Markdown("### 📄 Document Upload")
                input_files = gr.File(
                    label="Upload Documents (PDF/TXT)",
                    file_types=[".txt", ".pdf"],
                    file_count="multiple",
                    elem_id="file_upload",
                    scale=2
                )
            # Core Parameters Section
            with gr.Group():
                gr.Markdown("### ⚙️ Core Parameters")
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Chunk Size",
                        info="Number of tokens per chunk",
                        elem_id="chunk_size"
                    )
                    num_questions = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="Number of Questions",
                        info="How many questions to generate",
                        elem_id="num_questions"
                    )
                difficulty_level = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=0.1,
                    label="Average Difficulty",
                    info="1: Easy, 5: Very Hard",
                    elem_id="difficulty"
                )
            with gr.Row():
                # Question Types Section
                with gr.Column():
                    gr.Markdown("### 🎯 Question Types")
                    question_types_dict = gr.CheckboxGroup(
                        choices=[
                            "Analytical", "Application Based", "Conceptual",
                            "Counterfactual", "Factual", "Open Ended",
                            "True False", "False Premise", "Clarification",
                            "Edge Case"
                        ],
                        value=["Analytical", "Factual", "Conceptual", "Application Based"],
                        label="Select Types",
                        elem_id="question_types",
                        elem_classes="checkbox-group"
                    )
                # Complexity and Models Section
                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### 🔄 Complexity")
                        complexity_types_dict = gr.CheckboxGroup(
                            choices=["Single Shot", "Multi Hop"],
                            value=["Single Shot", "Multi Hop"],
                            label="Select Complexity",
                            elem_id="complexity_types"
                        )
                    with gr.Group():
                        gr.Markdown("### 🤖 Models")
                        model_selection_dict = gr.CheckboxGroup(
                            choices=[
                                "Mistral Large",
                                "Llama-3 70B",
                                "GPT-4",
                                "Claude 3.5 Sonnet",
                                "Gemini Pro"
                            ],
                            value=["Mistral Large", "GPT-4", "Claude 3.5 Sonnet"],
                            label="Select Models",
                            elem_id="models",
                            elem_classes="model-select"
                        )
            process_btn = gr.Button(
                "🚀 Generate Questions",
                variant="primary",
                size="lg",
                elem_id="generate_btn"
            )
        # Right column for outputs
        with gr.Column(scale=3, elem_classes="output-panel"):
            with gr.Group():
                gr.Markdown("### 📊 Generated Questions")
                output_status = gr.Textbox(
                    label="Status",
                    elem_id="status"
                )
                # Headers mirror the column names of the frame returned by process_files.
                output_table = gr.Dataframe(
                    headers=["question_type", "complexity", "question", "answer", "model", "difficulty"],
                    label="Questions and Answers",
                    elem_id="results_table",
                    wrap=True
                )
                # Output-only file component: it receives the generated CSV path.
                csv_output = gr.File(
                    label="Download Results",
                    elem_id="csv_download",
                    elem_classes="download-btn",
                    interactive=False
                )
    # Instructions Section
    with gr.Accordion("📝 Instructions", open=False):
        gr.Markdown("""
1. **Upload Documents**: Support for .txt and .pdf files
2. **Configure Parameters**:
- Set chunk size for document processing
- Choose number of questions to generate
- Adjust difficulty level (1: Easy to 5: Very Hard)
3. **Select Question Types**: Choose from various question categories
4. **Set Complexity**: Single-shot or multi-hop reasoning
5. **Choose Models**: Select AI models for ensemble generation
6. Click '🚀 Generate Questions' to start
7. Download results as CSV for further use
""")
    # Citation Section
    gr.Markdown("""
### 📚 Citation
If you find this work helpful in your research or applications, please cite:
""")
    with gr.Group(elem_classes="citation-box"):
        # "\\url" is an escaped backslash, so the rendered BibTeX reads "\url{...}".
        gr.Markdown("""```bibtex
@misc{yourbench2024,
title={Yourbench: A Dynamic Question Generation Framework for Document Understanding},
author={Your Team},
year={2024},
publisher={GitHub},
journal={GitHub repository},
howpublished={\\url{https://github.com/yourbench/yourbench}},
}
```""")
    # API Information
    gr.Markdown("""
### 🔌 API Usage
This tool can be used programmatically through its API. Here's how to interact with it:
```python
import gradio_client
client = gradio_client.Client("YOUR_SPACE_URL")
result = client.predict(
["document.pdf"], # Input files
500, # Chunk size
5, # Number of questions
["Analytical", "Factual"], # Question types
["Single Shot"], # Complexity types
3.0, # Difficulty level
["GPT-4", "Claude 3.5 Sonnet"], # Models
api_name="/predict"
)
```
Replace `YOUR_SPACE_URL` with the actual deployment URL. The API endpoint accepts the same parameters
as the web interface and returns a tuple containing the results DataFrame, status message, and CSV file path.
""")
    # Event handler: inputs are passed to process_files positionally, and its
    # three return values fill the table, the status box and the download.
    process_btn.click(
        process_files,
        inputs=[
            input_files, chunk_size, num_questions,
            question_types_dict, complexity_types_dict,
            difficulty_level, model_selection_dict
        ],
        outputs=[output_table, output_status, csv_output],
        # Name the endpoint explicitly so the "/predict" call shown in the
        # API Usage section matches the endpoint that is actually exposed.
        api_name="predict"
    )
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # NOTE(review): per Gradio's launch() docs, share=True also requests a
    # public share link — confirm this is wanted where the app is deployed.
    demo.launch(share=True)