|
import os |
|
import shutil |
|
from datetime import datetime |
|
import gradio as gr |
|
import pandas as pd |
|
import time |
|
import random |
|
import uuid |
|
|
|
def save_uploaded_files(files, session_id):
    """Copy uploaded files into ``telemetry_files/<session_id>/``.

    Args:
        files: Iterable of uploads, or ``None`` for "nothing uploaded".
            Each entry may be a filesystem path (``str``), an object that
            exposes its path as ``.name`` (e.g. a temp-file wrapper), or
            ``None``; ``None`` entries are skipped.
        session_id: Name of the sub-directory created under
            ``telemetry_files``.

    Returns:
        list[str]: Destination paths of the copied files, in input order.
    """
    save_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(save_dir, exist_ok=True)

    saved_paths = []
    for file in files or []:
        if file is None:
            continue
        # Depending on the Gradio version / ``type`` setting, gr.File hands
        # over either a path string or an object carrying the path in
        # ``.name``; accept both instead of assuming the attribute exists.
        source_path = getattr(file, "name", file)
        save_path = os.path.join(save_dir, os.path.basename(source_path))
        # copy2 keeps file metadata (timestamps) alongside the content.
        shutil.copy2(source_path, save_path)
        saved_paths.append(save_path)

    return saved_paths
|
|
|
def _new_session_id():
    """Return a session ID made of a local timestamp plus 8 random hex chars."""
    return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"


def mock_process_documents(files, chunk_size, num_questions, question_types, complexity_types,
                           difficulty, selected_models, *, session_id=None, delay=5):
    """Simulate document processing and return placeholder question/answer rows.

    The uploads are stored through ``save_uploaded_files``; each generated row
    then picks a random question type, complexity and model from the given
    sequences.

    Args:
        files: Uploaded files, forwarded to ``save_uploaded_files``.
        chunk_size: Accepted for interface parity; not used by the mock.
        num_questions: Number of rows to generate. Coerced with ``int`` so a
            float such as ``5.0`` is accepted.
        question_types: Non-empty sequence to sample question types from.
        complexity_types: Non-empty sequence to sample complexities from.
        difficulty: Numeric difficulty, echoed into every row.
        selected_models: Non-empty sequence to sample model names from.
        session_id: Directory name (under ``telemetry_files``) for the
            uploads. A fresh ID is generated when omitted.
        delay: Seconds to sleep before doing anything, imitating latency.

    Returns:
        pandas.DataFrame: One row per question with the columns
        ``question_type``, ``complexity``, ``question``, ``answer``,
        ``model`` and ``difficulty`` (no columns at all when zero rows are
        requested).

    Raises:
        IndexError: If any of the three option sequences is empty while at
            least one row is requested (from ``random.choice``).
    """
    time.sleep(delay)

    if session_id is None:
        session_id = _new_session_id()
    saved_files = save_uploaded_files(files, session_id)
    processed = ", ".join(saved_files)

    data = []
    for number in range(1, int(num_questions) + 1):
        question_type = random.choice(question_types)
        complexity = random.choice(complexity_types)
        model = random.choice(selected_models)

        question = (
            f"[{complexity}] Sample {question_type} question {number} "
            f"(Difficulty: {difficulty:.1f}, Model: {model})"
        )
        answer = f"This is a sample answer for question {number}. Files processed: {processed}"
        data.append({
            "question_type": question_type,
            "complexity": complexity,
            "question": question,
            "answer": answer,
            "model": model,
            "difficulty": difficulty,
        })

    return pd.DataFrame(data)


def generate_csv_file(df, session_id):
    """Write ``df`` to ``telemetry_files/<session_id>/results.csv``.

    Args:
        df: Results to export.
        session_id: Name of the session directory; created if missing.

    Returns:
        str | None: Path of the written CSV, or ``None`` when ``df`` is empty
        (nothing is written in that case).
    """
    if df.empty:
        return None

    session_dir = os.path.join("telemetry_files", session_id)
    os.makedirs(session_dir, exist_ok=True)

    csv_path = os.path.join(session_dir, "results.csv")
    df.to_csv(csv_path, index=False)
    return csv_path


def process_files(
    input_files, chunk_size, num_questions,
    question_types_dict, complexity_types_dict,
    difficulty_level, model_selection_dict
):
    """Validate the form input, run the (mock) pipeline and export a CSV.

    Args:
        input_files: Uploaded files; falsy means nothing was uploaded.
        chunk_size: Forwarded to ``mock_process_documents``.
        num_questions: Forwarded to ``mock_process_documents``.
        question_types_dict: Selected question types.
        complexity_types_dict: Selected complexity types.
        difficulty_level: Forwarded to ``mock_process_documents``.
        model_selection_dict: Selected model names.

    Returns:
        tuple: ``(results_df, status_message, csv_path)``. On a validation
        error the DataFrame is empty, the message starts with ``"Error:"``
        and ``csv_path`` is ``None``.
    """
    if not input_files:
        return pd.DataFrame(), "Error: No files uploaded", None

    if not (question_types_dict and complexity_types_dict and model_selection_dict):
        return pd.DataFrame(), "Error: Please select at least one option from each category", None

    # One ID for the whole request so the stored uploads and results.csv end
    # up in the same telemetry_files/<session_id>/ directory.
    session_id = _new_session_id()

    start_time = time.time()
    results_df = mock_process_documents(
        input_files, chunk_size, num_questions,
        question_types_dict, complexity_types_dict,
        difficulty_level, model_selection_dict,
        session_id=session_id,
    )
    processing_time = time.time() - start_time

    csv_path = generate_csv_file(results_df, session_id)

    return (
        results_df,
        f"Processing completed in {processing_time:.2f} seconds",
        csv_path,
    )
|
|
|
|
|
# Visual theme for the app: start from Gradio's Base theme with the chosen
# hues, font and corner radius ...
theme = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    radius_size=gr.themes.sizes.radius_sm,
)
# ... then override individual style variables. ``theme`` is rebound to the
# value returned by ``set`` so it ends up exactly as a chained call would.
theme = theme.set(
    # Page background (light / dark mode).
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_950",
    # Primary button.
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    # Block labels and titles.
    block_label_text_weight="600",
    block_title_text_weight="600",
    # Input widgets (light / dark mode).
    input_background_fill="white",
    input_background_fill_dark="*neutral_800",
    input_border_color="*neutral_200",
    input_border_color_dark="*neutral_700",
)
|
|
|
|
|
# Gradio UI definition.
# NOTE(review): the nesting of the layout containers (Row/Column/Group) below
# was reconstructed from a copy of this file that had lost its indentation --
# confirm it matches the intended layout.
# NOTE(review): the emoji in the labels were mis-encoded in that copy and have
# been restored. The gear, target and robot glyphs decode unambiguously; the
# remaining ones are best guesses -- confirm against the original design.
with gr.Blocks(
    title="Yourbench - Dynamic Question Generation",
    theme=theme,
    css="""
    .gradio-container {max-width: 1400px !important; margin-left: auto; margin-right: auto}
    .contain { display: flex; flex-direction: column; }
    .contain > * { flex: 1}
    .gap { margin-top: 1rem !important }
    footer {display: none !important}
    .citation-box {
        background-color: #f8fafc;
        border: 1px solid #e2e8f0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin-top: 2rem;
        font-family: monospace;
    }
    .citation-box pre {
        margin: 0;
        white-space: pre-wrap;
    }
    .main-panel { min-height: 600px }
    .output-panel { min-height: 400px }
    .checkbox-group { max-height: 200px; overflow-y: auto }
    .model-select { max-height: 150px }
    .download-btn { margin-top: 1rem !important }
    """
) as demo:

    gr.Markdown("""
    # 📚 Yourbench: Dynamic Question Generation Tool

    Generate high-quality questions and answers from your documents using state-of-the-art language models.
    This tool helps create diverse question types with varying complexity levels, perfect for educational
    assessment and content understanding.
    """)

    with gr.Row():
        # Left column: upload and configuration.
        with gr.Column(scale=2, elem_classes="main-panel"):
            with gr.Group():
                gr.Markdown("### 📄 Document Upload")
                input_files = gr.File(
                    label="Upload Documents (PDF/TXT)",
                    file_types=[".txt", ".pdf"],
                    file_count="multiple",
                    elem_id="file_upload",
                    scale=2
                )

            with gr.Group():
                gr.Markdown("### ⚙️ Core Parameters")
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Chunk Size",
                        info="Number of tokens per chunk",
                        elem_id="chunk_size"
                    )
                    num_questions = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="Number of Questions",
                        info="How many questions to generate",
                        elem_id="num_questions"
                    )

                difficulty_level = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=0.1,
                    label="Average Difficulty",
                    info="1: Easy, 5: Very Hard",
                    elem_id="difficulty"
                )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 🎯 Question Types")
                    question_types_dict = gr.CheckboxGroup(
                        choices=[
                            "Analytical", "Application Based", "Conceptual",
                            "Counterfactual", "Factual", "Open Ended",
                            "True False", "False Premise", "Clarification",
                            "Edge Case"
                        ],
                        value=["Analytical", "Factual", "Conceptual", "Application Based"],
                        label="Select Types",
                        elem_id="question_types",
                        elem_classes="checkbox-group"
                    )

                with gr.Column():
                    with gr.Group():
                        gr.Markdown("### 🔄 Complexity")
                        complexity_types_dict = gr.CheckboxGroup(
                            choices=["Single Shot", "Multi Hop"],
                            value=["Single Shot", "Multi Hop"],
                            label="Select Complexity",
                            elem_id="complexity_types"
                        )

                    with gr.Group():
                        gr.Markdown("### 🤖 Models")
                        model_selection_dict = gr.CheckboxGroup(
                            choices=[
                                "Mistral Large",
                                "Llama-3 70B",
                                "GPT-4",
                                "Claude 3.5 Sonnet",
                                "Gemini Pro"
                            ],
                            value=["Mistral Large", "GPT-4", "Claude 3.5 Sonnet"],
                            label="Select Models",
                            elem_id="models",
                            elem_classes="model-select"
                        )

            process_btn = gr.Button(
                "🚀 Generate Questions",
                variant="primary",
                size="lg",
                elem_id="generate_btn"
            )

        # Right column: status, results table and CSV download.
        with gr.Column(scale=3, elem_classes="output-panel"):
            with gr.Group():
                gr.Markdown("### 📊 Generated Questions")
                output_status = gr.Textbox(
                    label="Status",
                    elem_id="status"
                )
                output_table = gr.Dataframe(
                    headers=["question_type", "complexity", "question", "answer", "model", "difficulty"],
                    label="Questions and Answers",
                    elem_id="results_table",
                    wrap=True
                )
                csv_output = gr.File(
                    label="Download Results",
                    elem_id="csv_download",
                    elem_classes="download-btn",
                    interactive=False
                )

    with gr.Accordion("📖 Instructions", open=False):
        gr.Markdown("""
        1. **Upload Documents**: Support for .txt and .pdf files
        2. **Configure Parameters**:
           - Set chunk size for document processing
           - Choose number of questions to generate
           - Adjust difficulty level (1: Easy to 5: Very Hard)
        3. **Select Question Types**: Choose from various question categories
        4. **Set Complexity**: Single-shot or multi-hop reasoning
        5. **Choose Models**: Select AI models for ensemble generation
        6. Click '🚀 Generate Questions' to start
        7. Download results as CSV for further use
        """)

    gr.Markdown("""
    ### 📝 Citation
    If you find this work helpful in your research or applications, please cite:
    """)

    with gr.Group(elem_classes="citation-box"):
        gr.Markdown("""```bibtex
        @misc{yourbench2024,
          title={Yourbench: A Dynamic Question Generation Framework for Document Understanding},
          author={Your Team},
          year={2024},
          publisher={GitHub},
          journal={GitHub repository},
          howpublished={\\url{https://github.com/yourbench/yourbench}},
        }
        ```""")

    gr.Markdown("""
    ### 🔌 API Usage

    This tool can be used programmatically through its API. Here's how to interact with it:

    ```python
    import gradio_client

    client = gradio_client.Client("YOUR_SPACE_URL")

    result = client.predict(
        ["document.pdf"],  # Input files
        500,  # Chunk size
        5,  # Number of questions
        ["Analytical", "Factual"],  # Question types
        ["Single Shot"],  # Complexity types
        3.0,  # Difficulty level
        ["GPT-4", "Claude 3.5 Sonnet"],  # Models
        api_name="/predict"
    )
    ```

    Replace `YOUR_SPACE_URL` with the actual deployment URL. The API endpoint accepts the same parameters
    as the web interface and returns a tuple containing the results DataFrame, status message, and CSV file path.
    """)

    # Wire the button to the processing function. The endpoint is named
    # explicitly so the `api_name="/predict"` call documented above keeps
    # working; without it the default endpoint name depends on the Gradio
    # version (newer releases derive it from the handler's function name).
    process_btn.click(
        process_files,
        inputs=[
            input_files, chunk_size, num_questions,
            question_types_dict, complexity_types_dict,
            difficulty_level, model_selection_dict
        ],
        outputs=[output_table, output_status, csv_output],
        api_name="predict"
    )
|
|
|
# Script entry point: start the Gradio server only when this file is run
# directly. ``share=True`` asks Gradio for a shareable link in addition to
# the local server.
if __name__ == "__main__":
    demo.launch(share=True)
|
|
|
|