Spaces:
Runtime error
Runtime error
import html
import json
from pathlib import Path

import gradio as gr
# Load JSONL data | |
def load_data(path='/Users/suyashsrivastava/f_5_repro/sample_viewer/samples.jsonl'):
    """Load samples from a JSONL file, skipping dropped and malformed lines.

    Every line is parsed as JSON. A line is kept only when it decodes to a
    JSON object whose ``"droped"`` flag is absent or falsy.

    Args:
        path: Location of the JSONL file. The default is an absolute path on
            the original author's machine; when *path* is not an existing
            file, a file with the same name next to this module is tried
            instead so the app can still start on other hosts.

    Returns:
        A list of dicts, one per retained record, in file order.

    Raises:
        FileNotFoundError: If neither *path* nor the fallback file exists.
    """
    source = Path(path)
    if not source.is_file():
        # The default path only exists on one developer machine; fall back to
        # a same-named file shipped alongside this script when it is missing.
        sibling = Path(__file__).resolve().with_name(source.name)
        if sibling.is_file():
            print(f"{source} not found, loading {sibling} instead")
            source = sibling

    records = []
    with open(source, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip invalid JSON lines (blank lines included)
            # Valid JSON that is not an object (list, string, null, ...) has
            # no .get(); treat it like any other malformed record.
            if not isinstance(item, dict):
                continue
            # NOTE(review): the flag is spelled "droped" here; confirm that
            # this matches the key actually written into the dataset.
            if not item.get("droped", False):
                records.append(item)
    return records
# Read the dataset once at import time; every filter below works on this list.
data = load_data()
print(f"Loaded {len(data)} valid samples")

# Distinct, non-empty values used to populate the filter dropdowns.
languages = sorted({d['language'] for d in data if d.get('language')})
modes = sorted({d['translation_mode'] for d in data if d.get('translation_mode')})

# 'categories' is a single '|'-separated string per sample; collect each
# non-empty token across all samples that have the field set.
_category_pool = set()
for d in data:
    if not d.get('categories'):
        continue
    for cat in d['categories'].split('|'):
        if cat:
            _category_pool.add(cat)
all_categories = sorted(_category_pool)

print(f"Found {len(languages)} languages, {len(modes)} modes, and {len(all_categories)} categories")
def get_categories_for_selection(language, mode):
    """Return the sorted categories seen for one language / mode pair.

    Scans the module-level ``data`` list and collects every category token
    from samples whose ``language`` and ``translation_mode`` equal the given
    values.

    Args:
        language: Selected language value; falsy means "nothing selected".
        mode: Selected translation mode; falsy means "nothing selected".

    Returns:
        A sorted list of distinct, non-empty category names. Empty when
        either argument is falsy or no sample matches.
    """
    if not language or not mode:
        return []

    found = set()
    for item in data:
        if item.get('language') != language:
            continue
        if item.get('translation_mode') != mode:
            continue
        raw = item.get('categories')
        # A missing or non-string value cannot be split; skip it rather than
        # raising AttributeError on e.g. None.
        if not isinstance(raw, str):
            continue
        # Drop empty tokens (from "" or "a||b") so the result agrees with how
        # all_categories is built and never duplicates the "" placeholder
        # that the dropdown prepends.
        found.update(cat for cat in raw.split('|') if cat)
    return sorted(found)
def _render_sample_card(index, item):
    """Return the HTML card for one sample, with every data field escaped."""
    # Dataset text is arbitrary content; escape it so characters such as
    # '<' and '&' are displayed literally instead of being parsed as markup.
    text = html.escape(str(item.get('text', '')))
    translit = html.escape(str(item.get('translit_text') or '(None)'))
    original = html.escape(str(item.get('original_text', '')))
    return f"""
<div style="margin-bottom: 20px; border: 1px solid #4a5568; border-radius: 8px; overflow: hidden; background-color: #2d3748;">
<div style="background-color: #1a202c; padding: 10px; border-bottom: 1px solid #4a5568;">
<strong style="color: #e2e8f0;">Sample {index}</strong>
</div>
<div style="padding: 15px;">
<p><strong style="color:#90cdf4;">Text:</strong><br><span style="color:#e2e8f0;">{text}</span></p>
<p><strong style="color:#9ae6b4;">Translit:</strong><br><span style="color:#e2e8f0;">{translit}</span></p>
<p><strong style="color:#fbd38d;">Original:</strong><br><span style="color:#e2e8f0;">{original}</span></p>
</div>
</div>
"""


def filter_samples(mode, language, category):
    """Render every sample matching all three filters as one HTML fragment.

    Scans the module-level ``data`` list. A sample matches when its
    ``translation_mode`` and ``language`` equal the given values and
    *category* is one of the '|'-separated tokens in its ``categories``
    string. Samples lacking any of those fields are ignored.

    Args:
        mode: Selected translation mode.
        language: Selected language.
        category: Selected category.

    Returns:
        An HTML string: a prompt when any filter is falsy, the concatenated
        sample cards when there are matches, or a "no matches" message with
        the scan statistics otherwise.
    """
    if not mode or not language or not category:
        return "Please select all filters (Translation Mode, Language, and Category)"

    print(f"Filtering for: Mode={mode}, Language={language}, Category={category}")

    cards = []
    total_checked = 0
    for item in data:
        total_checked += 1
        # mode/language are truthy here, so a missing key (None) never matches.
        if item.get('translation_mode') != mode:
            continue
        if item.get('language') != language:
            continue
        categories = item.get('categories')
        # Guard against missing or non-string values before splitting.
        if not isinstance(categories, str):
            continue
        if category not in categories.split('|'):
            continue
        cards.append(_render_sample_card(len(cards) + 1, item))

    print(f"Checked {total_checked} items, found {len(cards)} matches")

    if cards:
        return "".join(cards)

    # The filter values are echoed back into HTML, so escape them as well.
    safe_mode = html.escape(str(mode))
    safe_language = html.escape(str(language))
    safe_category = html.escape(str(category))
    debug_info = (
        f"<p style='color: #e2e8f0;'>Debug info: Checked {total_checked} items, "
        f"found 0 matches for Mode={safe_mode}, Language={safe_language}, "
        f"Category={safe_category}</p>"
    )
    return (
        "<p style='color: #f56565;'><strong>No matching samples found for the "
        "selected filters.</strong> Try different filter combinations.</p>"
        f"{debug_info}"
    )
# Custom CSS for the dark look, passed to gr.Blocks(css=...) below.
# It hides the page footer, gives the Gradio container and dropdowns a dark
# background with light text, and forces light text / a blue primary button
# for elements under the `.dark` class.
custom_css = """
footer {visibility: hidden}
.gradio-container {
background-color: #1a202c;
color: #e2e8f0;
}
.dark h1, .dark h2, .dark h3 {
color: #e2e8f0 !important;
}
.gradio-dropdown {
background-color: #2d3748;
color: #e2e8f0;
border-color: #4a5568;
}
.dark button.primary {
background-color: #4299e1 !important;
}
.dark label {
color: #e2e8f0 !important;
}
.dark p {
color: #e2e8f0 !important;
}
"""
# Assemble the Blocks UI: three filter dropdowns, a submit button, and an
# HTML pane that shows the rendered samples.
# NOTE(review): theme="dark" is kept as written; confirm that the installed
# Gradio version recognises it as a theme name.
with gr.Blocks(title="Sample Viewer", css=custom_css, theme="dark") as demo:
    gr.Markdown("## π Multilingual Sample Viewer")
    gr.Markdown("Select filters to view samples from the dataset")

    with gr.Row():
        mode_input = gr.Dropdown(choices=[""] + modes, label="Translation Mode", value="")
        lang_input = gr.Dropdown(choices=[""] + languages, label="Language", value="")
        cat_input = gr.Dropdown(choices=[""], label="Category", value="")

    def update_categories(language, mode):
        """Rebuild the category dropdown for the current language/mode pair.

        The returned dropdown always starts with the empty placeholder and has
        it selected; categories are offered only once both filters are set.
        """
        if language and mode:
            options = get_categories_for_selection(language, mode)
        else:
            options = []
        return gr.Dropdown(choices=[""] + options, value="")

    # Changing either upstream filter refreshes the category choices.
    for trigger in (lang_input, mode_input):
        trigger.change(
            fn=update_categories,
            inputs=[lang_input, mode_input],
            outputs=cat_input,
        )

    submit_btn = gr.Button("π Show Samples", variant="primary")
    output_display = gr.HTML(label="Results")

    # Render the matching samples into the HTML pane on click.
    submit_btn.click(
        fn=filter_samples,
        inputs=[mode_input, lang_input, cat_input],
        outputs=output_display,
    )
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # share=True is passed through to Gradio's launch(); it requests a public
    # share link in addition to the local server.
    demo.launch(share=True)