# sample_viewer / app.py
# Uploaded by: suyash-sarvam
# Commit message: "Upload folder using huggingface_hub"
# Commit: 848034c (verified)
import gradio as gr
import json
# Load JSONL data
def load_data(path='/Users/suyashsrivastava/f_5_repro/sample_viewer/samples.jsonl'):
    """Load sample records from a JSON Lines file, skipping unusable lines.

    Each line of the file is parsed as one JSON document. A line is skipped
    when it is not valid JSON, when it does not decode to a JSON object, or
    when the object carries a truthy ``"droped"`` flag.

    Args:
        path: Location of the JSONL file. The default is an absolute path
            under one specific user's home directory, so pass an explicit
            path when running anywhere else.

    Returns:
        list[dict]: The retained records, in file order.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    records = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Keep the try body to the one call that can raise.
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip invalid JSON lines (blank lines land here too)
            # Valid JSON is not necessarily an object: a bare list, number,
            # string or null has no .get() and would raise AttributeError.
            if not isinstance(item, dict):
                continue
            # NOTE(review): the flag is spelled "droped" here; confirm it
            # matches the spelling used in the data file before changing it.
            if not item.get("droped", False):  # Only include non-dropped items
                records.append(item)
    return records
# Load the dataset once at import time; the filter functions read this list.
data = load_data()
print(f"Loaded {len(data)} valid samples")

# Distinct dropdown values. Records whose field is missing or empty are ignored.
languages = sorted({d['language'] for d in data if d.get('language')})
modes = sorted({d['translation_mode'] for d in data if d.get('translation_mode')})
# 'categories' holds '|'-separated names; empty fragments are discarded.
all_categories = sorted({
    cat
    for d in data
    if d.get('categories')
    for cat in d['categories'].split('|')
    if cat
})
print(f"Found {len(languages)} languages, {len(modes)} modes, and {len(all_categories)} categories")
def get_categories_for_selection(language, mode, samples=None):
    """Return the sorted categories seen for one language / translation mode.

    Args:
        language: Value compared against each record's ``'language'`` field.
        mode: Value compared against each record's ``'translation_mode'`` field.
        samples: Optional iterable of record dicts to scan. Defaults to the
            module-level ``data`` list.

    Returns:
        list[str]: Sorted, de-duplicated category names. Empty when either
        filter is unset or when no record matches.
    """
    if not language or not mode:
        return []
    if samples is None:
        samples = data
    filtered_categories = set()
    for item in samples:
        if item.get('language') != language or item.get('translation_mode') != mode:
            continue
        # A missing, null or empty 'categories' value contributes nothing
        # (previously a null value crashed on .split()).
        raw = item.get('categories')
        if not raw:
            continue
        # Drop the empty names produced by "a||b" or a trailing "|", the same
        # way the module-level all_categories list filters them.
        filtered_categories.update(cat for cat in raw.split('|') if cat)
    return sorted(filtered_categories)
def _render_sample(index, item):
    """Build the HTML card for one matching record, escaping all record text."""
    from html import escape

    # Record fields are free text; escape them so '<', '>' and '&' are shown
    # literally instead of being interpreted as markup.
    text = escape(str(item.get('text', '')))
    translit = escape(str(item.get('translit_text') or '(None)'))
    original = escape(str(item.get('original_text', '')))
    return f"""
    <div style="margin-bottom: 20px; border: 1px solid #4a5568; border-radius: 8px; overflow: hidden; background-color: #2d3748;">
    <div style="background-color: #1a202c; padding: 10px; border-bottom: 1px solid #4a5568;">
    <strong style="color: #e2e8f0;">Sample {index}</strong>
    </div>
    <div style="padding: 15px;">
    <p><strong style="color:#90cdf4;">Text:</strong><br><span style="color:#e2e8f0;">{text}</span></p>
    <p><strong style="color:#9ae6b4;">Translit:</strong><br><span style="color:#e2e8f0;">{translit}</span></p>
    <p><strong style="color:#fbd38d;">Original:</strong><br><span style="color:#e2e8f0;">{original}</span></p>
    </div>
    </div>
    """


def filter_samples(mode, language, category, samples=None):
    """Render every record matching the three filters as an HTML fragment.

    Args:
        mode: Required value of each record's ``'translation_mode'`` field.
        language: Required value of each record's ``'language'`` field.
        category: Name that must appear in the record's ``'|'``-separated
            ``'categories'`` field.
        samples: Optional iterable of record dicts to scan. Defaults to the
            module-level ``data`` list.

    Returns:
        str: A plain prompt when any filter is unset; otherwise the
        concatenated HTML cards of all matches, numbered from 1, or an HTML
        "no matches" message with debug counts when nothing matches.
    """
    from html import escape

    if not mode or not language or not category:
        return "Please select all filters (Translation Mode, Language, and Category)"
    if samples is None:
        samples = data

    print(f"Filtering for: Mode={mode}, Language={language}, Category={category}")
    cards = []
    total_checked = 0
    for item in samples:
        total_checked += 1
        # Skip items missing required fields
        if not all(k in item for k in ('translation_mode', 'language', 'categories')):
            continue
        if item['translation_mode'] != mode or item['language'] != language:
            continue
        # A null 'categories' value means "no categories" rather than a crash
        # on .split().
        if category not in (item['categories'] or '').split('|'):
            continue
        # The item matches all criteria; cards are numbered in match order.
        cards.append(_render_sample(len(cards) + 1, item))
    print(f"Checked {total_checked} items, found {len(cards)} matches")

    if cards:
        return "".join(cards)
    # The filter values are echoed into HTML as well, so escape them too.
    debug_info = (
        f"<p style='color: #e2e8f0;'>Debug info: Checked {total_checked} items, "
        f"found 0 matches for Mode={escape(str(mode))}, "
        f"Language={escape(str(language))}, Category={escape(str(category))}</p>"
    )
    return (
        "<p style='color: #f56565;'><strong>No matching samples found for the "
        "selected filters.</strong> Try different filter combinations.</p>"
        f"{debug_info}"
    )
# Custom CSS for the dark theme, passed to gr.Blocks(css=...) below.
# It hides the page footer, gives the container and dropdowns a dark background
# with light text, forces light text on headings, labels and paragraphs under
# the .dark class, and gives primary buttons a blue background.
custom_css = """
footer {visibility: hidden}
.gradio-container {
background-color: #1a202c;
color: #e2e8f0;
}
.dark h1, .dark h2, .dark h3 {
color: #e2e8f0 !important;
}
.gradio-dropdown {
background-color: #2d3748;
color: #e2e8f0;
border-color: #4a5568;
}
.dark button.primary {
background-color: #4299e1 !important;
}
.dark label {
color: #e2e8f0 !important;
}
.dark p {
color: #e2e8f0 !important;
}
"""
# Gradio interface using Blocks: three filter dropdowns, a submit button and an
# HTML results pane.
# NOTE(review): "dark" may not be a built-in Gradio theme name — confirm that it
# resolves to the intended theme in the Gradio version in use.
with gr.Blocks(title="Sample Viewer", css=custom_css, theme="dark") as demo:
    # The emoji in the two labels below were mis-encoded (mojibake) and have
    # been restored to the intended characters.
    gr.Markdown("## 📘 Multilingual Sample Viewer")
    gr.Markdown("Select filters to view samples from the dataset")

    # NOTE(review): the original indentation was lost; the three dropdowns are
    # assumed to share one row, with the button and results below it.
    with gr.Row():
        mode_input = gr.Dropdown(choices=[""] + modes, label="Translation Mode", value="")
        lang_input = gr.Dropdown(choices=[""] + languages, label="Language", value="")
        # Starts empty; filled in once both a language and a mode are chosen.
        cat_input = gr.Dropdown(choices=[""], label="Category", value="")

    def update_categories(language, mode):
        """Return a Category dropdown limited to the chosen language and mode.

        The selection is always reset to the blank entry. When either filter is
        unset, the blank entry is the only choice offered.
        """
        if not language or not mode:
            return gr.Dropdown(choices=[""], value="")
        # Drop empty names so the leading blank choice is not duplicated.
        categories = [c for c in get_categories_for_selection(language, mode) if c]
        return gr.Dropdown(choices=[""] + categories, value="")

    # Refresh the category choices whenever the language or the mode changes.
    for trigger in (lang_input, mode_input):
        trigger.change(
            fn=update_categories,
            inputs=[lang_input, mode_input],
            outputs=cat_input,
        )

    submit_btn = gr.Button("🔍 Show Samples", variant="primary")
    output_display = gr.HTML(label="Results")
    submit_btn.click(
        fn=filter_samples,
        inputs=[mode_input, lang_input, cat_input],
        outputs=output_display,
    )
if __name__ == "__main__":
    # Start the app only when this file is run as a script, not when imported.
    # share=True is passed through to Gradio's launch() unchanged.
    demo.launch(share=True)