Spaces:
Runtime error
Runtime error
File size: 5,858 Bytes
848034c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import html
import json

import gradio as gr
# Load JSONL data
def load_data(path='/Users/suyashsrivastava/f_5_repro/sample_viewer/samples.jsonl'):
    """Load samples from a JSON Lines file, skipping unusable lines.

    Every line is parsed as JSON on its own. A line is skipped when it is
    not valid JSON (blank lines included), when it does not decode to a JSON
    object, or when its ``"droped"`` flag is truthy.

    Args:
        path: Location of the ``.jsonl`` file. The default is an absolute
            path under one user's home directory, so it only resolves on a
            machine that has that exact file; pass an explicit path
            elsewhere.

    Returns:
        A list of the retained sample dicts, in file order.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    samples = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Keep the try body to the one call that can raise the error we
            # intend to swallow.
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip invalid JSON lines
            # Valid JSON that is not an object (e.g. `42`, `[1, 2]`, `null`)
            # has no .get(); skip it instead of crashing the whole load.
            if not isinstance(item, dict):
                continue
            # NOTE(review): the flag is spelled "droped" here; confirm that
            # this matches the key actually written into the dataset.
            if not item.get("droped", False):  # Only include non-dropped items
                samples.append(item)
    return samples
# Load the dataset once at import time; the functions and UI below read it.
data = load_data()
print(f"Loaded {len(data)} valid samples")

# Distinct, sorted values found in the data. Samples whose field is missing
# or empty contribute nothing, and empty fragments of a '|'-separated
# categories string are ignored.
languages = sorted({sample['language'] for sample in data if sample.get('language')})
modes = sorted({
    sample['translation_mode']
    for sample in data
    if sample.get('translation_mode')
})
all_categories = sorted({
    name
    for sample in data
    if sample.get('categories')
    for name in sample['categories'].split('|')
    if name
})
print(f"Found {len(languages)} languages, {len(modes)} modes, and {len(all_categories)} categories")
def get_categories_for_selection(language, mode, samples=None):
    """Return the sorted categories available for a language/mode pair.

    Args:
        language: Value compared against each sample's ``'language'``.
        mode: Value compared against each sample's ``'translation_mode'``.
        samples: Optional iterable of sample dicts to search. When omitted,
            the module-level ``data`` list is used.

    Returns:
        A sorted list of distinct, non-empty category names taken from the
        ``'|'``-separated ``'categories'`` field of every matching sample,
        or ``[]`` when ``language`` or ``mode`` is empty.
    """
    if not language or not mode:
        return []
    if samples is None:
        samples = data

    found = set()
    for item in samples:
        if item.get('language') != language or item.get('translation_mode') != mode:
            continue
        raw = item.get('categories')
        # A missing or null categories field has nothing to split.
        if not isinstance(raw, str):
            continue
        # Drop empty fragments ("", "a|", "a||b") so the dropdown never gets
        # a blank category in addition to its own placeholder entry.
        found.update(name for name in raw.split('|') if name)
    return sorted(found)
def filter_samples(mode, language, category, samples=None):
    """Render every sample matching the three filters as an HTML string.

    A sample matches when its ``'translation_mode'`` equals ``mode``, its
    ``'language'`` equals ``language`` and ``category`` is one of the
    ``'|'``-separated entries of its ``'categories'`` string. Samples that
    lack any of those keys, or whose ``'categories'`` is not a string, are
    skipped.

    Args:
        mode: Selected translation mode.
        language: Selected language.
        category: Selected category.
        samples: Optional iterable of sample dicts to search. When omitted,
            the module-level ``data`` list is used.

    Returns:
        A prompt asking for all filters when any of them is empty; otherwise
        the concatenated HTML cards of the matching samples, or an HTML
        "no matches" message that echoes the filters. Sample text and the
        echoed filter values are HTML-escaped.
    """
    if not mode or not language or not category:
        return "Please select all filters (Translation Mode, Language, and Category)"
    if samples is None:
        samples = data

    cards = []
    total_checked = 0
    print(f"Filtering for: Mode={mode}, Language={language}, Category={category}")
    for item in samples:
        total_checked += 1
        # Skip items missing required fields
        if not all(k in item for k in ['translation_mode', 'language', 'categories']):
            continue
        if item['translation_mode'] != mode or item['language'] != language:
            continue
        raw_categories = item['categories']
        # A null / non-string categories value cannot be split; treat it as
        # "no categories" rather than raising.
        if not isinstance(raw_categories, str):
            continue
        if category not in raw_categories.split('|'):
            continue
        cards.append(_render_sample(len(cards) + 1, item))

    print(f"Checked {total_checked} items, found {len(cards)} matches")
    if cards:
        return "".join(cards)

    # The filter values are echoed into markup, so escape them as well.
    safe_mode = html.escape(str(mode))
    safe_language = html.escape(str(language))
    safe_category = html.escape(str(category))
    debug_info = (
        f"<p style='color: #e2e8f0;'>Debug info: Checked {total_checked} items, "
        f"found 0 matches for Mode={safe_mode}, Language={safe_language}, "
        f"Category={safe_category}</p>"
    )
    return (
        "<p style='color: #f56565;'><strong>No matching samples found for the "
        "selected filters.</strong> Try different filter combinations.</p>"
        f"{debug_info}"
    )


def _render_sample(index, item):
    """Return the HTML card for one sample, numbered ``index``."""
    # Dataset text is arbitrary; escape it so '<', '>' and '&' are shown
    # literally instead of being interpreted as markup.
    text = html.escape(str(item.get('text', '')))
    translit = html.escape(str(item.get('translit_text') or '(None)'))
    original = html.escape(str(item.get('original_text', '')))
    return f"""
<div style="margin-bottom: 20px; border: 1px solid #4a5568; border-radius: 8px; overflow: hidden; background-color: #2d3748;">
<div style="background-color: #1a202c; padding: 10px; border-bottom: 1px solid #4a5568;">
<strong style="color: #e2e8f0;">Sample {index}</strong>
</div>
<div style="padding: 15px;">
<p><strong style="color:#90cdf4;">Text:</strong><br><span style="color:#e2e8f0;">{text}</span></p>
<p><strong style="color:#9ae6b4;">Translit:</strong><br><span style="color:#e2e8f0;">{translit}</span></p>
<p><strong style="color:#fbd38d;">Original:</strong><br><span style="color:#e2e8f0;">{original}</span></p>
</div>
</div>
"""
# Custom CSS for the dark theme, passed to gr.Blocks via its css= argument.
# It hides the page footer and sets dark backgrounds / light text colours on
# the container, dropdowns, headings, labels, paragraphs and primary buttons.
# The string is handed over verbatim, so edits here change the page styling.
custom_css = """
footer {visibility: hidden}
.gradio-container {
background-color: #1a202c;
color: #e2e8f0;
}
.dark h1, .dark h2, .dark h3 {
color: #e2e8f0 !important;
}
.gradio-dropdown {
background-color: #2d3748;
color: #e2e8f0;
border-color: #4a5568;
}
.dark button.primary {
background-color: #4299e1 !important;
}
.dark label {
color: #e2e8f0 !important;
}
.dark p {
color: #e2e8f0 !important;
}
"""
# Assemble the Gradio UI with the Blocks API: a header, one row holding the
# three filter dropdowns, a button and an HTML pane for the results.
with gr.Blocks(title="Sample Viewer", css=custom_css, theme="dark") as demo:
    gr.Markdown("## π Multilingual Sample Viewer")
    gr.Markdown("Select filters to view samples from the dataset")

    with gr.Row():
        mode_input = gr.Dropdown(choices=[""] + modes, label="Translation Mode", value="")
        lang_input = gr.Dropdown(choices=[""] + languages, label="Language", value="")
        # Category starts with only the blank entry; it is filled in once a
        # language and a mode have been chosen.
        cat_input = gr.Dropdown(choices=[""], label="Category", value="")

    def update_categories(language, mode):
        """Return a Category dropdown update for the chosen language and mode.

        The choices always start with the blank entry and the selection is
        reset to it; real categories are appended only when both a language
        and a mode are selected.
        """
        options = [""]
        if language and mode:
            options = options + get_categories_for_selection(language, mode)
        return gr.Dropdown(choices=options, value="")

    # Changing either the language or the mode refreshes the category list.
    for _trigger in (lang_input, mode_input):
        _trigger.change(
            fn=update_categories,
            inputs=[lang_input, mode_input],
            outputs=cat_input,
        )

    submit_btn = gr.Button("π Show Samples", variant="primary")
    output_display = gr.HTML(label="Results")

    # The button renders the samples matching the three selected filters.
    submit_btn.click(
        fn=filter_samples,
        inputs=[mode_input, lang_input, cat_input],
        outputs=output_display,
    )


if __name__ == "__main__":
    demo.launch(share=True)
|