File size: 5,858 Bytes
848034c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import html
import json

import gradio as gr

# Load JSONL data
def load_data(path='/Users/suyashsrivastava/f_5_repro/sample_viewer/samples.jsonl'):
    """Load samples from a JSONL file (one JSON object per line).

    Lines that are blank, are not valid JSON, or decode to something other
    than a JSON object are skipped. Objects whose "droped" field is truthy
    are excluded from the result.

    Args:
        path: Location of the JSONL file to read.

    Returns:
        A list of dicts, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    data = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Keep the try body to the one call that can raise the parse error.
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip invalid JSON lines (this includes blank lines)

            # A line such as `null` or `[1, 2]` is valid JSON but has no
            # .get(); skip it instead of aborting the whole load.
            if not isinstance(item, dict):
                continue

            # NOTE(review): the flag is spelled "droped" here; confirm this
            # matches the key the dataset producer actually writes.
            if not item.get("droped", False):  # Only include non-dropped items
                data.append(item)
    return data

# Read the dataset once at import time; everything below works on this list.
data = load_data()
print(f"Loaded {len(data)} valid samples")

# Distinct, sorted dropdown values; samples with a missing or empty field
# contribute nothing to the corresponding list.
languages = sorted({d['language'] for d in data if d.get('language')})
modes = sorted({d['translation_mode'] for d in data if d.get('translation_mode')})
# "categories" is a single '|'-separated string; empty segments are dropped.
all_categories = sorted({
    cat
    for d in data
    if d.get('categories')
    for cat in d['categories'].split('|')
    if cat
})

print(f"Found {len(languages)} languages, {len(modes)} modes, and {len(all_categories)} categories")

def get_categories_for_selection(language, mode):
    """Return the sorted categories seen for a language / translation-mode pair.

    Scans the module-level ``data`` list and collects every category named in
    the '|'-separated ``categories`` field of samples whose ``language`` and
    ``translation_mode`` equal the given values.

    Args:
        language: Value to match against each sample's ``language`` field.
        mode: Value to match against each sample's ``translation_mode`` field.

    Returns:
        A sorted list of distinct, non-empty category names; an empty list
        when either argument is empty or nothing matches.
    """
    if not language or not mode:
        return []

    matching = set()
    for item in data:
        if item.get('language') != language or item.get('translation_mode') != mode:
            continue

        raw_categories = item.get('categories')
        # The key may be absent or hold a non-string (e.g. null in the JSON);
        # such samples carry no categories.
        if not isinstance(raw_categories, str):
            continue

        # Drop empty segments ("a||b", trailing "|", or an empty string) so
        # the result agrees with how all_categories is built and the dropdown
        # never gains a blank entry.
        matching.update(cat for cat in raw_categories.split('|') if cat)

    return sorted(matching)

def _render_sample(item, index):
    """Return the HTML card for one sample, with every field value escaped."""
    # Field values come from the dataset and may contain '<', '>' or '&';
    # escape them so they are displayed as text instead of parsed as markup.
    text = html.escape(str(item.get('text', '')))
    translit = html.escape(str(item.get('translit_text') or '(None)'))
    original = html.escape(str(item.get('original_text', '')))
    return f"""
        <div style="margin-bottom: 20px; border: 1px solid #4a5568; border-radius: 8px; overflow: hidden; background-color: #2d3748;">
            <div style="background-color: #1a202c; padding: 10px; border-bottom: 1px solid #4a5568;">
                <strong style="color: #e2e8f0;">Sample {index}</strong>
            </div>
            <div style="padding: 15px;">
                <p><strong style="color:#90cdf4;">Text:</strong><br><span style="color:#e2e8f0;">{text}</span></p>
                <p><strong style="color:#9ae6b4;">Translit:</strong><br><span style="color:#e2e8f0;">{translit}</span></p>
                <p><strong style="color:#fbd38d;">Original:</strong><br><span style="color:#e2e8f0;">{original}</span></p>
            </div>
        </div>
        """


def filter_samples(mode, language, category):
    """Render every sample matching the three filters as one HTML string.

    A sample from the module-level ``data`` list matches when its
    ``translation_mode`` and ``language`` equal the given values and
    ``category`` is one of the '|'-separated entries of its ``categories``
    field. Samples lacking any of these fields are ignored.

    Args:
        mode: Selected translation mode.
        language: Selected language.
        category: Selected category.

    Returns:
        A plain-text prompt when any filter is empty; otherwise the
        concatenated HTML cards of the matching samples, or an HTML
        "no matches" message with debug details when nothing matched.
    """
    if not mode or not language or not category:
        return "Please select all filters (Translation Mode, Language, and Category)"

    print(f"Filtering for: Mode={mode}, Language={language}, Category={category}")

    filtered_samples = []
    for item in data:
        # .get() yields None for a missing key, which never equals the
        # (non-empty) selected value, so incomplete samples are skipped.
        if item.get('translation_mode') != mode or item.get('language') != language:
            continue

        categories = item.get('categories')
        # Guard against a missing or non-string field before splitting.
        if not isinstance(categories, str) or category not in categories.split('|'):
            continue

        # Cards are numbered from 1 in match order.
        filtered_samples.append(_render_sample(item, len(filtered_samples) + 1))

    total_checked = len(data)
    print(f"Checked {total_checked} items, found {len(filtered_samples)} matches")

    if filtered_samples:
        return "".join(filtered_samples)

    # Escape the echoed filter values as well; they end up inside HTML.
    safe_mode = html.escape(str(mode))
    safe_language = html.escape(str(language))
    safe_category = html.escape(str(category))
    debug_info = f"<p style='color: #e2e8f0;'>Debug info: Checked {total_checked} items, found 0 matches for Mode={safe_mode}, Language={safe_language}, Category={safe_category}</p>"
    return f"<p style='color: #f56565;'><strong>No matching samples found for the selected filters.</strong> Try different filter combinations.</p>{debug_info}"

# Custom CSS for dark theme.
# Passed to gr.Blocks(css=...) below. It hides the page footer and sets dark
# background / light text colours on the container, the dropdowns, headings,
# labels, paragraphs and the primary button.
custom_css = """
footer {visibility: hidden} 
.gradio-container {
    background-color: #1a202c;
    color: #e2e8f0;
}
.dark h1, .dark h2, .dark h3 {
    color: #e2e8f0 !important;
}
.gradio-dropdown {
    background-color: #2d3748;
    color: #e2e8f0;
    border-color: #4a5568;
}
.dark button.primary {
    background-color: #4299e1 !important;
}
.dark label {
    color: #e2e8f0 !important;
}
.dark p {
    color: #e2e8f0 !important;
}
"""

# Gradio interface using Blocks: three filter dropdowns, a submit button and
# an HTML pane that shows the matching samples.
with gr.Blocks(title="Sample Viewer", css=custom_css, theme="dark") as demo:
    # The emoji in the heading and button label were mis-encoded; restored.
    gr.Markdown("## 📘 Multilingual Sample Viewer")
    gr.Markdown("Select filters to view samples from the dataset")

    with gr.Row():
        # The leading "" choice is the "nothing selected" state.
        mode_input = gr.Dropdown(choices=[""] + modes, label="Translation Mode", value="")
        lang_input = gr.Dropdown(choices=[""] + languages, label="Language", value="")
        # Categories start empty and are filled in once mode and language are chosen.
        cat_input = gr.Dropdown(choices=[""], label="Category", value="")

    def update_categories(language, mode):
        """Return a Category dropdown update for the current language and mode.

        The choices are the blank entry followed by the categories available
        for the selection; the selected value is reset to blank.
        """
        # get_categories_for_selection returns [] when either filter is
        # empty, so an incomplete selection leaves only the blank choice.
        categories = get_categories_for_selection(language, mode)
        return gr.Dropdown(choices=[""] + categories, value="")

    # Refresh the category choices whenever language or mode changes.
    for trigger in (lang_input, mode_input):
        trigger.change(
            fn=update_categories,
            inputs=[lang_input, mode_input],
            outputs=cat_input,
        )

    submit_btn = gr.Button("🔍 Show Samples", variant="primary")
    output_display = gr.HTML(label="Results")

    # Render the matching samples into the HTML pane on click.
    submit_btn.click(
        fn=filter_samples,
        inputs=[mode_input, lang_input, cat_input],
        outputs=output_display,
    )

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable share link
    # in addition to the local server — confirm the dataset may be exposed.
    demo.launch(share=True)