rafmacalaba committed
Commit b877288 · 1 Parent(s): 94bf6c6

switch to new data.json

Files changed (2):
  1. app.py +312 -170
  2. consolidated_data_optimized.json +0 -0
app.py CHANGED
@@ -6,7 +6,7 @@ from collections import Counter, defaultdict
 import gradio as gr
 
 # ── Local CONFIG ──────────────────────────────────────────────────────────────
-DATA_FILE = "gradio_ner_data.json"
+DATA_FILE = "consolidated_data_optimized.json"
 
 
 def load_initial_data() -> List[Dict]:
@@ -14,19 +14,10 @@ def load_initial_data() -> List[Dict]:
         raise FileNotFoundError(f"{DATA_FILE} not found in current directory.")
     with open(DATA_FILE, "r", encoding="utf-8") as f:
         data = json.load(f)
-
-    # Calculate mixed types (types that have both True and False LLM assessments)
-    type_assessments = defaultdict(set)
-    for rec in data:
-        if rec.get("type") and rec.get("llm_is_dataset_contextual") is not None:
-            type_assessments[rec["type"]].add(rec["llm_is_dataset_contextual"])
 
-    mixed_types = {t for t, assessments in type_assessments.items() if True in assessments and False in assessments}
+    # Sort to show records with relations first (most informative)
+    data.sort(key=lambda x: len(x.get('ner_text', [])), reverse=True)
 
-    # Flag records
-    for rec in data:
-        rec["is_mixed_type"] = rec.get("type") in mixed_types
-
     return data
 
 
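The loader now orders records by annotation density instead of precomputing mixed-type flags. A standalone sketch of that ordering, using hypothetical toy records rather than the real data file:

```python
# Minimal sketch of the new load-time ordering: records with more `ner_text`
# spans sort first, so the most annotated mentions appear at the top.
records = [
    {"id": "a", "ner_text": []},
    {"id": "b", "ner_text": [(0, 4, "named"), (10, 16, "named")]},
    {"id": "c", "ner_text": [(3, 9, "named")]},
]
records.sort(key=lambda x: len(x.get("ner_text", [])), reverse=True)
print([r["id"] for r in records])  # ['b', 'c', 'a']
```

Python's sort is stable, so records with equal span counts keep their original order.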
@@ -41,47 +32,90 @@ class DynamicDataset:
         return self.data[self.current]
 
 
-class MixedTypeManager:
+class ComparisonManager:
     def __init__(self, data: List[Dict]):
-        self.grouped_data = defaultdict(lambda: {'true': [], 'false': []})
-        self.mixed_types = []
+        self.data = data
+
+        # Group by type
+        self.type_groups = defaultdict(lambda: {'validated': [], 'not_validated': []})
+
+        # Group by term (extract from ner_text)
+        self.term_groups = defaultdict(lambda: {'validated': [], 'not_validated': []})
 
-        # Group data
         for rec in data:
             dtype = rec.get("type")
-            is_ds = rec.get("llm_is_dataset_contextual")
-            if dtype and is_ds is not None:
-                key = 'true' if is_ds else 'false'
-                self.grouped_data[dtype][key].append(rec)
+            is_validated = rec.get("validated", False)
+            tags = rec.get("tags", [])
+
+            # Only include borderline cases
+            if "borderline" not in tags:
+                continue
+
+            # Group by type
+            if dtype:
+                key = 'validated' if is_validated else 'not_validated'
+                self.type_groups[dtype][key].append(rec)
+
+            # Extract term from ner_text
+            if rec.get('ner_text') and len(rec['ner_text']) > 0:
+                start, end, label = rec['ner_text'][0]
+                if label == 'named' and rec.get('text'):
+                    term = rec['text'][start:end]
+                    if term and "confusing_term" in tags:
+                        key = 'validated' if is_validated else 'not_validated'
+                        self.term_groups[term][key].append(rec)
 
-        # Identify mixed types
-        for dtype, groups in self.grouped_data.items():
-            if groups['true'] and groups['false']:
-                self.mixed_types.append(dtype)
+        # Get mixed types (sorted by total count)
+        self.mixed_types = []
+        for dtype, groups in self.type_groups.items():
+            if groups['validated'] and groups['not_validated']:
+                total = len(groups['validated']) + len(groups['not_validated'])
+                self.mixed_types.append((dtype, total))
+        self.mixed_types.sort(key=lambda x: x[1], reverse=True)
+        self.mixed_types = [t[0] for t in self.mixed_types]
 
-        # Sort by total count
-        self.mixed_types.sort(key=lambda t: len(self.grouped_data[t]['true']) + len(self.grouped_data[t]['false']), reverse=True)
+        # Get confusing terms (sorted by total count)
+        self.confusing_terms = []
+        for term, groups in self.term_groups.items():
+            if groups['validated'] and groups['not_validated']:
+                total = len(groups['validated']) + len(groups['not_validated'])
+                self.confusing_terms.append((term, total))
+        self.confusing_terms.sort(key=lambda x: x[1], reverse=True)
+        self.confusing_terms = [t[0] for t in self.confusing_terms]
+
+    def get_example_by_type(self, dtype: str, is_validated: bool, idx: int) -> Dict:
+        if dtype not in self.type_groups:
+            return {}
+        group = self.type_groups[dtype]['validated' if is_validated else 'not_validated']
+        if not group:
+            return {}
+        safe_idx = idx % len(group)
+        return group[safe_idx]
 
-    def get_example(self, dtype: str, is_dataset: bool, idx: int) -> Dict:
-        if dtype not in self.grouped_data:
+    def get_count_by_type(self, dtype: str, is_validated: bool) -> int:
+        if dtype not in self.type_groups:
+            return 0
+        return len(self.type_groups[dtype]['validated' if is_validated else 'not_validated'])
+
+    def get_example_by_term(self, term: str, is_validated: bool, idx: int) -> Dict:
+        if term not in self.term_groups:
             return {}
-        group = self.grouped_data[dtype]['true' if is_dataset else 'false']
+        group = self.term_groups[term]['validated' if is_validated else 'not_validated']
         if not group:
             return {}
-        # Cycle through examples
         safe_idx = idx % len(group)
         return group[safe_idx]
 
-    def get_count(self, dtype: str, is_dataset: bool) -> int:
-        if dtype not in self.grouped_data:
+    def get_count_by_term(self, term: str, is_validated: bool) -> int:
+        if term not in self.term_groups:
             return 0
-        return len(self.grouped_data[dtype]['true' if is_dataset else 'false'])
+        return len(self.term_groups[term]['validated' if is_validated else 'not_validated'])
 
 
 # ── Highlight utils ──────────────────────────────────────────────────────────
 def prepare_for_highlight(rec: Dict) -> List[Tuple[str, Optional[str]]]:
     text = rec.get("text", "") or ""
-    ner_spans = rec.get("ner_annotated", rec.get("ner_text", [])) or []
+    ner_spans = rec.get("ner_text", []) or []
 
     segments = []
     last_idx = 0
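The grouping step above reduces to a small pattern: bucket borderline records per type into validated/not-validated lists, then keep the types that appear on both sides. A minimal sketch with made-up records:

```python
from collections import defaultdict

# Hypothetical records shaped like the new data file: a `validated` bool
# and a `tags` list. Only records tagged "borderline" are bucketed.
data = [
    {"type": "system", "validated": True,  "tags": ["borderline"]},
    {"type": "system", "validated": False, "tags": ["borderline"]},
    {"type": "census", "validated": True,  "tags": []},  # skipped: not borderline
]

type_groups = defaultdict(lambda: {"validated": [], "not_validated": []})
for rec in data:
    if "borderline" not in rec.get("tags", []):
        continue
    key = "validated" if rec.get("validated", False) else "not_validated"
    type_groups[rec["type"]][key].append(rec)

# "Mixed" types are those with examples on both sides.
mixed = [t for t, g in type_groups.items() if g["validated"] and g["not_validated"]]
print(mixed)  # ['system']
```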
@@ -110,21 +144,16 @@ def prepare_for_highlight(rec: Dict) -> List[Tuple[str, Optional[str]]]:
 
 
 # ── Filtering helpers ─────────────────────────────────────────────────────────
-def record_matches_filters(rec: Dict, llm_dataset_filter: str, type_filter: str):
-    # Use LLM assessment instead of is_dataset
-    llm_is_ds = rec.get("llm_is_dataset_contextual")
+def record_matches_filters(rec: Dict, dataset_filter: str, type_filter: str):
+    is_validated = rec.get("validated", False)
+    tags = rec.get("tags", [])
 
-    # If LLM assessment is not available, skip this record
-    if llm_is_ds is None:
-        return False
-
-    if llm_dataset_filter == "LLM: Datasets only" and not llm_is_ds:
+    if dataset_filter == "Datasets only" and not is_validated:
         return False
-    if llm_dataset_filter == "LLM: Non-datasets only" and llm_is_ds:
+    if dataset_filter == "Non-datasets only" and is_validated:
         return False
-    if llm_dataset_filter == "🔥 Show Confusion/Mixed Cases":
-        # Only show records that are part of a mixed type group
-        return rec.get("is_mixed_type", False)
+    if dataset_filter == "Borderline Cases Only":
+        return "borderline" in tags
 
     if type_filter != "All types":
         return rec.get("type") == type_filter
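The rewritten predicate can be exercised in isolation. The sketch below mirrors the hunk above on hypothetical records; note the hunk ends before the function does, so the trailing `return True` is an assumption about the unshown remainder:

```python
def record_matches_filters(rec, dataset_filter, type_filter):
    is_validated = rec.get("validated", False)
    tags = rec.get("tags", [])
    if dataset_filter == "Datasets only" and not is_validated:
        return False
    if dataset_filter == "Non-datasets only" and is_validated:
        return False
    if dataset_filter == "Borderline Cases Only":
        return "borderline" in tags
    if type_filter != "All types":
        return rec.get("type") == type_filter
    return True  # assumed: the hunk cuts off before the function's final line

rec = {"validated": False, "tags": ["borderline"], "type": "system"}
print(record_matches_filters(rec, "Borderline Cases Only", "All types"))  # True
print(record_matches_filters(rec, "Datasets only", "All types"))          # False
```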
@@ -138,7 +167,7 @@ DOCUMENTATION = """
 
 ## What is this tool?
 
-This application helps you **review and explore dataset mentions** extracted documents.
+This application helps you **review and explore dataset mentions** extracted from documents.
 It displays text excerpts where potential datasets have been identified, along with metadata about each mention.
 
 ## What you'll see
@@ -150,6 +179,7 @@ Each record shows:
 - **✅ Dataset Status**: Whether this mention actually refers to a dataset
 - **💡 Context**: The surrounding text that provides context
 - **📝 Explanation**: Why this was classified as a dataset (or not)
+- **🏷️ Tags**: Borderline, mixed type, or confusing term indicators
 
 ## How to use this tool
 
@@ -164,33 +194,34 @@ Each record shows:
    - **All**: Show all records
    - **Datasets only**: Show only records that contain actual dataset references
    - **Non-datasets only**: Show records that were identified but don't actually refer to datasets
+   - **🔥 Borderline Cases Only**: Show only confusing/mixed cases
 
 2. **Data Type Filter**
    - Filter by specific data types (census, survey, database, etc.)
    - Types are sorted by frequency (most common first)
 
-### 💡 Tips
-- Use filters to focus on specific types of data mentions
-- The "Contains Dataset" field tells you if the mention is a true dataset reference
-- Review the "Explanation" to understand the classification reasoning
-- Highlighted text shows exactly where the dataset mention appears in context
-
-## 🚀 Try It Yourself!
+### ⚖️ Comparison Tab
 
-Want to extract datasets from your own text? Try our **Dataset Extraction Tool**:
+The Comparison tab helps you understand **why the same type or term** can be validated differently:
 
-👉 **[Launch Dataset Extraction Tool](https://huggingface.co/spaces/ai4data/datause-extraction)**
+1. **By Type**: Compare examples of the same data type (e.g., "system") with different validation outcomes
+2. **By Term**: Compare the exact same term (e.g., "Project MIS") appearing in different contexts
 
-This interactive tool allows you to:
-- ✨ **Extract datasets** from your own text or documents
-- 📝 **Use predefined samples** to see how it works
-- 🔬 **Explore the extraction process** in real-time
+This helps identify:
+- What contextual signals distinguish valid from invalid datasets
+- Why borderline cases are confusing
+- Patterns in validation decisions
 
-Perfect for testing the extraction capabilities on new documents or experimenting with different types of text!
+### 💡 Tips
+- Use filters to focus on specific types of data mentions
+- The "Validated" field tells you if the mention is a true dataset reference
+- Review the "Explanation" to understand the classification reasoning
+- Highlighted text shows exactly where the dataset mention appears in context
+- Check tags to identify borderline/confusing cases
 
 ## Data Source
 
-This viewer uses data from World Bank project documents.
+This viewer uses data from World Bank project documents with revalidation analysis.
 """
 
 
@@ -198,7 +229,7 @@ This viewer uses data from World Bank project documents.
 def create_demo() -> gr.Blocks:
     data = load_initial_data()
     dynamic_dataset = DynamicDataset(data)
-    mixed_manager = MixedTypeManager(data)
+    comparison_manager = ComparisonManager(data)
 
     # Count types and sort by frequency (most common first)
     type_counter = Counter(rec.get("type") for rec in data if rec.get("type"))
@@ -212,15 +243,18 @@ def create_demo() -> gr.Blocks:
         v_type = rec.get("type", "—")
         empirical_context = rec.get("empirical_context", "—")
         explanation = rec.get("explanation", "—")
-        is_mixed = rec.get("is_mixed_type", False)
-        llm_is_dataset = rec.get("llm_is_dataset_contextual")
+        tags = rec.get("tags", [])
+        is_validated = rec.get("validated", False)
+        contextual_signal = rec.get("contextual_signal", "—")
+        contextual_reason_model = rec.get("contextual_reason_model", "—")
+        contextual_reason_agent = rec.get("contextual_reason_agent", "—")
 
-        # Apply conditional highlighting based on LLM assessment
-        if rec.get("ner_text") and rec.get("text") and llm_is_dataset is not None:
+        # Apply conditional highlighting based on validation
+        if rec.get("ner_text") and rec.get("text") and is_validated is not None:
             try:
                 start, end = rec["ner_text"][0][0], rec["ner_text"][0][1]
                 term = rec["text"][start:end]
-                if llm_is_dataset:
+                if is_validated:
                     highlight_style = 'background-color: #90ee90; color: black; padding: 2px 4px; border-radius: 4px; font-weight: bold; border: 1px solid #5cb85c;'
                 else:
                     highlight_style = 'background-color: #ff7f7f; color: black; padding: 2px 4px; border-radius: 4px; font-weight: bold; border: 1px solid #d9534f;'
@@ -231,8 +265,24 @@
 
         # Build HTML
         type_html = f"<code>{v_type}</code>"
-        if is_mixed:
-            type_html += " ⚠️ <b>Mixed/Confusing Type</b>"
+
+        # Add type stats if available
+        type_stats = rec.get("type_stats")
+        if type_stats:
+            type_html += f" <small>(Type: {type_stats['validated']} ✅ / {type_stats['not_validated']} ❌)</small>"
+
+        tags_html = ""
+        # Add tags
+        if tags:
+            tag_badges = []
+            if "borderline" in tags:
+                tag_badges.append("⚠️ <b>Borderline</b>")
+            if "mixed_type" in tags:
+                tag_badges.append("🔍 <b>Mixed Type</b>")
+            if "confusing_term" in tags:
+                tag_badges.append("🤔 <b>Confusing Term</b>")
+            if tag_badges:
+                tags_html = " ".join(tag_badges)
 
         html = f"""
             <h3>📄 Document Information</h3>
@@ -241,36 +291,43 @@
 
             <h3>🏷️ Type</h3>
             <p>{type_html}</p>
+        """
+
+        if tags_html:
+            html += f"""
+            <h3>🚩 Tags</h3>
+            <p>{tags_html}</p>
+            """
 
+        html += f"""
             <h3>📝 Surrounding Text</h3>
             <p>{empirical_context}</p>
         """
 
-        # Add LLM contextual analysis section if available
-        llm_reasons = rec.get("llm_contextual_reason", [])
-        llm_thinking = rec.get("llm_thinking_contextual", "")
+        # Add validation analysis
+        status_icon = '✅' if is_validated else '❌'
+        status_text = 'Is a dataset' if is_validated else 'Not a dataset'
+        html += f"""
+            <h3>🤖 Validation Analysis</h3>
+            <p><b>Assessment:</b> {status_icon} {status_text}</p>
+            <p><b>Contextual Signal:</b> <code>{contextual_signal}</code></p>
+        """
 
-        if llm_is_dataset is not None:
-            status_icon = '✅' if llm_is_dataset else '❌'
-            status_text = 'Is a dataset' if llm_is_dataset else 'Not a dataset'
-            html += f"""
-            <h3>🤖 Contextual Analysis</h3>
-            <p><b>Assessment:</b> {status_icon} {status_text}</p>
-            """
-
-        if llm_reasons:
-            html += "<p><b>Reasoning:</b></p><ul>"
-            for reason in llm_reasons:
-                html += f"<li>{reason}</li>"
-            html += "</ul>"
-
-        if llm_thinking:
+        if contextual_reason_agent:
             html += f"""
-            <p><b>Detailed Analysis:</b></p>
+            <p><b>Agent Reasoning:</b></p>
             <blockquote style="border-left: 3px solid #ccc; padding-left: 10px; color: #666;">
-            {llm_thinking}
+            {contextual_reason_agent}
             </blockquote>
             """
+
+        if contextual_reason_model:
+            html += f"""
+            <p><b>Model Reasoning:</b></p>
+            <blockquote style="border-left: 3px solid #999; padding-left: 10px; color: #888;">
+            {contextual_reason_model}
+            </blockquote>
+            """
 
         return html
 
@@ -281,10 +338,10 @@
         return segs, idx, make_info(rec)
 
     # When filters change → jump to first matching record
-    def jump_on_filters(llm_dataset_filter, type_filter):
+    def jump_on_filters(dataset_filter, type_filter):
        n = dynamic_dataset.len
        for i in range(n):
-            if record_matches_filters(data[i], llm_dataset_filter, type_filter):
+            if record_matches_filters(data[i], dataset_filter, type_filter):
                dynamic_dataset.current = i
                rec = data[i]
                segs = prepare_for_highlight(rec)
@@ -294,11 +351,11 @@
         return [], 0, "⚠️ No matching records found with the selected filters."
 
     # Navigation respecting filters
-    def nav_next(llm_dataset_filter, type_filter):
+    def nav_next(dataset_filter, type_filter):
         i = dynamic_dataset.current + 1
         n = dynamic_dataset.len
         while i < n:
-            if record_matches_filters(data[i], llm_dataset_filter, type_filter):
+            if record_matches_filters(data[i], dataset_filter, type_filter):
                 break
             i += 1
         if i >= n:
@@ -307,10 +364,10 @@
         rec = data[i]
         return prepare_for_highlight(rec), i, make_info(rec)
 
-    def nav_prev(llm_dataset_filter, type_filter):
+    def nav_prev(dataset_filter, type_filter):
         i = dynamic_dataset.current - 1
         while i >= 0:
-            if record_matches_filters(data[i], llm_dataset_filter, type_filter):
+            if record_matches_filters(data[i], dataset_filter, type_filter):
                 break
             i -= 1
         if i < 0:
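Both navigation helpers are linear scans with a filter predicate and no wrap-around. A generic sketch of the forward scan, with a toy predicate:

```python
# Walk forward from the current index until an item passes the predicate;
# return None instead of wrapping when nothing matches.
def next_matching(data, current, predicate):
    i = current + 1
    while i < len(data) and not predicate(data[i]):
        i += 1
    return i if i < len(data) else None

nums = [1, 2, 3, 4, 5, 6]
print(next_matching(nums, 0, lambda x: x % 3 == 0))  # 2 (value 3)
print(next_matching(nums, 5, lambda x: x % 3 == 0))  # None (no match after index 5)
```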
@@ -319,39 +376,61 @@
         rec = data[i]
         return prepare_for_highlight(rec), i, make_info(rec)
 
-    # Comparison Logic
-    def load_comparison(dtype, pos_idx, neg_idx):
+    # Comparison Logic - By Type
+    def load_type_comparison(dtype, pos_idx, neg_idx):
         if not dtype:
-            return [], "Select a type", [], "Select a type"
+            return [], "Select a type", [], "Select a type", "### ✅ IS Dataset", "### ❌ NOT Dataset"
 
-        pos_rec = mixed_manager.get_example(dtype, True, pos_idx)
-        neg_rec = mixed_manager.get_example(dtype, False, neg_idx)
+        pos_rec = comparison_manager.get_example_by_type(dtype, True, pos_idx)
+        neg_rec = comparison_manager.get_example_by_type(dtype, False, neg_idx)
 
-        pos_hl = prepare_for_highlight(pos_rec)
-        neg_hl = prepare_for_highlight(neg_rec)
+        pos_hl = prepare_for_highlight(pos_rec) if pos_rec else []
+        neg_hl = prepare_for_highlight(neg_rec) if neg_rec else []
 
-        pos_info = make_info(pos_rec)
-        neg_info = make_info(neg_rec)
+        pos_info = make_info(pos_rec) if pos_rec else "No examples"
+        neg_info = make_info(neg_rec) if neg_rec else "No examples"
 
         # Add count info
-        pos_total = mixed_manager.get_count(dtype, True)
-        neg_total = mixed_manager.get_count(dtype, False)
+        pos_total = comparison_manager.get_count_by_type(dtype, True)
+        neg_total = comparison_manager.get_count_by_type(dtype, False)
 
-        pos_header = f"### ✅ IS Dataset ({pos_idx % pos_total + 1}/{pos_total})"
-        neg_header = f"### ❌ NOT Dataset ({neg_idx % neg_total + 1}/{neg_total})"
+        pos_header = f"### ✅ IS Dataset ({(pos_idx % pos_total) + 1 if pos_total > 0 else 0}/{pos_total})"
+        neg_header = f"### ❌ NOT Dataset ({(neg_idx % neg_total) + 1 if neg_total > 0 else 0}/{neg_total})"
 
         return pos_hl, pos_info, neg_hl, neg_info, pos_header, neg_header
 
-    def next_pos(dtype, current_idx):
+    # Comparison Logic - By Term
+    def load_term_comparison(term, pos_idx, neg_idx):
+        if not term:
+            return [], "Select a term", [], "Select a term", "### ✅ IS Dataset", "### ❌ NOT Dataset"
+
+        pos_rec = comparison_manager.get_example_by_term(term, True, pos_idx)
+        neg_rec = comparison_manager.get_example_by_term(term, False, neg_idx)
+
+        pos_hl = prepare_for_highlight(pos_rec) if pos_rec else []
+        neg_hl = prepare_for_highlight(neg_rec) if neg_rec else []
+
+        pos_info = make_info(pos_rec) if pos_rec else "No examples"
+        neg_info = make_info(neg_rec) if neg_rec else "No examples"
+
+        # Add count info
+        pos_total = comparison_manager.get_count_by_term(term, True)
+        neg_total = comparison_manager.get_count_by_term(term, False)
+
+        pos_header = f"### ✅ IS Dataset ({(pos_idx % pos_total) + 1 if pos_total > 0 else 0}/{pos_total})"
+        neg_header = f"### ❌ NOT Dataset ({(neg_idx % neg_total) + 1 if neg_total > 0 else 0}/{neg_total})"
+
+        return pos_hl, pos_info, neg_hl, neg_info, pos_header, neg_header
+
+    def next_pos(current_idx):
         return current_idx + 1
 
-    def next_neg(dtype, current_idx):
+    def next_neg(current_idx):
         return current_idx + 1
 
     # ---- UI ----
     with gr.Blocks(title="Monitoring of Data Use") as demo:
         gr.Markdown("# 📊 Monitoring of Data Use")
-        # gr.Markdown(f"*Exploring {dynamic_dataset.len:,} dataset mentions from World Bank documents*")
 
         with gr.Tabs():
             with gr.Tab("📖 How to Use"):
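The comparison headers rely on modulo arithmetic so "Next Example" cycles within a group and empty groups render 0/0 rather than raising ZeroDivisionError. A small sketch of that arithmetic:

```python
# The running index grows without bound; reducing it modulo the group size
# makes the displayed position wrap, and the total-zero guard avoids division
# by zero for one-sided groups.
def header(idx: int, total: int, label: str) -> str:
    shown = (idx % total) + 1 if total > 0 else 0
    return f"### {label} ({shown}/{total})"

print(header(0, 3, "✅ IS Dataset"))   # ### ✅ IS Dataset (1/3)
print(header(5, 3, "✅ IS Dataset"))   # ### ✅ IS Dataset (3/3)
print(header(2, 0, "❌ NOT Dataset"))  # ### ❌ NOT Dataset (0/0)
```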
@@ -369,10 +448,10 @@
                 )
 
                 with gr.Row():
-                    llm_dataset_filter = gr.Dropdown(
-                        choices=["🔥 Show Confusion/Mixed Cases", "All", "LLM: Datasets only", "LLM: Non-datasets only"],
-                        value="🔥 Show Confusion/Mixed Cases",
-                        label="🤖 Filter by Assessment",
+                    dataset_filter = gr.Dropdown(
+                        choices=["All", "Datasets only", "Non-datasets only", "Borderline Cases Only"],
+                        value="Datasets only",
+                        label="🎯 Filter by Validation Status",
                     )
 
                     type_filter = gr.Dropdown(
@@ -409,94 +488,157 @@
                 )
 
                 # Filters
-                llm_dataset_filter.change(
+                dataset_filter.change(
                     fn=jump_on_filters,
-                    inputs=[llm_dataset_filter, type_filter],
+                    inputs=[dataset_filter, type_filter],
                     outputs=[inp_box, prog, info_md],
                 )
                 type_filter.change(
                     fn=jump_on_filters,
-                    inputs=[llm_dataset_filter, type_filter],
+                    inputs=[dataset_filter, type_filter],
                     outputs=[inp_box, prog, info_md],
                 )
 
                 # Prev / Next navigation respecting filters
                 prev_btn.click(
                     fn=nav_prev,
-                    inputs=[llm_dataset_filter, type_filter],
+                    inputs=[dataset_filter, type_filter],
                     outputs=[inp_box, prog, info_md],
                 )
                 next_btn.click(
                     fn=nav_next,
-                    inputs=[llm_dataset_filter, type_filter],
+                    inputs=[dataset_filter, type_filter],
                     outputs=[inp_box, prog, info_md],
                 )
 
             with gr.Tab("⚖️ Comparison"):
-                gr.Markdown("### Side-by-Side Comparison of Mixed Types")
-                gr.Markdown("Compare examples where the **same type** is classified differently based on context.")
+                gr.Markdown("### Side-by-Side Comparison of Borderline Cases")
+                gr.Markdown("Compare examples to understand **why the same type or term** is validated differently based on context.")
 
-                with gr.Row():
+                comparison_mode = gr.Radio(
+                    choices=["By Type", "By Term"],
+                    value="By Type",
+                    label="Comparison Mode"
+                )
+
+                # Type comparison
+                with gr.Group(visible=True) as type_comparison_group:
+                    gr.Markdown("**Compare by Data Type**: See how the same type (e.g., 'system') can be valid or invalid")
                     comp_type_selector = gr.Dropdown(
-                        choices=mixed_manager.mixed_types,
-                        value=mixed_manager.mixed_types[0] if mixed_manager.mixed_types else None,
+                        choices=comparison_manager.mixed_types,
+                        value=comparison_manager.mixed_types[0] if comparison_manager.mixed_types else None,
                         label="Select Mixed Type to Compare",
                     )
 
-                # State for indices
-                pos_idx_state = gr.State(0)
-                neg_idx_state = gr.State(0)
+                    type_pos_idx_state = gr.State(0)
+                    type_neg_idx_state = gr.State(0)
 
-                with gr.Row():
-                    # Left Column: Positive
-                    with gr.Column():
-                        pos_header = gr.Markdown("### ✅ IS Dataset")
-                        pos_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
-                        pos_info_box = gr.HTML()
-                        pos_next_btn = gr.Button("Next Example ➡️")
-
-                    # Right Column: Negative
-                    with gr.Column():
-                        neg_header = gr.Markdown("### ❌ NOT Dataset")
-                        neg_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
-                        neg_info_box = gr.HTML()
-                        neg_next_btn = gr.Button("Next Example ➡️")
+                    with gr.Row():
+                        with gr.Column():
+                            type_pos_header = gr.Markdown("### ✅ IS Dataset")
+                            type_pos_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
+                            type_pos_info_box = gr.HTML()
+                            type_pos_next_btn = gr.Button("Next Example ➡️")
+
+                        with gr.Column():
+                            type_neg_header = gr.Markdown("### ❌ NOT Dataset")
+                            type_neg_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
+                            type_neg_info_box = gr.HTML()
+                            type_neg_next_btn = gr.Button("Next Example ➡️")
+
+                # Term comparison
+                with gr.Group(visible=False) as term_comparison_group:
+                    gr.Markdown("**Compare by Term**: See how the exact same term appears in different validation contexts")
+                    comp_term_selector = gr.Dropdown(
+                        choices=comparison_manager.confusing_terms,
+                        value=comparison_manager.confusing_terms[0] if comparison_manager.confusing_terms else None,
+                        label="Select Confusing Term to Compare",
+                    )
+
+                    term_pos_idx_state = gr.State(0)
+                    term_neg_idx_state = gr.State(0)
+
+                    with gr.Row():
+                        with gr.Column():
+                            term_pos_header = gr.Markdown("### ✅ IS Dataset")
+                            term_pos_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
+                            term_pos_info_box = gr.HTML()
+                            term_pos_next_btn = gr.Button("Next Example ➡️")
+
+                        with gr.Column():
+                            term_neg_header = gr.Markdown("### ❌ NOT Dataset")
+                            term_neg_hl_box = gr.HighlightedText(label="Context", interactive=False, show_legend=False, value="")
+                            term_neg_info_box = gr.HTML()
+                            term_neg_next_btn = gr.Button("Next Example ➡️")
+
+                # Toggle visibility based on mode
+                def toggle_comparison_mode(mode):
+                    return gr.update(visible=mode == "By Type"), gr.update(visible=mode == "By Term")
 
-                # Events
+                comparison_mode.change(
+                    fn=toggle_comparison_mode,
+                    inputs=[comparison_mode],
+                    outputs=[type_comparison_group, term_comparison_group]
+                )
+
+                # Type comparison events
                 comp_type_selector.change(
-                    fn=lambda: (0, 0),  # Reset indices
-                    outputs=[pos_idx_state, neg_idx_state]
+                    fn=lambda: (0, 0),
+                    outputs=[type_pos_idx_state, type_neg_idx_state]
                 ).then(
-                    fn=load_comparison,
-                    inputs=[comp_type_selector, pos_idx_state, neg_idx_state],
-                    outputs=[pos_hl_box, pos_info_box, neg_hl_box, neg_info_box, pos_header, neg_header]
+                    fn=load_type_comparison,
+                    inputs=[comp_type_selector, type_pos_idx_state, type_neg_idx_state],
+                    outputs=[type_pos_hl_box, type_pos_info_box, type_neg_hl_box, type_neg_info_box, type_pos_header, type_neg_header]
                 )
 
-                pos_next_btn.click(
+                type_pos_next_btn.click(
                     fn=next_pos,
-                    inputs=[comp_type_selector, pos_idx_state],
-                    outputs=[pos_idx_state]
+                    inputs=[type_pos_idx_state],
+                    outputs=[type_pos_idx_state]
                 ).then(
-                    fn=load_comparison,
-                    inputs=[comp_type_selector, pos_idx_state, neg_idx_state],
-                    outputs=[pos_hl_box, pos_info_box, neg_hl_box, neg_info_box, pos_header, neg_header]
+                    fn=load_type_comparison,
+                    inputs=[comp_type_selector, type_pos_idx_state, type_neg_idx_state],
+                    outputs=[type_pos_hl_box, type_pos_info_box, type_neg_hl_box, type_neg_info_box, type_pos_header, type_neg_header]
                 )
 
-                neg_next_btn.click(
+                type_neg_next_btn.click(
                     fn=next_neg,
-                    inputs=[comp_type_selector, neg_idx_state],
-                    outputs=[neg_idx_state]
+                    inputs=[type_neg_idx_state],
+                    outputs=[type_neg_idx_state]
+                ).then(
+                    fn=load_type_comparison,
+                    inputs=[comp_type_selector, type_pos_idx_state, type_neg_idx_state],
+                    outputs=[type_pos_hl_box, type_pos_info_box, type_neg_hl_box, type_neg_info_box, type_pos_header, type_neg_header]
+                )
+
+                # Term comparison events
+                comp_term_selector.change(
+                    fn=lambda: (0, 0),
+                    outputs=[term_pos_idx_state, term_neg_idx_state]
                 ).then(
-                    fn=load_comparison,
-                    inputs=[comp_type_selector, pos_idx_state, neg_idx_state],
-                    outputs=[pos_hl_box, pos_info_box, neg_hl_box, neg_info_box, pos_header, neg_header]
+                    fn=load_term_comparison,
+                    inputs=[comp_term_selector, term_pos_idx_state, term_neg_idx_state],
+                    outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
                 )
 
-                # Initial Load
-                demo.load(
-                    fn=load_comparison,
-                    inputs=[comp_type_selector, pos_idx_state, neg_idx_state],
-                    outputs=[pos_hl_box, pos_info_box, neg_hl_box, neg_info_box, pos_header, neg_header]
+                term_pos_next_btn.click(
+                    fn=next_pos,
+                    inputs=[term_pos_idx_state],
+                    outputs=[term_pos_idx_state]
+                ).then(
+                    fn=load_term_comparison,
+                    inputs=[comp_term_selector, term_pos_idx_state, term_neg_idx_state],
+                    outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
+                )
+
+                term_neg_next_btn.click(
+                    fn=next_neg,
+                    inputs=[term_neg_idx_state],
+                    outputs=[term_neg_idx_state]
+                ).then(
+                    fn=load_term_comparison,
+                    inputs=[comp_term_selector, term_pos_idx_state, term_neg_idx_state],
+                    outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
                 )
 
     return demo
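The new tab's wiring combines three Gradio patterns: gr.State for per-session indices, .click(...).then(...) chains so the re-render runs after the index update, and gr.update(visible=...) to switch groups. A self-contained toy demo of the same wiring (hypothetical components, not the app above):

```python
import gradio as gr

with gr.Blocks() as sketch:
    mode = gr.Radio(["A", "B"], value="A", label="Mode")
    with gr.Group(visible=True) as group_a:
        gr.Markdown("Panel A")
    with gr.Group(visible=False) as group_b:
        gr.Markdown("Panel B")

    idx = gr.State(0)          # per-session counter, not shared across users
    out = gr.Markdown("index: 0")
    nxt = gr.Button("Next")

    # Step 1 bumps the state; the chained step 2 runs afterwards and sees
    # the updated value.
    nxt.click(fn=lambda i: i + 1, inputs=[idx], outputs=[idx]).then(
        fn=lambda i: f"index: {i}", inputs=[idx], outputs=[out]
    )

    # gr.update(visible=...) toggles which group is rendered.
    mode.change(
        fn=lambda m: (gr.update(visible=m == "A"), gr.update(visible=m == "B")),
        inputs=[mode],
        outputs=[group_a, group_b],
    )

if __name__ == "__main__":
    sketch.launch()
```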
 
consolidated_data_optimized.json ADDED
The diff for this file is too large to render. See raw diff