mk1985 committed on
Commit
e9738aa
·
verified ·
1 Parent(s): 0e88058

Update app.py

Files changed (1)
  1. app.py +266 -243
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # 📚 Install dependencies
2
  # Make sure to run this in your environment if you haven't already
3
- # !pip install openai anthropic google-generativeai gradio transformers torch gliner pandas --quiet
4
 
5
  # βš™οΈ Imports
6
  import openai
@@ -8,12 +8,10 @@ import anthropic
8
  import google.generativeai as genai
9
  import gradio as gr
10
  from gliner import GLiNER
11
- import traceback
12
  from collections import defaultdict, Counter
13
- import re
14
  import os
15
- import pandas as pd
16
- import tempfile
17
 
18
  # 🧠 Supported models and their providers
19
  MODEL_OPTIONS = {
@@ -27,317 +25,342 @@ GLINER_MODEL_NAME = "urchade/gliner_large-v2.1"
27
 
28
  # --- Load the model only once at startup ---
29
  try:
30
- print("Loading GLiNER model... This may take a moment.")
31
  gliner_model = GLiNER.from_pretrained(GLINER_MODEL_NAME)
32
- print("GLiNER model loaded successfully.")
33
  except Exception as e:
34
  print(f"FATAL ERROR: Could not load GLiNER model. The app will not be able to find entities. Error: {e}")
35
  gliner_model = None
36
 
37
- # --- Prompt and other constants remain the same ---
38
- HIERARCHICAL_PROMPT_TEMPLATE = """You are a helpful research assistant specializing in history. Your task is to brainstorm a hierarchical set of keywords and named entities related to a historical topic.
39
-
40
- The user will provide a topic. You should generate a structured list of categories and, for each category, a comma-separated list of relevant keywords or phrases. These keywords should be things a researcher might want to search for in a historical text.
41
-
42
- Rules:
43
- 1. Structure your response using Markdown.
44
- 2. Use '###' for each category title (e.g., '### Key Figures').
45
- 3. Beneath each category, provide a single bullet point '-' followed by a comma-separated list of 5-10 specific keywords or entities.
46
- 4. Do not add any introductory or concluding sentences. Just provide the structured list.
47
- 5. The keywords should be specific and likely to appear in primary or secondary source documents.
48
-
49
- Example for the topic "The Protestant Reformation":
50
- ### Key Figures
51
- - Martin Luther, John Calvin, Huldrych Zwingli, Henry VIII, Charles V, Pope Leo X
52
- ### Core Theological Concepts
53
- - Sola Scriptura, Sola Fide, Indulgences, Priesthood of all believers, Justification by faith
54
- ### Key Events
55
- - Diet of Worms, Ninety-five Theses, Marburg Colloquy, Council of Trent, Edict of Worms
56
- ### Important Locations
57
- - Wittenberg, Geneva, Rome, Wartburg Castle, Augsburg
58
- ### Associated Groups
59
- - Protestants, Lutherans, Calvinists, Anabaptists, Huguenots, Catholic Church
60
-
61
- Now, generate the framework for the following topic:
62
- Topic: {topic}"""
63
- TRADITIONAL_NER_LABELS = ["PERSON", "ORGANIZATION", "LOCATION", "DATE", "EVENT", "WORK_OF_ART", "LAW"]
64
- MAX_CATEGORIES = 8
65
-
66
- with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
67
- # --- UI remains the same up to the output tabs ---
68
- gr.Markdown("# Historical Text Analysis Tool")
69
- gr.Markdown("A tool to help historians and researchers quickly identify key terms and concepts in texts. Start by generating keyword ideas for a topic, then paste your text to find all occurrences.") # Welcome text collapsed for brevity
70
- gr.Markdown("---")
71
- gr.Markdown("## Step 1: Get Keyword Ideas")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  with gr.Row():
73
- topic = gr.Textbox(label="Enter Historical Topic", placeholder="e.g., The Chartist Movement")
74
- provider = gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose AI Model")
75
  with gr.Row():
76
  openai_key = gr.Textbox(label="OpenAI API Key", type="password")
77
  anthropic_key = gr.Textbox(label="Anthropic API Key", type="password")
78
  google_key = gr.Textbox(label="Google API Key", type="password")
79
- generate_btn = gr.Button("Suggest Categories and Keywords", variant="primary")
80
 
81
- gr.Markdown("--- \n## Step 2: Build Your Search and Analyze Text")
82
- category_components = []
83
  with gr.Column():
84
  for i in range(MAX_CATEGORIES):
85
- with gr.Accordion(f"Category {i+1}", visible=False) as acc:
86
  with gr.Row():
87
- cg = gr.CheckboxGroup(label="Keywords", interactive=True, container=False, scale=4)
88
- toggle_btn = gr.Button("Deselect All", size="sm", scale=1, min_width=100)
89
- category_components.append((acc, cg, toggle_btn))
90
  with gr.Group():
91
- ner_output = gr.CheckboxGroup(choices=TRADITIONAL_NER_LABELS, value=TRADITIONAL_NER_LABELS, label="Standard Search Terms")
92
- toggle_ner_btn = gr.Button("Deselect All", size="sm")
93
  with gr.Group():
94
- custom_labels = gr.Textbox(label="Add Your Own Keywords (Optional)", placeholder="e.g., Technology, Weapon... (separated by commas)")
95
- threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Confidence Threshold", info="Controls how 'sure' the AI needs to be. Lower finds more potential matches, higher finds only the most certain ones.")
96
- text_input = gr.Textbox(label="Paste Your Full Text Here for Analysis", lines=10)
97
- match_btn = gr.Button("Find Keywords in Text", variant="primary")
98
 
99
- # --- NEW: Add state variables to hold data between function calls ---
100
- # This holds the original text for updates
101
- text_state = gr.State()
102
- # This holds the results DataFrame for updates and downloads
103
- dataframe_state = gr.State()
104
-
105
  with gr.Tabs():
106
  with gr.TabItem("Highlighted Text"):
107
- matched_output = gr.HighlightedText(
108
- label="Keyword Matches",
109
- interactive=True,
110
- show_legend=True
111
- )
112
  with gr.TabItem("Detailed Results"):
113
- # --- CHANGE: Using gr.DataFrame for a clean table output ---
114
- detailed_results_output = gr.DataFrame(
115
- headers=["Category", "Found Phrase", "Occurrences"],
116
- datatype=["str", "str", "number"],
117
- wrap=True,
118
- label="Aggregated Results"
119
- )
120
- # --- NEW: Download button and hidden file component ---
121
- download_button = gr.Button("Download Results as CSV", visible=False)
122
- download_file = gr.File(label="Download", visible=False)
123
-
124
  with gr.TabItem("Debug Info"):
125
  debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
126
 
127
  # --- Backend Functions ---
128
 
129
- # --- THIS IS THE MISSING FUNCTION THAT WAS ADDED ---
130
- def generate_from_prompt(prompt, provider, key_dict):
131
- """Calls the appropriate LLM API based on the selected provider."""
132
- provider_id = MODEL_OPTIONS.get(provider)
133
-
134
- if provider_id == "openai":
135
- client = openai.OpenAI(api_key=key_dict["openai_key"])
136
- response = client.chat.completions.create(
137
- model="gpt-4o",
138
- messages=[{"role": "user", "content": prompt}]
139
- )
140
- return response.choices[0].message.content
141
-
142
- elif provider_id == "anthropic":
143
- client = anthropic.Anthropic(api_key=key_dict["anthropic_key"])
144
- response = client.messages.create(
145
- model="claude-3-opus-20240229",
146
- max_tokens=1024,
147
- messages=[{"role": "user", "content": prompt}]
148
- )
149
- return response.content[0].text
150
-
151
- elif provider_id == "google":
152
- genai.configure(api_key=key_dict["google_key"])
153
- model = genai.GenerativeModel('gemini-1.5-pro-latest')
154
- response = model.generate_content(prompt)
155
- return response.text
156
-
157
- else:
158
- raise ValueError("Invalid provider selected")
159
-
160
  def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
161
- # ... (This function remains unchanged) ...
162
- yield {generate_btn: gr.update(value="Consulting the Archives...", interactive=False)}
 
 
163
  try:
164
- key_dict = {"openai_key": os.environ.get("OPENAI_API_KEY", openai_k), "anthropic_key": os.environ.get("ANTHROPIC_API_KEY", anthropic_k), "google_key": os.environ.get("GOOGLE_API_KEY", google_k)}
165
  provider_id = MODEL_OPTIONS.get(provider)
166
- if not topic or not provider or not key_dict.get(f"{provider_id}_key"): raise gr.Error("Topic, Provider, and the correct API Key are required.")
 
 
167
  prompt = HIERARCHICAL_PROMPT_TEMPLATE.format(topic=topic)
168
  raw_framework = generate_from_prompt(prompt, provider, key_dict)
 
 
169
  framework = defaultdict(list)
170
  current_category = None
171
  for line in raw_framework.split('\n'):
172
  line = line.strip()
173
- if line.startswith("###"): current_category = line.replace("###", "").strip()
174
- elif line.startswith("-") and current_category: framework[current_category].extend([e.strip() for e in line.replace("-", "").strip().split(',') if e.strip()])
175
- if not framework: raise gr.Error("AI failed to generate categories. Please try again.")
176
  updates = {}
177
  categories = list(framework.items())
178
  for i in range(MAX_CATEGORIES):
179
- accordion_comp, checkbox_comp, toggle_btn_comp = category_components[i]
180
  if i < len(categories):
181
- category, entities = categories[i]
 
182
  sorted_entities = sorted(list(set(entities)))
183
- updates[accordion_comp] = gr.update(label=category, visible=True)
184
- updates[checkbox_comp] = gr.update(choices=sorted_entities, value=sorted_entities, visible=True)
185
- updates[toggle_btn_comp] = gr.update(visible=True, value="Deselect All")
186
  else:
187
  updates[accordion_comp] = gr.update(visible=False)
188
  updates[checkbox_comp] = gr.update(visible=False)
189
- updates[toggle_btn_comp] = gr.update(visible=False)
190
- updates[generate_btn] = gr.update(value="Suggest Categories and Keywords", interactive=True)
 
191
  yield updates
192
  except Exception as e:
193
- yield {generate_btn: gr.update(value="Suggest Categories and Keywords", interactive=True)}
194
  raise gr.Error(str(e))
195
 
196
- # --- NEW: Helper function to process entities into a DataFrame ---
197
- def process_entities_to_df(entities, original_text):
198
- """Takes a list of entities and the original text, and returns a pandas DataFrame."""
199
- if not entities:
200
- return pd.DataFrame(columns=["Category", "Found Phrase", "Occurrences"])
201
-
202
- # Extract text for each entity
203
- found_phrases = []
204
- for ent in entities:
205
- found_phrases.append({
206
- "Category": ent['entity'],
207
- "Found Phrase": original_text[ent['start']:ent['end']]
208
- })
209
-
210
- if not found_phrases:
211
- return pd.DataFrame(columns=["Category", "Found Phrase", "Occurrences"])
212
-
213
- # Aggregate using pandas
214
- df = pd.DataFrame(found_phrases)
215
- aggregated_df = df.groupby(["Category", "Found Phrase"]).size().reset_index(name="Occurrences")
216
- aggregated_df = aggregated_df.sort_values(by=["Category", "Occurrences"], ascending=[True, False])
217
-
218
- return aggregated_df
219
-
220
- # --- UPDATED: `match_entities` now uses pandas and updates state ---
221
- def match_entities(text, ner_labels, custom_label_text, threshold, *selected_keywords, progress=gr.Progress(track_tqdm=True)):
222
  yield {
223
- match_btn: gr.update(value="Searching...", interactive=False),
 
 
224
  detailed_results_output: None,
225
- download_button: gr.update(visible=False),
226
- download_file: gr.update(visible=False)
227
  }
228
- if gliner_model is None: raise gr.Error("GLiNER model failed to load.")
229
230
  labels_to_use = set()
231
- if ner_labels: labels_to_use.update(ner_labels)
232
- for group in selected_keywords:
233
  if group: labels_to_use.update(group)
 
 
 
234
  custom = {l.strip() for l in custom_label_text.split(',') if l.strip()}
235
  if custom: labels_to_use.update(custom)
236
- final_labels = sorted(list(labels_to_use))
237
- debug_info = [f"🧠 Searching for {len(final_labels)} unique keywords.", f"⚙️ Confidence Threshold: {threshold}"]
238
239
  if not text or not final_labels:
240
- yield {match_btn: gr.update(value="Find Keywords in Text", interactive=True)}
241
  return
242
-
 
243
  all_entities = []
244
- chunk_size, overlap = 1000, 50
245
- for i in progress.tqdm(range(0, len(text), chunk_size - overlap), desc="Scanning Text..."):
 
246
  chunk = text[i : i + chunk_size]
247
  chunk_entities = gliner_model.predict_entities(chunk, final_labels, threshold=threshold)
248
  for ent in chunk_entities:
249
- ent['start'] += i; ent['end'] += i
 
250
  all_entities.append(ent)
251
 
 
252
  unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
253
- debug_info.append(f"📊 Found {len(unique_entities)} unique matches.")
254
 
255
- highlighted_entities = [{"start": ent["start"], "end": ent["end"], "label": ent["label"]} for ent in unique_entities]
256
 
257
- # --- NEW: Use helper to create DataFrame ---
258
- results_df = process_entities_to_df(highlighted_entities, text)
259
260
  yield {
261
- match_btn: gr.update(value="Find Keywords in Text", interactive=True),
262
- matched_output: {"text": text, "entities": highlighted_entities},
263
- detailed_results_output: results_df,
264
- debug_output: "\n".join(debug_info),
265
- download_button: gr.update(visible=True if not results_df.empty else False),
266
- text_state: text, # Store original text in state
267
- dataframe_state: results_df # Store dataframe in state
268
  }
269
 
270
- # --- NEW: Function to update results when highlighted text is edited ---
271
- def update_detailed_results(new_highlighted_entities, original_text):
272
- """
273
- This function is triggered when the user edits the HighlightedText component.
274
- It re-calculates the DataFrame and updates the UI.
275
- """
276
- # new_highlighted_entities is the full value of the component, not just a diff
277
- # In Gradio > 4, the format is a list of dictionaries with 'entity', 'start', 'end'
278
- results_df = process_entities_to_df(new_highlighted_entities, original_text)
279
-
280
- return {
281
- detailed_results_output: results_df,
282
- dataframe_state: results_df, # Update the state for the download button
283
- download_button: gr.update(visible=True if not results_df.empty else False),
284
- }
285
-
286
- # --- NEW: Function to handle the file download ---
287
- def download_results_as_csv(df):
288
- """Saves the DataFrame to a temporary CSV file and returns its path."""
289
- with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv', encoding='utf-8') as tmp:
290
- df.to_csv(tmp.name, index=False)
291
- return gr.update(value=tmp.name, visible=True)
292
-
293
- # --- Event Wiring ---
294
- def handle_toggle_click(button_text, all_choices):
295
- if button_text == "Select All": return gr.update(value=all_choices), gr.update(value="Deselect All")
296
- else: return gr.update(value=[]), gr.update(value="Select All")
297
- def update_button_on_check(selections):
298
- return gr.update(value="Select All") if not selections else gr.update(value="Deselect All")
299
-
300
- submit_event_args = {"fn": handle_generate, "inputs": [topic, provider, openai_key, anthropic_key, google_key], "outputs": [generate_btn] + [comp for pair in category_components for comp in pair]}
301
- generate_btn.click(**submit_event_args)
302
- topic.submit(**submit_event_args)
303
-
304
- toggle_ner_btn.click(fn=handle_toggle_click, inputs=[toggle_ner_btn, gr.State(TRADITIONAL_NER_LABELS)], outputs=[ner_output, toggle_ner_btn])
305
- ner_output.change(fn=update_button_on_check, inputs=[ner_output], outputs=[toggle_ner_btn])
306
-
307
- def create_toggle_handler(cg_component):
308
- # We need a closure to capture the correct cg_component for each button
309
- def handler(button_text):
310
- # Gradio provides the component's choices at runtime, so we can access them here
311
- return handle_toggle_click(button_text, cg_component.choices)
312
- return handler
313
-
314
- for acc, cg, toggle_btn in category_components:
315
- # Note: We pass the component itself to gr.State to get its properties in the handler
316
- toggle_btn.click(
317
- fn=lambda btn_txt, choices: handle_toggle_click(btn_txt, choices),
318
- inputs=[toggle_btn, gr.State(cg.choices)],
319
- outputs=[cg, toggle_btn]
320
- )
321
- cg.change(fn=update_button_on_check, inputs=[cg], outputs=[toggle_btn])
322
-
323
- match_btn.click(
324
- fn=match_entities,
325
- inputs=[text_input, ner_output, custom_labels, threshold_slider] + [cg for acc, cg, btn in category_components],
326
- # --- CHANGE: Added new state and download components to outputs ---
327
- outputs=[match_btn, matched_output, detailed_results_output, debug_output, download_button, download_file, text_state, dataframe_state]
328
  )
329
 
330
- # --- NEW: Wire up the dynamic update and download events ---
331
- matched_output.change(
332
- fn=update_detailed_results,
333
- inputs=[matched_output, text_state],
334
- outputs=[detailed_results_output, dataframe_state, download_button]
335
- )
336
 
337
- download_button.click(
338
- fn=download_results_as_csv,
339
- inputs=[dataframe_state],
340
- outputs=[download_file]
 
 
 
341
  )
342
 
343
  demo.launch(share=True, debug=True)
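For reference, the chunked scanning pattern shared by the old match_entities and the new analyze_text_and_find_entities can be exercised on its own. A minimal sketch, assuming the same GLiNER.from_pretrained / predict_entities API used in app.py; the helper name scan_text, the chunk sizes, and the example labels are illustrative only:

from gliner import GLiNER

# Load once, as app.py does at startup.
gliner_model = GLiNER.from_pretrained("urchade/gliner_large-v2.1")

def scan_text(text, labels, threshold=0.4, chunk_size=1024, overlap=100):
    """Scan a long text in overlapping chunks and return deduplicated entities."""
    all_entities = []
    for i in range(0, len(text), chunk_size - overlap):
        chunk = text[i : i + chunk_size]
        for ent in gliner_model.predict_entities(chunk, labels, threshold=threshold):
            # Re-anchor chunk-local offsets to the full text, as the app does.
            ent["start"] += i
            ent["end"] += i
            all_entities.append(ent)
    # Entities detected twice inside an overlap collapse to a single record.
    return [dict(t) for t in {tuple(d.items()) for d in all_entities}]

# Example (illustrative labels):
# entities = scan_text(open("speech.txt").read(), ["PERSON", "DATE", "EVENT"])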
 
1
  # 📚 Install dependencies
2
  # Make sure to run this in your environment if you haven't already
3
+ # !pip install openai anthropic google-generativeai gradio transformers torch gliner --quiet
4
 
5
  # βš™οΈ Imports
6
  import openai
 
8
  import google.generativeai as genai
9
  import gradio as gr
10
  from gliner import GLiNER
11
+ import traceback
12
  from collections import defaultdict, Counter
13
+ import numpy as np # For calculating average score
14
  import os
 
 
15
 
16
  # 🧠 Supported models and their providers
17
  MODEL_OPTIONS = {
 
25
 
26
  # --- Load the model only once at startup ---
27
  try:
28
+ print("Loading AI Detective (GLiNER model)... This may take a moment.")
29
  gliner_model = GLiNER.from_pretrained(GLINER_MODEL_NAME)
30
+ print("AI Detective loaded successfully.")
31
  except Exception as e:
32
  print(f"FATAL ERROR: Could not load GLiNER model. The app will not be able to find entities. Error: {e}")
33
  gliner_model = None
34
 
35
+ # 🧠 Prompt for the Creative AI to generate label ideas
36
+ HIERARCHICAL_PROMPT_TEMPLATE = """
37
+ You are a helpful research assistant. For the historical topic: **"{topic}"**, your job is to suggest a research framework.
38
+
39
+ **Instructions:**
40
+ 1. First, think of 4-6 **Conceptual Categories** that are useful for analyzing this topic (e.g., 'Forms of Protest', 'Key Demands'). These will become the labels.
41
+ 2. For each category, list specific **Examples** someone could search for in a text.
42
+ 3. **Crucial Rule for Labels:** Use the most basic, fundamental form (e.g., `Petition`, not `Political Petition`).
43
+
44
+ **Output Format:**
45
+ Use Markdown. Each category must be a Level 3 Header (###), followed by a comma-separated list of its examples.
46
+
47
+ ### Example Category 1
48
+ - Example A, Example B, Example C
49
+ ### Example Category 2
50
+ - Example D, Example E
51
+ """
52
+
53
+ # 🧠 Generator Function (The "Creative Brain")
54
+ def generate_from_prompt(prompt, provider, key_dict):
55
+ provider_id = MODEL_OPTIONS.get(provider)
56
+ api_key = key_dict.get(f"{provider_id}_key")
57
+ if not api_key:
58
+ raise ValueError(f"API key for {provider} not found.")
59
+
60
+ if provider_id == "openai":
61
+ client = openai.OpenAI(api_key=api_key)
62
+ response = client.chat.completions.create(model="gpt-4o", messages=[{"role": "user", "content": prompt}], temperature=0.2)
63
+ return response.choices[0].message.content.strip()
64
+ elif provider_id == "anthropic":
65
+ client = anthropic.Anthropic(api_key=api_key)
66
+ response = client.messages.create(model="claude-3-opus-20240229", max_tokens=1024, messages=[{"role": "user", "content": prompt}])
67
+ return response.content[0].text.strip()
68
+ elif provider_id == "google":
69
+ genai.configure(api_key=api_key)
70
+ model = genai.GenerativeModel('gemini-1.5-pro-latest')
71
+ response = model.generate_content(prompt)
72
+ return response.text.strip()
73
+ return ""
74
+
75
+ # --- UI Definitions ---
76
+
77
+ # A list of standard, common labels the user can always choose from
78
+ STANDARD_LABELS = [
79
+ "PERSON", "ORGANIZATION", "LOCATION", "COUNTRY", "CITY", "STATE",
80
+ "NATIONALITY", "GROUP", "DATE", "EVENT", "LAW", "LEGAL_DOCUMENT",
81
+ "PRODUCT", "FACILITY", "WORK_OF_ART", "LANGUAGE", "TIME", "PERCENTAGE",
82
+ "MONEY", "CURRENCY", "QUANTITY", "ORDINAL_NUMBER", "CARDINAL_NUMBER"
83
+ ]
84
+
85
+ MAX_CATEGORIES = 8 # The maximum number of AI-suggested categories to show
86
+
87
+ with gr.Blocks(title="Smart Text Analyzer", css=".prose { word-break: break-word; }") as demo:
88
+ gr.Markdown("# Smart Text Analyzer")
89
+ gr.Markdown(
90
+ """
91
+ Welcome! Paste your text below to automatically find and highlight key information. It's like having two smart assistants read your document for you.
92
+
93
+ ### How It Works: Two Brains are Better Than One!
94
+ We use two different types of AI to give you the best results.
95
+
96
+ 🧠 **1. The Creative Brain (Generative AI - like GPT)**
97
+ This AI is a brainstormer. It reads your topic to understand the context, then *imagines* and *suggests* useful labels that fit your document. It helps you discover what to look for!
98
+
99
+ πŸ•΅οΈ **2. The Detective (Extractive AI - GLiNER)**
100
+ This AI is a precise detective. Once you give it a list of labels, it meticulously scans the text and *pulls out* (extracts) the exact words that match. It's fantastic at finding specific information with high accuracy.
101
+ """
102
+ )
103
+
104
+ gr.Markdown("--- \n## Step 1: Get Label Ideas from the Creative AI")
105
  with gr.Row():
106
+ topic = gr.Textbox(label="Enter a Topic", placeholder="e.g., The Chartist Movement, The Protestant Reformation")
107
+ provider = gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Creative AI Model")
108
  with gr.Row():
109
  openai_key = gr.Textbox(label="OpenAI API Key", type="password")
110
  anthropic_key = gr.Textbox(label="Anthropic API Key", type="password")
111
  google_key = gr.Textbox(label="Google API Key", type="password")
 
112
 
113
+ generate_btn = gr.Button("Generate Label Suggestions", variant="primary")
114
+
115
+ gr.Markdown("--- \n## Step 2: Build Your Search & Analyze Text")
116
+ gr.Markdown(
117
+ """
118
+ ### What are Entities or Labels?
119
+ Think of them as special highlighters! They find and color-code specific types of information in your text, like `PERSON`, `DATE`, `LOCATION`, or custom things you define.
120
+ """
121
+ )
122
+
123
+ gr.Markdown("#### 1. Review AI-Suggested Labels")
124
+ gr.Markdown("The AI's suggestions appear below. Uncheck any you don't want.")
125
+
126
+ dynamic_components = []
127
  with gr.Column():
128
  for i in range(MAX_CATEGORIES):
129
+ with gr.Accordion(f"Suggested Label Category {i+1}", visible=False) as acc:
130
  with gr.Row():
131
+ # The CheckboxGroup holds the actual labels (e.g., "Protest", "Petition")
132
+ cg = gr.CheckboxGroup(label="Labels in this category", interactive=True, container=False, scale=4)
133
+ deselect_btn = gr.Button("Deselect All", size="sm", scale=1, min_width=80)
134
+ dynamic_components.append((acc, cg, deselect_btn))
135
+
136
+ gr.Markdown("#### 2. Include Standard Labels (Optional)")
137
  with gr.Group():
138
+ standard_labels_checkbox = gr.CheckboxGroup(choices=STANDARD_LABELS, value=STANDARD_LABELS, label="Standard Entity Labels", info="Common categories like people, places, and dates.")
139
+ with gr.Row():
140
+ select_all_std_btn = gr.Button("Select All", size="sm")
141
+ deselect_all_std_btn = gr.Button("Deselect All", size="sm")
142
+
143
+
144
+ gr.Markdown("#### 3. Add Your Own Custom Labels (Optional)")
145
  with gr.Group():
146
+ custom_labels_textbox = gr.Textbox(label="Enter Custom Labels (comma-separated)", placeholder="e.g., Technology, Weapon, Secret Society...")
147
+
148
+ gr.Markdown("--- \n## Step 3: Analyze Your Document")
149
+ threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Confidence Threshold", info="Controls how strict the AI Detective is. Lower to find more matches. Higher for fewer, more precise matches.")
150
+ text_input = gr.Textbox(label="Paste Your Full Text Here for Analysis", lines=10, placeholder="Paste a historical document, an article, or a chapter...")
151
+ analyze_btn = gr.Button("Analyze Text & Find Entities", variant="primary")
152
+
153
+ analysis_status = gr.Markdown(visible=False) # For the "Analyzing..." message
154
+
155
+ gr.Markdown("--- \n## Step 4: Review Your Results")
156
+ gr.Markdown(
157
+ """
158
+ ✨ **Pro Tip: Create Your Own Labels!**
159
+ Did our AI miss something? In the **"Highlighted Text"** view below, simply **click and drag to highlight any piece of text**. A small box will appear, allowing you to name and add your own custom label!
160
+ """
161
+ )
162
163
  with gr.Tabs():
164
  with gr.TabItem("Highlighted Text"):
165
+ highlighted_text_output = gr.HighlightedText(label="Found Entities", interactive=True)
166
  with gr.TabItem("Detailed Results"):
167
+ detailed_results_output = gr.Markdown(label="List of Found Entities by Label")
168
  with gr.TabItem("Debug Info"):
169
  debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
170
 
171
  # --- Backend Functions ---
173
  def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
174
+ yield {
175
+ generate_btn: gr.update(value="🧠 Generating suggestions...", interactive=False)
176
+ }
177
+
178
  try:
179
+ key_dict = {
180
+ "openai_key": os.environ.get("OPENAI_API_KEY", openai_k),
181
+ "anthropic_key": os.environ.get("ANTHROPIC_API_KEY", anthropic_k),
182
+ "google_key": os.environ.get("GOOGLE_API_KEY", google_k)
183
+ }
184
+
185
  provider_id = MODEL_OPTIONS.get(provider)
186
+ if not topic or not provider or not key_dict.get(f"{provider_id}_key"):
187
+ raise gr.Error("Topic, Provider, and the correct API Key are required.")
188
+
189
  prompt = HIERARCHICAL_PROMPT_TEMPLATE.format(topic=topic)
190
  raw_framework = generate_from_prompt(prompt, provider, key_dict)
191
+
192
+ # This parsing is simplified for the new structure
193
  framework = defaultdict(list)
194
  current_category = None
195
  for line in raw_framework.split('\n'):
196
  line = line.strip()
197
+ if line.startswith("###"):
198
+ current_category = line.replace("###", "").strip()
199
+ elif line.startswith("-") and current_category:
200
+ entities = line.replace("-", "").strip()
201
+ framework[current_category].extend([e.strip() for e in entities.split(',') if e.strip()])
202
+
203
+ if not framework:
204
+ raise gr.Error("AI failed to generate categories. Please try again or rephrase your topic.")
205
+
206
  updates = {}
207
  categories = list(framework.items())
208
  for i in range(MAX_CATEGORIES):
209
+ accordion_comp, checkbox_comp, button_comp = dynamic_components[i]
210
  if i < len(categories):
211
+ category_name, entities = categories[i]
212
+ # The labels are the entities themselves, grouped by the category name
213
  sorted_entities = sorted(list(set(entities)))
214
+ updates[accordion_comp] = gr.update(label=f"Category: {category_name}", visible=True)
215
+ updates[checkbox_comp] = gr.update(choices=sorted_entities, value=sorted_entities, label="Suggested Labels", visible=True)
216
+ updates[button_comp] = gr.update(visible=True)
217
  else:
218
  updates[accordion_comp] = gr.update(visible=False)
219
  updates[checkbox_comp] = gr.update(visible=False)
220
+ updates[button_comp] = gr.update(visible=False)
221
+
222
+ updates[generate_btn] = gr.update(value="Generate Label Suggestions", interactive=True)
223
  yield updates
224
  except Exception as e:
225
+ yield {generate_btn: gr.update(value="Generate Label Suggestions", interactive=True)}
226
  raise gr.Error(str(e))
227
 
228
+ def analyze_text_and_find_entities(text, standard_labels, custom_label_text, threshold, *suggested_labels_from_groups):
229
+ # --- 1. Show Progress to User ---
230
  yield {
231
+ analyze_btn: gr.update(value="🕵️ Analyzing...", interactive=False),
232
+ analysis_status: gr.update(value="Our AI Detective is scanning your text. This may take a moment...", visible=True),
233
+ highlighted_text_output: None,
234
  detailed_results_output: None,
235
+ debug_output: "Starting analysis..."
 
236
  }
 
237
 
238
+ debug_info = []
239
+ if gliner_model is None:
240
+ raise gr.Error("GLiNER model failed to load at startup. Cannot analyze text. Please check logs.")
241
+
242
+ # --- 2. Collect All Labels from UI ---
243
  labels_to_use = set()
244
+ # Add labels from the dynamically generated suggestion groups
245
+ for group in suggested_labels_from_groups:
246
  if group: labels_to_use.update(group)
247
+ # Add labels from the standard list
248
+ if standard_labels: labels_to_use.update(standard_labels)
249
+ # Add labels from the custom textbox
250
  custom = {l.strip() for l in custom_label_text.split(',') if l.strip()}
251
  if custom: labels_to_use.update(custom)
 
 
252
 
253
+ final_labels = sorted(list(labels_to_use))
254
+ debug_info.append(f"🧠 Searching for {len(final_labels)} unique labels.")
255
+ debug_info.append(f"βš™οΈ Confidence Threshold: {threshold}")
256
+
257
  if not text or not final_labels:
258
+ yield {
259
+ analyze_btn: gr.update(value="Analyze Text & Find Entities", interactive=True),
260
+ analysis_status: gr.update(visible=False),
261
+ highlighted_text_output: {"text": text, "entities": []},
262
+ detailed_results_output: "Please provide text and select at least one label to search for.",
263
+ debug_output: "Analysis stopped: No text or no labels provided."
264
+ }
265
  return
266
+
267
+ # --- 3. Run the GLiNER Model (The "Detective") ---
268
  all_entities = []
269
+ # Process text in chunks to handle very long documents
270
+ chunk_size, overlap = 1024, 100
271
+ for i in range(0, len(text), chunk_size - overlap):
272
  chunk = text[i : i + chunk_size]
273
  chunk_entities = gliner_model.predict_entities(chunk, final_labels, threshold=threshold)
274
  for ent in chunk_entities:
275
+ ent['start'] += i
276
+ ent['end'] += i
277
  all_entities.append(ent)
278
 
279
+ # Deduplicate entities that might span across chunk overlaps
280
  unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
281
+ debug_info.append(f"πŸ“Š Found {len(unique_entities)} raw entity mentions.")
282
+
283
+ # --- 4. Prepare Highlighted Text Output ---
284
+ highlighted_output_data = {
285
+ "text": text,
286
+ "entities": [{"start": ent["start"], "end": ent["end"], "label": ent["label"]} for ent in unique_entities]
287
+ }
288
+
289
+ # --- 5. Prepare Detailed Table-Based Results ---
290
+ aggregated_matches = defaultdict(lambda: {'count': 0, 'scores': [], 'original_casing': ''})
291
 
292
+ for ent in unique_entities:
293
+ match_text = text[ent['start']:ent['end']]
294
+ # Use a key of (label, lowercase_text) to group similar items
295
+ key = (ent['label'], match_text.lower())
296
+
297
+ aggregated_matches[key]['count'] += 1
298
+ aggregated_matches[key]['scores'].append(ent['score'])
299
+ # Store the first-seen casing of the text
300
+ if not aggregated_matches[key]['original_casing']:
301
+ aggregated_matches[key]['original_casing'] = match_text
302
 
303
+ # Group aggregated results by label for final display
304
+ results_by_label = defaultdict(list)
305
+ for (label, _), data in aggregated_matches.items():
306
+ avg_score = np.mean(data['scores'])
307
+ results_by_label[label].append({
308
+ 'text': data['original_casing'],
309
+ 'count': data['count'],
310
+ 'avg_score': avg_score
311
+ })
312
 
313
+ # --- 6. Build the Markdown String for the Detailed Table ---
314
+ markdown_string = ""
315
+ for label, items in sorted(results_by_label.items()):
316
+ markdown_string += f"### {label}\n"
317
+ markdown_string += "| Text Found | Instances Found | Avg. Confidence Score* |\n"
318
+ markdown_string += "|------------|-----------------|--------------------------|\n"
319
+
320
+ # Sort items by count (most frequent first)
321
+ for item in sorted(items, key=lambda x: x['count'], reverse=True):
322
+ markdown_string += f"| {item['text']} | {item['count']} | {item['avg_score']:.2f} |\n"
323
+ markdown_string += "\n"
324
+
325
+ if not markdown_string:
326
+ markdown_string = "No entities found. Try lowering the confidence threshold or changing your labels."
327
+ else:
328
+ markdown_string += "\n---\n<small><i>*<b>Confidence Score:</b> How sure the AI Detective (GLiNER) is that it found the correct label (1.00 = 100% certain). The score shown is the average across all instances of that text.</i></small>"
329
+
330
+ debug_info.append("βœ… Analysis complete.")
331
+
332
+ # --- 7. Yield Final Results to UI ---
333
  yield {
334
+ analyze_btn: gr.update(value="Analyze Text & Find Entities", interactive=True),
335
+ analysis_status: gr.update(visible=False),
336
+ highlighted_text_output: highlighted_output_data,
337
+ detailed_results_output: markdown_string,
338
+ debug_output: "\n".join(debug_info)
 
 
339
  }
340
 
341
+ # --- Wire up UI events ---
342
+ generate_btn.click(
343
+ fn=handle_generate,
344
+ inputs=[topic, provider, openai_key, anthropic_key, google_key],
345
+ outputs=[generate_btn] + [comp for pair in dynamic_components for comp in pair]
346
  )
347
+
348
+ # Functions for Select/Deselect All buttons
349
+ def deselect_all():
350
+ return gr.update(value=[])
351
+ def select_all(choices):
352
+ return gr.update(value=choices)
353
 
354
+ deselect_all_std_btn.click(fn=deselect_all, inputs=None, outputs=[standard_labels_checkbox])
355
+ deselect_all_std_btn.click(fn=deselect_all, inputs=None, outputs=[standard_labels_checkbox])
356
 
357
+ for _, cg, btn in dynamic_components:
358
+ btn.click(fn=deselect_all, inputs=None, outputs=[cg])
359
+
360
+ analyze_btn.click(
361
+ fn=analyze_text_and_find_entities,
362
+ inputs=[text_input, standard_labels_checkbox, custom_labels_textbox, threshold_slider] + [cg for acc, cg, btn in dynamic_components],
363
+ outputs=[analyze_btn, analysis_status, highlighted_text_output, detailed_results_output, debug_output]
364
  )
365
 
366
  demo.launch(share=True, debug=True)
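For reference, the aggregation behind the new "Detailed Results" tab (steps 5 and 6 of analyze_text_and_find_entities) can be reproduced standalone. A minimal sketch, assuming entity dicts carry the start/end/label/score fields that predict_entities returns; the helper name entities_to_markdown and the sample entities are invented for illustration:

from collections import defaultdict
import numpy as np

def entities_to_markdown(text, entities):
    """Group mentions by (label, lowercased text), count them, average scores, render Markdown tables."""
    agg = defaultdict(lambda: {"count": 0, "scores": [], "casing": ""})
    for ent in entities:
        mention = text[ent["start"]:ent["end"]]
        key = (ent["label"], mention.lower())
        agg[key]["count"] += 1
        agg[key]["scores"].append(ent["score"])
        # Keep the first-seen casing, as the app does.
        agg[key]["casing"] = agg[key]["casing"] or mention
    by_label = defaultdict(list)
    for (label, _), d in agg.items():
        by_label[label].append((d["casing"], d["count"], float(np.mean(d["scores"]))))
    md = ""
    for label, rows in sorted(by_label.items()):
        md += f"### {label}\n| Text Found | Instances Found | Avg. Confidence Score |\n|---|---|---|\n"
        for mention, count, score in sorted(rows, key=lambda r: r[1], reverse=True):
            md += f"| {mention} | {count} | {score:.2f} |\n"
        md += "\n"
    return md or "No entities found."

# Example with invented data:
# text = "Martin Luther met Martin Luther in Wittenberg."
# ents = [{"start": 0, "end": 13, "label": "PERSON", "score": 0.92},
#         {"start": 18, "end": 31, "label": "PERSON", "score": 0.88},
#         {"start": 35, "end": 45, "label": "LOCATION", "score": 0.97}]
# print(entities_to_markdown(text, ents))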