File size: 4,944 Bytes
7361754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import gradio as gr
import networkx as nx
import matplotlib.pyplot as plt
import spacy
import pandas as pd
import numpy as np
from pathlib import Path

# Load SpaCy model
# NOTE(review): requires `python -m spacy download en_core_web_sm` to have been
# run beforehand; spacy.load raises OSError if the model package is missing.
nlp = spacy.load("en_core_web_sm")

# Categories and their colors
# Hex colors used when drawing graph nodes, keyed by display category.
# NOTE(review): spaCy entity labels (PERSON, EVENT, DATE, LAW, ORG) do not
# match these keys directly — any code that stores a raw label as a node
# "category" must translate it before looking up a color here.
CATEGORIES = {
    "Main Themes": "#004d99",    # dark blue
    "Events": "#006400",         # dark green
    "People": "#8b4513",         # saddle brown
    "Laws/Policies": "#4b0082",  # indigo
    "Concepts": "#800000"        # maroon
}

def load_historical_data():
    """Return the full Unit 5 source text, read as UTF-8.

    Returns the sentinel string "Historical data file not found." when
    the data file is absent; callers compare against that exact string.
    """
    try:
        return Path("Unit5_OCR.txt").read_text(encoding="utf-8")
    except FileNotFoundError:
        return "Historical data file not found."

def extract_entities(text):
    """Tally named entities of interesting types found in *text*.

    Returns a dict keyed by entity surface text; each value holds the
    entity's spaCy label under "type", an occurrence "count", and an
    (unpopulated) "context" list.
    """
    doc = nlp(text)
    entities = {}

    # Only these spaCy labels are considered relevant here.
    interesting = ("PERSON", "EVENT", "DATE", "LAW", "ORG")
    for ent in doc.ents:
        if ent.label_ not in interesting:
            continue
        record = entities.get(ent.text)
        if record is None:
            entities[ent.text] = {
                "type": ent.label_,
                "count": 1,
                "context": []
            }
        else:
            record["count"] += 1

    return entities

def find_related_terms(term, text, window_size=100):
    """Collect entities appearing near each occurrence of *term* in *text*.

    Matching is case-insensitive. Each related entity is scored with a
    "relevance" that starts at 1.0 and grows by 0.5 per extra sighting,
    alongside a raw occurrence "count" and the spaCy label in "type".
    """
    needle = term.lower()
    haystack = text.lower()
    related = {}

    position = haystack.find(needle)
    while position != -1:
        # Slice a context window around this occurrence.
        lo = max(0, position - window_size)
        hi = min(len(haystack), position + len(needle) + window_size)
        snippet = haystack[lo:hi]

        # Run NER on the window and score every entity other than the term.
        for ent in nlp(snippet).ents:
            if ent.text.lower() == needle:
                continue
            entry = related.get(ent.text)
            if entry is None:
                related[ent.text] = {
                    "type": ent.label_,
                    "count": 1,
                    "relevance": 1.0
                }
            else:
                entry["count"] += 1
                entry["relevance"] += 0.5

        position = haystack.find(needle, position + 1)

    return related

def generate_context_map(term):
    """Generate a network visualization for the given term.

    Parameters
    ----------
    term : str
        Historical term to search for in the Unit 5 text.

    Returns
    -------
    matplotlib.figure.Figure or None
        The rendered context map, or None for blank input / missing data.
    """
    if not term.strip():
        return None

    # Load historical data (the loader returns this sentinel string
    # rather than raising when the file is absent).
    content = load_historical_data()
    if content == "Historical data file not found.":
        return None

    # BUG FIX: related nodes used raw spaCy labels ("PERSON", "EVENT", ...)
    # as their "category", which never matched the CATEGORIES keys
    # ("People", "Events", ...), so the color-filter loop below silently
    # skipped every related node and only the central node was drawn.
    # Translate labels to display categories; anything unmapped (e.g. DATE,
    # ORG) falls back to "Concepts" so it is still rendered.
    label_to_category = {
        "PERSON": "People",
        "EVENT": "Events",
        "LAW": "Laws/Policies",
    }

    # Create network graph
    G = nx.Graph()

    # Find entities that co-occur with the search term
    related_items = find_related_terms(term, content)

    # The searched term is always the central "Main Themes" node.
    G.add_node(term, category="Main Themes")

    # Add related nodes (limit to top 10 by relevance)
    sorted_items = sorted(related_items.items(),
                          key=lambda x: x[1]["relevance"],
                          reverse=True)[:10]

    for item_name, item_data in sorted_items:
        category = label_to_category.get(item_data["type"], "Concepts")
        G.add_node(item_name, category=category)
        # Higher relevance -> heavier edge, shorter preferred length.
        G.add_edge(term, item_name,
                   weight=item_data["relevance"],
                   length=2.0 / item_data["relevance"])

    # Create the figure explicitly (the plt.clf() the original called right
    # after plt.figure() was a no-op on a brand-new figure).
    fig = plt.figure(figsize=(12, 12))

    # Force-directed layout; k controls target spacing between nodes.
    pos = nx.spring_layout(G, k=1, iterations=50)

    # Draw each category's nodes in its configured color.
    for category, color in CATEGORIES.items():
        nodes = [node for node, attr in G.nodes(data=True)
                 if attr.get("category", "") == category]
        if nodes:
            nx.draw_networkx_nodes(G, pos, nodelist=nodes,
                                   node_color=color,
                                   node_size=2000)

    # Draw edges
    nx.draw_networkx_edges(G, pos, edge_color='white',
                           width=1, alpha=0.5)

    # Add labels
    labels = {node: node for node in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels, font_size=8,
                            font_color='white')

    # Dark background to match the interface theme.
    fig.gca().set_facecolor('#1a1a1a')
    fig.set_facecolor('#1a1a1a')

    # Add title
    plt.title(f"Historical Context Map for '{term}'",
              color='white', pad=20)

    return fig

# Create Gradio interface
# Wires generate_context_map to a single textbox input and a plot output.
# NOTE(review): theme="darkhuggingface" is a legacy theme string; recent
# Gradio versions expect a gr.themes.* object or a hub theme id — confirm
# against the pinned gradio version.
iface = gr.Interface(
    fn=generate_context_map,
    inputs=gr.Textbox(label="Enter a historical term from Unit 5",
                     placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction"),
    outputs=gr.Plot(),
    title="Historical Context Mapper",
    description="This tool generates a network visualization showing the historical context and connections for terms from Unit 5 (1844-1877).",
    theme="darkhuggingface",
    examples=[
        ["Civil War"],
        ["Abraham Lincoln"],
        ["Reconstruction"],
        ["Manifest Destiny"],
        ["Transcontinental Railroad"]
    ]
)

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()