File size: 4,944 Bytes
7361754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import gradio as gr
import networkx as nx
import matplotlib.pyplot as plt
import spacy
import pandas as pd
import numpy as np
from pathlib import Path

# Load SpaCy model
# NOTE(review): requires `python -m spacy download en_core_web_sm` to have been
# run beforehand; spacy.load raises OSError if the model package is missing.
nlp = spacy.load("en_core_web_sm")

# Categories and their colors
# Hex colors used when drawing graph nodes, keyed by display category.
# NOTE(review): spaCy entity labels (PERSON, EVENT, DATE, LAW, ORG) do not
# match these keys directly — any code that stores a raw label as a node
# "category" must translate it before looking up a color here.
CATEGORIES = {
    "Main Themes": "#004d99",    # dark blue
    "Events": "#006400",         # dark green
    "People": "#8b4513",         # saddle brown
    "Laws/Policies": "#4b0082",  # indigo
    "Concepts": "#800000"        # maroon
}

def load_historical_data():
    """Return the full Unit 5 source text, read as UTF-8.

    Returns the sentinel string "Historical data file not found." when
    the data file is absent; callers compare against that exact string.
    """
    try:
        return Path("Unit5_OCR.txt").read_text(encoding="utf-8")
    except FileNotFoundError:
        return "Historical data file not found."

def extract_entities(text):
    """Tally named entities of interesting types found in *text*.

    Returns a dict keyed by entity surface text; each value holds the
    entity's spaCy label under "type", an occurrence "count", and an
    (unpopulated) "context" list.
    """
    doc = nlp(text)
    entities = {}

    # Only these spaCy labels are considered relevant here.
    interesting = ("PERSON", "EVENT", "DATE", "LAW", "ORG")
    for ent in doc.ents:
        if ent.label_ not in interesting:
            continue
        record = entities.get(ent.text)
        if record is None:
            entities[ent.text] = {
                "type": ent.label_,
                "count": 1,
                "context": []
            }
        else:
            record["count"] += 1

    return entities

def find_related_terms(term, text, window_size=100):
    """Collect entities appearing near each occurrence of *term* in *text*.

    Matching is case-insensitive. Each related entity is scored with a
    "relevance" that starts at 1.0 and grows by 0.5 per extra sighting,
    alongside a raw occurrence "count" and the spaCy label in "type".
    """
    needle = term.lower()
    haystack = text.lower()
    related = {}

    position = haystack.find(needle)
    while position != -1:
        # Slice a context window around this occurrence.
        lo = max(0, position - window_size)
        hi = min(len(haystack), position + len(needle) + window_size)
        snippet = haystack[lo:hi]

        # Run NER on the window and score every entity other than the term.
        for ent in nlp(snippet).ents:
            if ent.text.lower() == needle:
                continue
            entry = related.get(ent.text)
            if entry is None:
                related[ent.text] = {
                    "type": ent.label_,
                    "count": 1,
                    "relevance": 1.0
                }
            else:
                entry["count"] += 1
                entry["relevance"] += 0.5

        position = haystack.find(needle, position + 1)

    return related

def generate_context_map(term):
    """Generate a network visualization for the given term.

    Parameters
    ----------
    term : str
        Historical term to search for in the Unit 5 text.

    Returns
    -------
    matplotlib.figure.Figure or None
        The rendered context map, or None for blank input / missing data.
    """
    if not term.strip():
        return None

    # Load historical data (the loader returns this sentinel string
    # rather than raising when the file is absent).
    content = load_historical_data()
    if content == "Historical data file not found.":
        return None

    # BUG FIX: related nodes used raw spaCy labels ("PERSON", "EVENT", ...)
    # as their "category", which never matched the CATEGORIES keys
    # ("People", "Events", ...), so the color-filter loop below silently
    # skipped every related node and only the central node was drawn.
    # Translate labels to display categories; anything unmapped (e.g. DATE,
    # ORG) falls back to "Concepts" so it is still rendered.
    label_to_category = {
        "PERSON": "People",
        "EVENT": "Events",
        "LAW": "Laws/Policies",
    }

    # Create network graph
    G = nx.Graph()

    # Find entities that co-occur with the search term
    related_items = find_related_terms(term, content)

    # The searched term is always the central "Main Themes" node.
    G.add_node(term, category="Main Themes")

    # Add related nodes (limit to top 10 by relevance)
    sorted_items = sorted(related_items.items(),
                          key=lambda x: x[1]["relevance"],
                          reverse=True)[:10]

    for item_name, item_data in sorted_items:
        category = label_to_category.get(item_data["type"], "Concepts")
        G.add_node(item_name, category=category)
        # Higher relevance -> heavier edge, shorter preferred length.
        G.add_edge(term, item_name,
                   weight=item_data["relevance"],
                   length=2.0 / item_data["relevance"])

    # Create the figure explicitly (the plt.clf() the original called right
    # after plt.figure() was a no-op on a brand-new figure).
    fig = plt.figure(figsize=(12, 12))

    # Force-directed layout; k controls target spacing between nodes.
    pos = nx.spring_layout(G, k=1, iterations=50)

    # Draw each category's nodes in its configured color.
    for category, color in CATEGORIES.items():
        nodes = [node for node, attr in G.nodes(data=True)
                 if attr.get("category", "") == category]
        if nodes:
            nx.draw_networkx_nodes(G, pos, nodelist=nodes,
                                   node_color=color,
                                   node_size=2000)

    # Draw edges
    nx.draw_networkx_edges(G, pos, edge_color='white',
                           width=1, alpha=0.5)

    # Add labels
    labels = {node: node for node in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels, font_size=8,
                            font_color='white')

    # Dark background to match the interface theme.
    fig.gca().set_facecolor('#1a1a1a')
    fig.set_facecolor('#1a1a1a')

    # Add title
    plt.title(f"Historical Context Map for '{term}'",
              color='white', pad=20)

    return fig

# Create Gradio interface
# Wires generate_context_map to a single textbox input and a plot output.
# NOTE(review): theme="darkhuggingface" is a legacy theme string; recent
# Gradio versions expect a gr.themes.* object or a hub theme id — confirm
# against the pinned gradio version.
iface = gr.Interface(
    fn=generate_context_map,
    inputs=gr.Textbox(label="Enter a historical term from Unit 5",
                     placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction"),
    outputs=gr.Plot(),
    title="Historical Context Mapper",
    description="This tool generates a network visualization showing the historical context and connections for terms from Unit 5 (1844-1877).",
    theme="darkhuggingface",
    examples=[
        ["Civil War"],
        ["Abraham Lincoln"],
        ["Reconstruction"],
        ["Manifest Destiny"],
        ["Transcontinental Railroad"]
    ]
)

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()