"""Gradio app that renders a network "context map" for historical terms.

Reads Unit5_OCR.txt, finds named entities that co-occur with a search term
(via spaCy NER), and draws them as a networkx graph on a dark background.
"""

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import spacy
from pathlib import Path

# Load the spaCy English pipeline once at import time (the model is a separate
# install step: `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

# Display categories mapped to their node fill colors in the plot.
CATEGORIES = {
    "Main Themes": "#004d99",
    "Events": "#006400",
    "People": "#8b4513",
    "Laws/Policies": "#4b0082",
    "Concepts": "#800000",
}

# spaCy entity labels -> display categories.  Without this mapping, related
# nodes were tagged with raw labels ("PERSON", "ORG", ...) that never matched
# a CATEGORIES key, so their colored nodes were silently never drawn.
_LABEL_TO_CATEGORY = {
    "PERSON": "People",
    "EVENT": "Events",
    "DATE": "Events",
    "LAW": "Laws/Policies",
    "ORG": "Concepts",
}

# Sentinel returned by load_historical_data() when the source file is missing.
_DATA_MISSING = "Historical data file not found."


def load_historical_data():
    """Return the full text of Unit5_OCR.txt, or a sentinel string if absent.

    Returns:
        str: file contents on success, or the _DATA_MISSING sentinel when the
        file does not exist (callers compare against that exact string).
    """
    try:
        with open("Unit5_OCR.txt", "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return _DATA_MISSING


def extract_entities(text):
    """Extract named entities of interest from *text*.

    Args:
        text: raw document text to run NER over.

    Returns:
        dict: entity text -> {"type": spaCy label, "count": occurrences,
        "context": list}.
        NOTE(review): "context" is initialized but never populated, and this
        function is not called anywhere in this module — confirm whether any
        external caller relies on it before removing either.
    """
    doc = nlp(text)
    entities = {}
    for ent in doc.ents:
        if ent.label_ not in ("PERSON", "EVENT", "DATE", "LAW", "ORG"):
            continue
        if ent.text in entities:
            entities[ent.text]["count"] += 1
        else:
            entities[ent.text] = {"type": ent.label_, "count": 1, "context": []}
    return entities


def find_related_terms(term, text, window_size=100):
    """Find entities appearing within *window_size* chars of *term* in *text*.

    Matching is case-insensitive.  Each distinct co-occurring entity gets a
    "count" (sightings) and a "relevance" score: 1.0 on first sighting, +0.5
    for every repeat.

    Args:
        term: search term.
        text: document text to scan.
        window_size: characters of context kept on each side of a match.

    Returns:
        dict: entity text -> {"type", "count", "relevance"}.

    NOTE(review): NER runs on the lowercased window, which degrades spaCy's
    recognition of proper nouns — consider locating matches in lowercased
    text but running nlp on the original-cased slice.
    """
    term = term.lower()
    text = text.lower()
    related = {}

    index = text.find(term)
    while index != -1:
        # Slice the surrounding context, clamped to the text bounds.
        start = max(0, index - window_size)
        end = min(len(text), index + len(term) + window_size)
        doc = nlp(text[start:end])

        for ent in doc.ents:
            if ent.text.lower() == term:
                continue
            # First sighting initializes to count=1 / relevance=1.0 after the
            # increments below; repeats add +1 / +0.5 (matches original logic).
            entry = related.setdefault(
                ent.text, {"type": ent.label_, "count": 0, "relevance": 0.5}
            )
            entry["count"] += 1
            entry["relevance"] += 0.5

        # Advance one char so overlapping occurrences are also found.
        index = text.find(term, index + 1)

    return related


def generate_context_map(term):
    """Build and return a matplotlib Figure mapping *term*'s historical context.

    Args:
        term: user-supplied search term.

    Returns:
        matplotlib.figure.Figure, or None for blank input / missing data file.
    """
    if not term.strip():
        return None

    content = load_historical_data()
    if content == _DATA_MISSING:
        return None

    G = nx.Graph()
    related_items = find_related_terms(term, content)

    # Central node plus the 10 most relevant co-occurring entities.
    G.add_node(term, category="Main Themes")
    sorted_items = sorted(
        related_items.items(), key=lambda x: x[1]["relevance"], reverse=True
    )[:10]
    for item_name, item_data in sorted_items:
        G.add_node(
            item_name,
            category=_LABEL_TO_CATEGORY.get(item_data["type"], "Concepts"),
        )
        # Higher relevance -> heavier edge, shorter preferred length.
        G.add_edge(
            term,
            item_name,
            weight=item_data["relevance"],
            length=2.0 / item_data["relevance"],
        )

    # Bug fix: the original created a new pyplot figure per request without
    # ever closing it, leaking figures in this long-running app.
    plt.close("all")
    plt.figure(figsize=(12, 12))

    pos = nx.spring_layout(G, k=1, iterations=50)

    # Draw nodes one category at a time so each gets its configured color.
    for category, color in CATEGORIES.items():
        nodes = [
            node
            for node, attr in G.nodes(data=True)
            if attr.get("category", "") == category
        ]
        nx.draw_networkx_nodes(
            G, pos, nodelist=nodes, node_color=color, node_size=2000
        )

    nx.draw_networkx_edges(G, pos, edge_color='white', width=1, alpha=0.5)

    labels = {node: node for node in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels, font_size=8, font_color='white')

    # Dark background for both the axes and the figure canvas.
    plt.gca().set_facecolor('#1a1a1a')
    plt.gcf().set_facecolor('#1a1a1a')

    plt.title(f"Historical Context Map for '{term}'", color='white', pad=20)

    return plt.gcf()


# Gradio UI: one textbox in, one plot out.
# NOTE(review): "darkhuggingface" is not a documented theme name in current
# Gradio releases — confirm the intended theme (e.g. a gr.themes.* object).
iface = gr.Interface(
    fn=generate_context_map,
    inputs=gr.Textbox(
        label="Enter a historical term from Unit 5",
        placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction",
    ),
    outputs=gr.Plot(),
    title="Historical Context Mapper",
    description=(
        "This tool generates a network visualization showing the historical "
        "context and connections for terms from Unit 5 (1844-1877)."
    ),
    theme="darkhuggingface",
    examples=[
        ["Civil War"],
        ["Abraham Lincoln"],
        ["Reconstruction"],
        ["Manifest Destiny"],
        ["Transcontinental Railroad"],
    ],
)

if __name__ == "__main__":
    iface.launch()