# Spaces status: Sleeping
import re
from pathlib import Path

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import spacy
# spaCy pipeline, loaded once at import time and shared by every NER call
# in this module.
SPACY_MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(SPACY_MODEL_NAME)
# Display categories for graph nodes, mapped to the hex colour used when
# drawing them.  Insertion order is the order in which the categories are
# drawn.
CATEGORIES = {
    "Main Themes": "#004d99",    # dark blue
    "Events": "#006400",         # dark green
    "People": "#8b4513",         # saddle brown
    "Laws/Policies": "#4b0082",  # indigo
    "Concepts": "#800000",       # maroon
}
def load_historical_data(path="Unit5_OCR.txt"):
    """Return the full text of the Unit 5 source file.

    Args:
        path: Location of the UTF-8 text file to read.  Defaults to
            ``"Unit5_OCR.txt"`` relative to the current working directory,
            which is what the function always read before the parameter
            existed.

    Returns:
        The file contents as a string, or the sentinel string
        ``"Historical data file not found."`` when the file does not exist.
        Callers detect the failure by comparing against that exact sentinel,
        so it must not be reworded.
    """
    try:
        return Path(path).read_text(encoding="utf-8")
    except FileNotFoundError:
        return "Historical data file not found."
def extract_entities(text):
    """Count the people, events, dates, laws and organisations named in *text*.

    Runs the module-level spaCy pipeline over *text* and keeps only entities
    labelled PERSON, EVENT, DATE, LAW or ORG.

    Returns:
        A dict keyed by the entity's surface text.  Each value is a dict with
        ``"type"`` (the label of the first occurrence), ``"count"`` (number
        of occurrences) and ``"context"`` (an empty list; nothing in this
        function fills it).
    """
    wanted_labels = ("PERSON", "EVENT", "DATE", "LAW", "ORG")
    found = {}

    for span in nlp(text).ents:
        if span.label_ not in wanted_labels:
            continue

        record = found.get(span.text)
        if record is None:
            # First sighting: remember the label and start the tally.
            found[span.text] = {
                "type": span.label_,
                "count": 1,
                "context": [],
            }
        else:
            record["count"] += 1

    return found
def find_related_terms(term, text, window_size=100):
    """Find named entities that occur near each occurrence of *term*.

    The search for *term* is case-insensitive.  For every occurrence
    (overlapping ones included) a window of ``window_size`` characters on
    each side is cut from *text* and passed through the module-level spaCy
    pipeline; every entity found there, other than the term itself, is
    tallied.

    The window is cut from the original *text*, not a lowercased copy, so
    the NER model sees the capitalisation present in the source and the
    returned entity names keep their original casing.

    Args:
        term: The phrase to look for.  An empty or whitespace-only term
            yields an empty result instead of matching at every offset.
        text: The document to search.
        window_size: Number of characters of context kept on each side of
            an occurrence.

    Returns:
        A dict keyed by entity text.  Each value holds ``"type"`` (spaCy
        label of the first sighting), ``"count"`` (number of sightings) and
        ``"relevance"`` (1.0 for the first sighting plus 0.5 for each
        further one).
    """
    # An empty needle "occurs" at every position, which would trigger one
    # NER pass per character of the document.
    if not term.strip():
        return {}

    needle = term.lower()
    related = {}

    # A zero-width lookahead reports every start offset, so overlapping
    # occurrences are visited just as a find(term, index + 1) loop would.
    # Matching on the original text keeps offsets valid for slicing it.
    occurrence = re.compile(f"(?={re.escape(term)})", re.IGNORECASE)

    for match in occurrence.finditer(text):
        index = match.start()
        start = max(0, index - window_size)
        end = min(len(text), index + len(term) + window_size)
        context = text[start:end]

        for ent in nlp(context).ents:
            # Skip the search term itself; only neighbours are of interest.
            if ent.text.lower() == needle:
                continue

            entry = related.get(ent.text)
            if entry is None:
                related[ent.text] = {
                    "type": ent.label_,
                    "count": 1,
                    "relevance": 1.0,
                }
            else:
                entry["count"] += 1
                entry["relevance"] += 0.5

    return related
# spaCy entity labels that have a dedicated display category in CATEGORIES.
# Any other label is shown under _DEFAULT_CATEGORY.
_LABEL_TO_CATEGORY = {
    "PERSON": "People",
    "EVENT": "Events",
    "LAW": "Laws/Policies",
}
_DEFAULT_CATEGORY = "Concepts"


def generate_context_map(term):
    """Build a network figure of the entities that occur near *term*.

    The term becomes the central node; the ten related entities with the
    highest relevance (as reported by ``find_related_terms``) are linked to
    it.  Each node is coloured by its display category from ``CATEGORIES``.

    Args:
        term: The phrase typed by the user.  Surrounding whitespace is
            ignored.

    Returns:
        A matplotlib ``Figure``, or ``None`` when *term* is missing or
        blank, or when the historical data file could not be loaded.
    """
    if not term or not term.strip():
        return None
    term = term.strip()

    content = load_historical_data()
    if content == "Historical data file not found.":
        return None

    related_items = find_related_terms(term, content)
    top_items = sorted(
        related_items.items(),
        key=lambda item: item[1]["relevance"],
        reverse=True,
    )[:10]

    graph = nx.Graph()
    graph.add_node(term, category="Main Themes")
    for item_name, item_data in top_items:
        # Related items carry spaCy labels, but nodes are drawn per
        # CATEGORIES key; translate so every node gets a coloured marker.
        category = _LABEL_TO_CATEGORY.get(item_data["type"], _DEFAULT_CATEGORY)
        graph.add_node(item_name, category=category)
        graph.add_edge(
            term,
            item_name,
            weight=item_data["relevance"],
            length=2.0 / item_data["relevance"],
        )

    # Draw on an explicit figure/axes pair rather than pyplot's implicit
    # "current" figure, so the result does not depend on global state.
    fig, ax = plt.subplots(figsize=(12, 12))
    pos = nx.spring_layout(graph, k=1, iterations=50)

    for category, color in CATEGORIES.items():
        nodes = [
            node
            for node, attr in graph.nodes(data=True)
            if attr.get("category", "") == category
        ]
        if nodes:
            nx.draw_networkx_nodes(
                graph,
                pos,
                nodelist=nodes,
                node_color=color,
                node_size=2000,
                ax=ax,
            )

    nx.draw_networkx_edges(
        graph, pos, edge_color="white", width=1, alpha=0.5, ax=ax
    )
    nx.draw_networkx_labels(
        graph, pos, font_size=8, font_color="white", ax=ax
    )

    # Dark background for both the plotting area and the figure margin.
    ax.set_facecolor("#1a1a1a")
    fig.set_facecolor("#1a1a1a")
    ax.set_title(
        f"Historical Context Map for '{term}'", color="white", pad=20
    )

    # Unregister the figure from pyplot so repeated requests do not pile up
    # open figures; the Figure object itself remains usable by the caller.
    plt.close(fig)
    return fig
# Gradio front end: one text box in, one plot out, wired to
# generate_context_map.
_term_input = gr.Textbox(
    label="Enter a historical term from Unit 5",
    placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction",
)

# Sample terms offered as clickable examples in the UI.
_example_terms = [
    ["Civil War"],
    ["Abraham Lincoln"],
    ["Reconstruction"],
    ["Manifest Destiny"],
    ["Transcontinental Railroad"],
]

iface = gr.Interface(
    fn=generate_context_map,
    inputs=_term_input,
    outputs=gr.Plot(),
    title="Historical Context Mapper",
    description="This tool generates a network visualization showing the historical context and connections for terms from Unit 5 (1844-1877).",
    theme="darkhuggingface",
    examples=_example_terms,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()