# history_map / app.py
# Uploaded by ambrosfitz ("Create app.py", commit 7361754, 4.94 kB).
import gradio as gr
import networkx as nx
import matplotlib.pyplot as plt
import spacy
import pandas as pd
import numpy as np
from pathlib import Path
# Load spaCy's small English pipeline once at import time; it is used by
# extract_entities() and find_related_terms() for named-entity recognition.
nlp = spacy.load("en_core_web_sm")

# Node categories and their hex colors for the network visualization.
# "Main Themes" is used for the searched term's central node; the other
# keys are intended for related-entity nodes.
CATEGORIES = {
    "Main Themes": "#004d99",
    "Events": "#006400",
    "People": "#8b4513",
    "Laws/Policies": "#4b0082",
    "Concepts": "#800000"
}
def load_historical_data():
    """Return the full text of the Unit 5 data file.

    Returns the file contents as a string, or the sentinel string
    "Historical data file not found." when the file is absent
    (callers compare against that exact string).
    """
    data_path = Path("Unit5_OCR.txt")
    try:
        return data_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return "Historical data file not found."
def extract_entities(text):
    """Extract named entities and important terms from text.

    Runs the module-level spaCy pipeline over ``text`` and tallies
    entities of interest. Returns a dict mapping entity text to a
    record with its spaCy label ("type"), occurrence "count", and an
    (unpopulated) "context" list.
    """
    doc = nlp(text)
    entities = {}
    relevant_labels = ("PERSON", "EVENT", "DATE", "LAW", "ORG")
    for ent in doc.ents:
        # Skip entity types we do not chart.
        if ent.label_ not in relevant_labels:
            continue
        record = entities.get(ent.text)
        if record is None:
            entities[ent.text] = {
                "type": ent.label_,
                "count": 1,
                "context": []
            }
        else:
            record["count"] += 1
    return entities
def find_related_terms(term, text, window_size=100):
    """Find terms that appear near the search term.

    Scans every (possibly overlapping) occurrence of ``term`` in
    ``text`` case-insensitively, runs NER on a window of
    ``window_size`` characters on each side, and scores co-occurring
    entities. Returns a dict mapping entity text to a record with its
    spaCy label ("type"), a raw "count", and a "relevance" score that
    starts at 1.0 and grows by 0.5 per repeat sighting.
    """
    needle = term.lower()
    haystack = text.lower()
    related = {}
    position = haystack.find(needle)
    while position != -1:
        # Slice out the surrounding context, clamped to the text bounds.
        left = max(0, position - window_size)
        right = min(len(haystack), position + len(needle) + window_size)
        snippet = haystack[left:right]
        # Collect every entity in the window other than the term itself.
        for ent in nlp(snippet).ents:
            if ent.text.lower() == needle:
                continue
            entry = related.get(ent.text)
            if entry is None:
                related[ent.text] = {
                    "type": ent.label_,
                    "count": 1,
                    "relevance": 1.0
                }
            else:
                entry["count"] += 1
                entry["relevance"] += 0.5
        # Advance by one so overlapping matches are also visited.
        position = haystack.find(needle, position + 1)
    return related
def generate_context_map(term):
    """Generate a network visualization for the given term.

    Builds a star graph with ``term`` at the center and its top-10
    most relevant co-occurring entities around it, drawn on a dark
    matplotlib figure.

    Parameters:
        term: user-supplied search string.

    Returns:
        The matplotlib Figure containing the map, or None when the
        term is blank or the historical data file is missing.
    """
    if not term.strip():
        return None

    # Load historical data; load_historical_data() signals a missing
    # file with this exact sentinel string.
    content = load_historical_data()
    if content == "Historical data file not found.":
        return None

    # BUG FIX: related nodes were tagged with raw spaCy labels
    # ("PERSON", "EVENT", ...) which never match the CATEGORIES keys
    # ("People", "Events", ...), so the per-category draw loop below
    # skipped every related node and only their labels/edges appeared.
    # Map the labels to display categories; unknown labels fall back
    # to "Concepts".
    label_to_category = {
        "PERSON": "People",
        "EVENT": "Events",
        "DATE": "Events",
        "LAW": "Laws/Policies",
        "ORG": "Concepts",
    }

    # Create network graph
    G = nx.Graph()

    # Find related terms
    related_items = find_related_terms(term, content)

    # Central node always uses the "Main Themes" color.
    G.add_node(term, category="Main Themes")

    # Add related nodes (limit to top 10 by relevance)
    sorted_items = sorted(related_items.items(),
                          key=lambda x: x[1]["relevance"],
                          reverse=True)[:10]
    for item_name, item_data in sorted_items:
        G.add_node(item_name,
                   category=label_to_category.get(item_data["type"],
                                                  "Concepts"))
        # Higher relevance -> heavier edge and shorter preferred length.
        G.add_edge(term, item_name,
                   weight=item_data["relevance"],
                   length=2.0 / item_data["relevance"])

    # Create visualization
    plt.figure(figsize=(12, 12))
    plt.clf()

    # Set up the layout
    pos = nx.spring_layout(G, k=1, iterations=50)

    # Draw nodes, one pass per category so each gets its color.
    for category, color in CATEGORIES.items():
        nodes = [node for node, attr in G.nodes(data=True)
                 if attr.get("category", "") == category]
        nx.draw_networkx_nodes(G, pos, nodelist=nodes,
                               node_color=color,
                               node_size=2000)

    # Draw edges
    nx.draw_networkx_edges(G, pos, edge_color='white',
                           width=1, alpha=0.5)

    # Add labels
    labels = {node: node for node in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels, font_size=8,
                            font_color='white')

    # Set dark background
    plt.gca().set_facecolor('#1a1a1a')
    plt.gcf().set_facecolor('#1a1a1a')

    # Add title
    plt.title(f"Historical Context Map for '{term}'",
              color='white', pad=20)
    return plt.gcf()
# Create Gradio interface
_example_terms = [
    ["Civil War"],
    ["Abraham Lincoln"],
    ["Reconstruction"],
    ["Manifest Destiny"],
    ["Transcontinental Railroad"],
]

iface = gr.Interface(
    fn=generate_context_map,
    inputs=gr.Textbox(
        label="Enter a historical term from Unit 5",
        placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction",
    ),
    outputs=gr.Plot(),
    title="Historical Context Mapper",
    description="This tool generates a network visualization showing the historical context and connections for terms from Unit 5 (1844-1877).",
    theme="darkhuggingface",
    examples=_example_terms,
)

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()