# history_map / app.py
# Uploaded by ambrosfitz ("Create app.py", commit 7361754, 4.94 kB).
import gradio as gr
import networkx as nx
import matplotlib.pyplot as plt
import spacy
import pandas as pd
import numpy as np
from pathlib import Path
# Load spaCy's small English pipeline once at import time; it is used by
# extract_entities() and find_related_terms() for named-entity recognition.
nlp = spacy.load("en_core_web_sm")

# Node categories and their hex colors for the network visualization.
# "Main Themes" is used for the searched term's central node; the other
# keys are intended for related-entity nodes.
CATEGORIES = {
    "Main Themes": "#004d99",
    "Events": "#006400",
    "People": "#8b4513",
    "Laws/Policies": "#4b0082",
    "Concepts": "#800000"
}
def load_historical_data():
    """Return the full text of the Unit 5 data file.

    Returns the file contents as a string, or the sentinel string
    "Historical data file not found." when the file is absent
    (callers compare against that exact string).
    """
    data_path = Path("Unit5_OCR.txt")
    try:
        return data_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return "Historical data file not found."
def extract_entities(text):
    """Extract named entities and important terms from text.

    Runs the module-level spaCy pipeline over ``text`` and tallies
    entities of interest. Returns a dict mapping entity text to a
    record with its spaCy label ("type"), occurrence "count", and an
    (unpopulated) "context" list.
    """
    doc = nlp(text)
    entities = {}
    relevant_labels = ("PERSON", "EVENT", "DATE", "LAW", "ORG")
    for ent in doc.ents:
        # Skip entity types we do not chart.
        if ent.label_ not in relevant_labels:
            continue
        record = entities.get(ent.text)
        if record is None:
            entities[ent.text] = {
                "type": ent.label_,
                "count": 1,
                "context": []
            }
        else:
            record["count"] += 1
    return entities
def find_related_terms(term, text, window_size=100):
    """Find terms that appear near the search term.

    Scans every (possibly overlapping) occurrence of ``term`` in
    ``text`` case-insensitively, runs NER on a window of
    ``window_size`` characters on each side, and scores co-occurring
    entities. Returns a dict mapping entity text to a record with its
    spaCy label ("type"), a raw "count", and a "relevance" score that
    starts at 1.0 and grows by 0.5 per repeat sighting.
    """
    needle = term.lower()
    haystack = text.lower()
    related = {}
    position = haystack.find(needle)
    while position != -1:
        # Slice out the surrounding context, clamped to the text bounds.
        left = max(0, position - window_size)
        right = min(len(haystack), position + len(needle) + window_size)
        snippet = haystack[left:right]
        # Collect every entity in the window other than the term itself.
        for ent in nlp(snippet).ents:
            if ent.text.lower() == needle:
                continue
            entry = related.get(ent.text)
            if entry is None:
                related[ent.text] = {
                    "type": ent.label_,
                    "count": 1,
                    "relevance": 1.0
                }
            else:
                entry["count"] += 1
                entry["relevance"] += 0.5
        # Advance by one so overlapping matches are also visited.
        position = haystack.find(needle, position + 1)
    return related
def generate_context_map(term):
    """Generate a network visualization for the given term.

    Builds a star graph with ``term`` at the center and its top-10
    most relevant co-occurring entities around it, drawn on a dark
    matplotlib figure.

    Parameters:
        term: user-supplied search string.

    Returns:
        The matplotlib Figure containing the map, or None when the
        term is blank or the historical data file is missing.
    """
    if not term.strip():
        return None

    # Load historical data; load_historical_data() signals a missing
    # file with this exact sentinel string.
    content = load_historical_data()
    if content == "Historical data file not found.":
        return None

    # BUG FIX: related nodes were tagged with raw spaCy labels
    # ("PERSON", "EVENT", ...) which never match the CATEGORIES keys
    # ("People", "Events", ...), so the per-category draw loop below
    # skipped every related node and only their labels/edges appeared.
    # Map the labels to display categories; unknown labels fall back
    # to "Concepts".
    label_to_category = {
        "PERSON": "People",
        "EVENT": "Events",
        "DATE": "Events",
        "LAW": "Laws/Policies",
        "ORG": "Concepts",
    }

    # Create network graph
    G = nx.Graph()

    # Find related terms
    related_items = find_related_terms(term, content)

    # Central node always uses the "Main Themes" color.
    G.add_node(term, category="Main Themes")

    # Add related nodes (limit to top 10 by relevance)
    sorted_items = sorted(related_items.items(),
                          key=lambda x: x[1]["relevance"],
                          reverse=True)[:10]
    for item_name, item_data in sorted_items:
        G.add_node(item_name,
                   category=label_to_category.get(item_data["type"],
                                                  "Concepts"))
        # Higher relevance -> heavier edge and shorter preferred length.
        G.add_edge(term, item_name,
                   weight=item_data["relevance"],
                   length=2.0 / item_data["relevance"])

    # Create visualization
    plt.figure(figsize=(12, 12))
    plt.clf()

    # Set up the layout
    pos = nx.spring_layout(G, k=1, iterations=50)

    # Draw nodes, one pass per category so each gets its color.
    for category, color in CATEGORIES.items():
        nodes = [node for node, attr in G.nodes(data=True)
                 if attr.get("category", "") == category]
        nx.draw_networkx_nodes(G, pos, nodelist=nodes,
                               node_color=color,
                               node_size=2000)

    # Draw edges
    nx.draw_networkx_edges(G, pos, edge_color='white',
                           width=1, alpha=0.5)

    # Add labels
    labels = {node: node for node in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels, font_size=8,
                            font_color='white')

    # Set dark background
    plt.gca().set_facecolor('#1a1a1a')
    plt.gcf().set_facecolor('#1a1a1a')

    # Add title
    plt.title(f"Historical Context Map for '{term}'",
              color='white', pad=20)
    return plt.gcf()
# Create Gradio interface
_example_terms = [
    ["Civil War"],
    ["Abraham Lincoln"],
    ["Reconstruction"],
    ["Manifest Destiny"],
    ["Transcontinental Railroad"],
]

iface = gr.Interface(
    fn=generate_context_map,
    inputs=gr.Textbox(
        label="Enter a historical term from Unit 5",
        placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction",
    ),
    outputs=gr.Plot(),
    title="Historical Context Mapper",
    description="This tool generates a network visualization showing the historical context and connections for terms from Unit 5 (1844-1877).",
    theme="darkhuggingface",
    examples=_example_terms,
)

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()