# NOTE: the lines that stood here were residue from the web file viewer this
# source was copied from (a "Spaces: Sleeping" status banner, "File size:
# 4,944 Bytes", the id 7361754, and a 1-164 line-number gutter). They are not
# part of the program.
import re
from pathlib import Path

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import spacy
# Load the spaCy "en_core_web_sm" pipeline once at import time; the entity
# helpers in this file call this shared `nlp` object.
nlp = spacy.load("en_core_web_sm")
# Node categories shown in the context map, each mapped to the hex color
# used when drawing nodes of that category.
CATEGORIES = {
    "Main Themes": "#004d99",    # blue
    "Events": "#006400",         # dark green
    "People": "#8b4513",         # brown
    "Laws/Policies": "#4b0082",  # indigo
    "Concepts": "#800000",       # maroon
}
def load_historical_data():
    """Load the Unit 5 text data.

    Reads ``Unit5_OCR.txt`` (UTF-8) from the current working directory.

    Returns:
        The file's full contents as a string, or the sentinel string
        ``"Historical data file not found."`` when the file does not exist.
        Callers compare against that exact sentinel, so it must not change.
    """
    try:
        with open("Unit5_OCR.txt", "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return "Historical data file not found."
def extract_entities(text):
    """Extract selected named entities from ``text`` and count them.

    Only entities whose spaCy label is PERSON, EVENT, DATE, LAW or ORG are
    kept.

    Args:
        text: The text to analyse. Empty input yields an empty result
            without invoking the spaCy pipeline.

    Returns:
        A dict keyed by entity text. Each value is a dict with:
        ``"type"`` (the spaCy label of the first occurrence), ``"count"``
        (number of occurrences) and ``"context"`` (a list that this function
        creates empty and never fills).
    """
    if not text:
        return {}

    wanted_labels = {"PERSON", "EVENT", "DATE", "LAW", "ORG"}
    entities = {}
    for ent in nlp(text).ents:
        if ent.label_ not in wanted_labels:
            continue
        record = entities.get(ent.text)
        if record is None:
            entities[ent.text] = {
                "type": ent.label_,
                "count": 1,
                "context": [],
            }
        else:
            record["count"] += 1
    return entities
def find_related_terms(term, text, window_size=100):
    """Find named entities that appear near occurrences of ``term``.

    Every case-insensitive occurrence of ``term`` in ``text`` is located, a
    window of ``window_size`` characters on each side is cut out, and the
    spaCy pipeline is run on that window. Entities other than the term itself
    are tallied.

    Args:
        term: The search term. An empty term yields an empty result (a
            literal empty-string search would match at every character).
        text: The text to search.
        window_size: Number of characters of context kept on each side of an
            occurrence.

    Returns:
        A dict keyed by entity text (in the casing found in ``text``). Each
        value has ``"type"`` (spaCy label of the first sighting), ``"count"``
        (number of sightings) and ``"relevance"`` (1.0 for the first sighting
        plus 0.5 for each further one).
    """
    if not term or not text:
        return {}

    term_lower = term.lower()
    # Zero-width lookahead: every start position is reported, including
    # overlapping ones, and the indices refer to the original text (lowercasing
    # a copy can change string length for some characters).
    occurrence = re.compile("(?=" + re.escape(term) + ")", re.IGNORECASE)

    related = {}
    for match in occurrence.finditer(text):
        index = match.start()
        start = max(0, index - window_size)
        end = min(len(text), index + len(term) + window_size)
        # NER runs on the original-cased window; lowercasing the text first
        # removes the capitalisation the model uses to spot names.
        doc = nlp(text[start:end])
        for ent in doc.ents:
            if ent.text.lower() == term_lower:
                continue
            entry = related.get(ent.text)
            if entry is None:
                related[ent.text] = {
                    "type": ent.label_,
                    "count": 1,
                    "relevance": 1.0,
                }
            else:
                entry["count"] += 1
                entry["relevance"] += 0.5
    return related
# spaCy entity labels mapped onto the display categories (keys of CATEGORIES).
# Any label not listed here is drawn as _DEFAULT_CATEGORY.
_ENTITY_LABEL_TO_CATEGORY = {
    "PERSON": "People",
    "EVENT": "Events",
    "LAW": "Laws/Policies",
}
_DEFAULT_CATEGORY = "Concepts"


def generate_context_map(term):
    """Generate a network visualization for the given term.

    The term becomes the central node; up to ten related entities (highest
    relevance first, as reported by ``find_related_terms``) are attached to
    it and coloured by category.

    Args:
        term: The term entered by the user. Leading/trailing whitespace is
            ignored.

    Returns:
        A matplotlib ``Figure``, or ``None`` when the term is empty/blank or
        the historical data file could not be loaded.
    """
    if not term or not term.strip():
        return None
    term = term.strip()

    content = load_historical_data()
    if content == "Historical data file not found.":
        return None

    graph = nx.Graph()
    graph.add_node(term, category="Main Themes")

    # Keep only the ten most relevant related entities.
    related_items = find_related_terms(term, content)
    top_items = sorted(
        related_items.items(),
        key=lambda item: item[1]["relevance"],
        reverse=True,
    )[:10]
    for item_name, item_data in top_items:
        # Translate the spaCy label into a CATEGORIES key; nodes tagged with
        # the raw label would match no category and never be drawn.
        category = _ENTITY_LABEL_TO_CATEGORY.get(
            item_data["type"], _DEFAULT_CATEGORY
        )
        graph.add_node(item_name, category=category)
        graph.add_edge(
            term,
            item_name,
            weight=item_data["relevance"],
            length=2.0 / item_data["relevance"],
        )

    fig, ax = plt.subplots(figsize=(12, 12))
    pos = nx.spring_layout(graph, k=1, iterations=50)

    # Draw nodes one category at a time so each group gets its own color.
    for category, color in CATEGORIES.items():
        nodes = [
            node
            for node, attr in graph.nodes(data=True)
            if attr.get("category", "") == category
        ]
        if nodes:
            nx.draw_networkx_nodes(
                graph, pos, nodelist=nodes, node_color=color,
                node_size=2000, ax=ax,
            )

    nx.draw_networkx_edges(
        graph, pos, edge_color="white", width=1, alpha=0.5, ax=ax
    )
    labels = {node: node for node in graph.nodes()}
    nx.draw_networkx_labels(
        graph, pos, labels, font_size=8, font_color="white", ax=ax
    )

    # Dark background so the white edges and labels are visible.
    ax.set_facecolor("#1a1a1a")
    fig.set_facecolor("#1a1a1a")
    ax.set_title(f"Historical Context Map for '{term}'", color="white", pad=20)

    # Deregister the figure from pyplot so repeated requests do not pile up
    # open figures; the returned Figure object can still be rendered/saved.
    plt.close(fig)
    return fig
# Gradio UI: a single textbox feeds generate_context_map and the returned
# figure is shown in a plot component.
iface = gr.Interface(
    fn=generate_context_map,
    inputs=gr.Textbox(
        label="Enter a historical term from Unit 5",
        placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction",
    ),
    outputs=gr.Plot(),
    title="Historical Context Mapper",
    description=(
        "This tool generates a network visualization showing the historical "
        "context and connections for terms from Unit 5 (1844-1877)."
    ),
    theme="darkhuggingface",
    # One-click sample queries offered below the input box.
    examples=[
        ["Civil War"],
        ["Abraham Lincoln"],
        ["Reconstruction"],
        ["Manifest Destiny"],
        ["Transcontinental Railroad"],
    ],
)
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()