Spaces:

AsiminaM
/

xNeSy

Sleeping

App Files Files Community

xNeSy / app.py

AsiminaM

revert to initial hf code

107d2c6 verified 19 days ago

raw

history blame contribute delete

107 kB


	# app.py - AsiminaM
	import gradio as gr
	import rdflib
	import re
	import os
	import tempfile
	from huggingface_hub import InferenceClient
	import PyPDF2
	from docx import Document
	import pandas as pd
	import networkx as nx
	import matplotlib
	matplotlib.use('Agg') # Use non-interactive backend
	import matplotlib.pyplot as plt
	from matplotlib.backends.backend_agg import FigureCanvasAgg
	import plotly.graph_objects as go
	import plotly.express as px
	from file_processing import handle_file_upload as fp_handle_file_upload
	from knowledge import (
	show_graph_contents as kb_show_graph_contents,
	visualize_knowledge_graph as kb_visualize_knowledge_graph,
	import_knowledge_from_json_file as kb_import_json,
	save_knowledge_graph as kb_save_knowledge_graph,
	load_knowledge_graph as kb_load_knowledge_graph,
	graph as kb_graph,
	delete_all_knowledge as kb_delete_all_knowledge,
	add_to_graph as kb_add_to_graph
	)
	from knowledge import create_comprehensive_backup as kb_create_comprehensive_backup, BACKUP_FILE
	from responses import respond as rqa_respond

	# ==========================================================
	# 🧠 1. Global Knowledge Graph with Persistent Storage
	# ==========================================================
	import json
	import pickle
	from datetime import datetime

	# Storage file paths (relative, so they resolve against the process working directory)
	KNOWLEDGE_FILE = "knowledge_graph.pkl"
	# NOTE(review): this rebinds the BACKUP_FILE name imported from `knowledge`
	# earlier in this file; confirm both modules are meant to use the same path.
	BACKUP_FILE = "knowledge_backup.json"

	# RDF graph owned by this module and used by the functions defined in this file.
	# NOTE(review): the editing handlers further down operate on `kb_graph` imported
	# from `knowledge`, not on this object; confirm which graph is authoritative.
	graph = rdflib.Graph()

	# Mapping of fact IDs to triples for editing operations
	# NOTE(review): the editing handlers import `fact_index` from `knowledge` inside
	# their bodies, so this module-level dict appears unused by them; verify.
	fact_index = {}

	def import_knowledge_from_json_file(file):
	    """Load facts from a JSON file into the module-level graph.

	    Accepted layouts:
	    - an object with a ``facts`` list (with or without ``metadata``)
	    - a bare list of fact objects

	    Each fact supplies ``subject``/``predicate``/``object`` (or the
	    ``full_*`` variants as a fallback). Subjects and predicates are stored
	    as ``urn:`` URI references, objects as literals.

	    Args:
	        file: A path-like value, or an object exposing the path as ``.name``.

	    Returns:
	        A status string with the imported/skipped counts, or a warning/error
	        message when nothing could be imported.
	    """
	    def _urn_ref(term):
	        # Leave values that already carry the urn: scheme untouched
	        text = term if str(term).startswith('urn:') else f"urn:{term}"
	        return rdflib.URIRef(text)

	    try:
	        if file is None:
	            return "⚠️ No file selected."

	        if hasattr(file, 'name'):
	            path = file.name
	        else:
	            path = str(file)
	        if not os.path.exists(path):
	            return f"⚠️ File not found: {path}"

	        with open(path, 'r', encoding='utf-8') as handle:
	            payload = json.load(handle)

	        # Reduce both supported layouts to a plain list of fact dicts
	        if isinstance(payload, list):
	            records = payload
	        elif isinstance(payload, dict) and 'facts' in payload:
	            records = payload['facts']
	        else:
	            return "❌ Unsupported JSON structure. Expect an object with 'facts' or a list of facts."

	        imported = 0
	        rejected = 0
	        for entry in records:
	            try:
	                subj = entry.get('subject') or entry.get('full_subject')
	                pred = entry.get('predicate') or entry.get('full_predicate')
	                value = entry.get('object') or entry.get('full_object')
	                if subj and pred and value is not None:
	                    triple = (_urn_ref(subj), _urn_ref(pred), rdflib.Literal(value))
	                    graph.add(triple)
	                    imported += 1
	                else:
	                    rejected += 1
	            except Exception:
	                # A malformed entry only costs that entry, not the whole import
	                rejected += 1

	        save_knowledge_graph()
	        return f"✅ Imported {imported} facts. Skipped {rejected}. Total facts: {len(graph)}."
	    except Exception as e:
	        return f"❌ Import failed: {e}"

	def handle_import_json(file):
	    """Gradio callback: run the JSON knowledge import and hand back its status text."""
	    return import_knowledge_from_json_file(file)

	def save_knowledge_graph():
	    """Persist the module-level graph to disk in two formats.

	    Writes a pickle of ``graph`` to ``KNOWLEDGE_FILE`` and a JSON listing of
	    every triple to ``BACKUP_FILE``.

	    Returns:
	        A status string: the saved-fact count on success, or the error text
	        if anything raised.
	    """
	    try:
	        # Binary copy of the graph object itself
	        with open(KNOWLEDGE_FILE, 'wb') as pickle_out:
	            pickle.dump(graph, pickle_out)

	        # Human-readable companion file; ids are 1-based enumeration positions
	        snapshot = {
	            "timestamp": datetime.now().isoformat(),
	            "total_facts": len(graph),
	            "facts": [
	                {
	                    "id": position,
	                    "subject": str(s),
	                    "predicate": str(p),
	                    "object": str(o),
	                }
	                for position, (s, p, o) in enumerate(graph, start=1)
	            ],
	        }
	        with open(BACKUP_FILE, 'w', encoding='utf-8') as json_out:
	            json.dump(snapshot, json_out, indent=2, ensure_ascii=False)

	        print(f"Saved {len(graph)} facts to persistent storage")
	        return f" Saved {len(graph)} facts to storage"
	    except Exception as e:
	        failure = f" Error saving knowledge: {e}"
	        print(failure)
	        return failure

	def load_knowledge_graph():
	    """Replace the module-level graph with the pickled one, if a file exists.

	    Returns:
	        A status string describing what was loaded, that no file was found,
	        or the error that occurred.
	    """
	    global graph

	    try:
	        if not os.path.exists(KNOWLEDGE_FILE):
	            message = "📂 No existing knowledge file found, starting fresh"
	            print(message)
	            return message

	        # NOTE(review): pickle.load executes whatever the file encodes; this is
	        # only safe while KNOWLEDGE_FILE is written exclusively by this app.
	        with open(KNOWLEDGE_FILE, 'rb') as stored:
	            graph = pickle.load(stored)
	        message = f"📂 Loaded {len(graph)} facts from storage"
	        print(message)
	        return message
	    except Exception as e:
	        failure = f" Error loading knowledge: {e}"
	        print(failure)
	        return failure

	def create_and_get_backup():
	    """Write a comprehensive backup and report where it landed.

	    Returns:
	        A ``(path, status)`` pair. ``path`` is ``BACKUP_FILE`` when a file is
	        available (including the error-backup case) and ``None`` when no
	        file was produced.
	    """
	    try:
	        print(f"Creating backup for graph with {len(graph)} facts")
	        create_comprehensive_backup()

	        if not os.path.exists(BACKUP_FILE):
	            print(" Backup file was not created")
	            return None, " Failed to create backup file"

	        # Read the file back to confirm what was actually written
	        with open(BACKUP_FILE, 'r', encoding='utf-8') as handle:
	            written = json.load(handle)
	        fact_count = written.get('metadata', {}).get('total_facts', 0)
	        print(f" Knowledge backup created with {fact_count} facts")

	        if fact_count == 0:
	            print("⚠️ Warning: Backup file created but contains no facts")
	            # Swap in the instructional placeholder so the file still shows the structure
	            create_empty_backup_structure()

	        return BACKUP_FILE, f" Backup created successfully with {fact_count} facts!"
	    except Exception as e:
	        print(f" Error creating backup: {e}")
	        # Leave a minimal file behind so the download still has something to serve
	        create_error_backup(str(e))
	        return BACKUP_FILE, f"⚠️ Backup created with errors: {e}"

	def verify_backup_contents():
	    """Summarize the JSON backup file as display text.

	    Returns:
	        A report with the file's path, size, metadata fields and up to five
	        sample facts; a hint when the file is missing; or the error text if
	        reading it failed.
	    """
	    try:
	        if not os.path.exists(BACKUP_FILE):
	            return " No backup file found. Click 'Create Knowledge Backup' first."

	        with open(BACKUP_FILE, 'r', encoding='utf-8') as handle:
	            payload = json.load(handle)
	        meta = payload.get('metadata', {})
	        entries = payload.get('facts', [])

	        report = [
	            "📊 Backup File Verification:\n\n",
	            f"File: `{BACKUP_FILE}`\n",
	            f"Size: {os.path.getsize(BACKUP_FILE):,} bytes\n",
	            f"Created: {meta.get('timestamp', 'Unknown')}\n",
	            f"Total Facts: {meta.get('total_facts', 0)}\n",
	            f"Backup Type: {meta.get('backup_type', 'Unknown')}\n\n",
	        ]

	        if not entries:
	            report.append("⚠️ No facts found in backup file!\n")
	        else:
	            report.append("Sample Facts (first 5):\n")
	            for number, entry in enumerate(entries[:5], start=1):
	                report.append(
	                    f"{number}. {entry.get('subject')} {entry.get('predicate')} {entry.get('object')}\n"
	                )
	            overflow = len(entries) - 5
	            if overflow > 0:
	                report.append(f"\n... and {overflow} more facts\n")

	        return "".join(report)
	    except Exception as e:
	        return f" Error verifying backup: {e}"

	def get_knowledge_file():
	    """Legacy download hook: create a backup and return only its file path."""
	    # create_and_get_backup() yields (path, status); the status is dropped here
	    return create_and_get_backup()[0]

	def create_comprehensive_backup():
	    """Dump every triple in the graph to ``BACKUP_FILE`` as JSON.

	    Each fact is stored both in shortened form (text after the last ``:``
	    of subject and predicate) and in full. A readable text backup is written
	    afterwards. On any failure an error backup is written instead.
	    """
	    def _short(term):
	        # Keep only what follows the last ':' (drops the urn: prefix)
	        text = str(term)
	        return text.split(':')[-1] if ':' in text else text

	    try:
	        print(f"Creating backup for graph with {len(graph)} facts")

	        metadata = {
	            "timestamp": datetime.now().isoformat(),
	            "total_facts": len(graph),
	            "backup_type": "comprehensive_knowledge_base",
	            "graph_size": len(graph),
	        }
	        facts = [
	            {
	                "id": position,
	                "subject": _short(s),
	                "predicate": _short(p),
	                "object": str(o),
	                "full_subject": str(s),
	                "full_predicate": str(p),
	                "full_object": str(o),
	            }
	            for position, (s, p, o) in enumerate(graph, start=1)
	        ]
	        # The recorded total reflects the facts actually serialized
	        written = len(facts)
	        metadata["total_facts"] = written

	        with open(BACKUP_FILE, 'w', encoding='utf-8') as handle:
	            json.dump({"metadata": metadata, "facts": facts}, handle, indent=2, ensure_ascii=False)

	        # Companion plain-text version
	        create_readable_backup()

	        print(f"Created comprehensive backup with {written} facts")
	    except Exception as e:
	        print(f" Error creating comprehensive backup: {e}")
	        # Still leave a minimal backup file behind
	        create_error_backup(str(e))

	def create_empty_backup_structure():
	    """Write a placeholder backup (zero facts plus usage instructions) to ``BACKUP_FILE``.

	    Errors are printed rather than raised.
	    """
	    try:
	        placeholder = {}
	        placeholder["metadata"] = {
	            "timestamp": datetime.now().isoformat(),
	            "total_facts": 0,
	            "backup_type": "empty_knowledge_base",
	            "message": "No facts found in knowledge graph",
	        }
	        placeholder["facts"] = []
	        placeholder["instructions"] = {
	            "how_to_add_knowledge": [
	                "1. Add text directly using the 'Add Knowledge from Text' box",
	                "2. Upload documents (PDF, DOCX, TXT, CSV) using the file upload",
	                "3. Process files to extract knowledge automatically",
	                "4. Use 'Save Knowledge' to persist your data",
	            ]
	        }

	        with open(BACKUP_FILE, 'w', encoding='utf-8') as handle:
	            json.dump(placeholder, handle, indent=2, ensure_ascii=False)

	        print(" Created empty backup structure")
	    except Exception as e:
	        print(f" Error creating empty backup: {e}")

	def create_error_backup(error_message):
	    """Write a zero-fact backup to ``BACKUP_FILE`` that records ``error_message``.

	    Args:
	        error_message: Text stored under ``metadata.error`` in the file.

	    Errors while writing are printed rather than raised.
	    """
	    try:
	        metadata = {
	            "timestamp": datetime.now().isoformat(),
	            "total_facts": 0,
	            "backup_type": "error_backup",
	            "error": error_message,
	        }
	        document = {
	            "metadata": metadata,
	            "facts": [],
	            "note": "An error occurred while creating the backup. Please try again.",
	        }

	        with open(BACKUP_FILE, 'w', encoding='utf-8') as handle:
	            json.dump(document, handle, indent=2, ensure_ascii=False)

	        print(f" Created error backup: {error_message}")
	    except Exception as e:
	        print(f" Error creating error backup: {e}")

	def create_readable_backup():
	    """Write a plain-text rendering of the graph to ``knowledge_readable.txt``.

	    Facts are grouped under a heading per subject (subject and predicate are
	    shortened to the text after their last ``:``), followed by a summary.
	    An empty graph produces usage instructions instead.

	    If building or writing the report fails, a short error report is written
	    to the same file; if that also fails, the failure is only printed.
	    Nothing is raised to the caller.
	    """
	    output_path = "knowledge_readable.txt"
	    stamp_format = '%Y-%m-%d %H:%M:%S'

	    try:
	        print(f"Creating readable backup for {len(graph)} facts")

	        # Collect pieces and join once instead of repeated string +=
	        parts = [
	            "# Knowledge Base Backup\n",
	            f"Generated: {datetime.now().strftime(stamp_format)}\n",
	            f"Total Facts: {len(graph)}\n\n",
	        ]

	        if len(graph) == 0:
	            parts.extend([
	                "No facts in knowledge base.\n\n",
	                "## How to Add Knowledge:\n",
	                "1. Add text directly using the 'Add Knowledge from Text' box\n",
	                "2. Upload documents (PDF, DOCX, TXT, CSV) using the file upload\n",
	                "3. Process files to extract knowledge automatically\n",
	                "4. Use 'Save Knowledge' to persist your data\n",
	            ])
	        else:
	            # Group facts by subject for better organization
	            facts_by_subject = {}
	            fact_count = 0
	            for s, p, o in graph:
	                subject = str(s).split(':')[-1]
	                predicate = str(p).split(':')[-1]
	                facts_by_subject.setdefault(subject, []).append(f"{predicate}: {str(o)}")
	                fact_count += 1

	            for subject, facts in facts_by_subject.items():
	                parts.append(f"## {subject}\n")
	                parts.extend(f"- {fact}\n" for fact in facts)
	                parts.append("\n")

	            parts.append("\n## Summary\n")
	            parts.append(f"Total facts processed: {fact_count}\n")
	            parts.append(f"Unique subjects: {len(facts_by_subject)}\n")

	        with open(output_path, 'w', encoding='utf-8') as f:
	            f.write("".join(parts))

	        print(f" Created readable backup: knowledge_readable.txt with {len(graph)} facts")

	    except Exception as e:
	        print(f" Error creating readable backup: {e}")
	        # Best effort: leave a minimal report describing the failure
	        try:
	            error_text = (
	                "# Knowledge Base Backup (Error)\n"
	                f"Generated: {datetime.now().strftime(stamp_format)}\n"
	                f"Error: {e}\n"
	                f"Total Facts: {len(graph)}\n"
	            )
	            with open(output_path, 'w', encoding='utf-8') as f:
	                f.write(error_text)
	            print(" Created error-readable backup")
	        except Exception:
	            # Catch Exception, not everything: a bare except would also
	            # swallow KeyboardInterrupt and SystemExit.
	            print(" Failed to create even error-readable backup")

	def debug_backup_process():
	    """Build a troubleshooting report for the backup pipeline.

	    Reports the graph's size and type, whether the storage files exist, up
	    to five raw triples, and the outcome of a live call to
	    ``create_comprehensive_backup()``.

	    Returns:
	        The report as a single string.
	    """
	    report = [" Backup Debug Information:\n\n"]

	    # Graph state
	    report.append("Graph State:\n")
	    report.append(f"• Graph length: {len(graph)}\n")
	    report.append(f"• Graph type: {type(graph)}\n")
	    report.append(f"• Graph empty: {len(graph) == 0}\n\n")

	    # Files on disk
	    report.append("File Status:\n")
	    report.append(f"• Knowledge file exists: {os.path.exists(KNOWLEDGE_FILE)}\n")
	    report.append(f"• Backup file exists: {os.path.exists(BACKUP_FILE)}\n")
	    report.append(f"• Readable file exists: {os.path.exists('knowledge_readable.txt')}\n\n")

	    # A handful of raw triples, if there are any
	    if len(graph) == 0:
	        report.append("No facts in graph\n\n")
	    else:
	        report.append("Sample Facts (first 5):\n")
	        for position, (s, p, o) in enumerate(graph):
	            if position >= 5:
	                break
	            report.append(f"• {s} {p} {o}\n")
	        report.append("\n")

	    # Exercise the real backup routine and inspect its output
	    report.append("Testing Backup Creation:\n")
	    try:
	        create_comprehensive_backup()
	        report.append("• Backup creation: Success\n")

	        if not os.path.exists(BACKUP_FILE):
	            report.append("• Backup file: Not created\n")
	        else:
	            with open(BACKUP_FILE, 'r', encoding='utf-8') as handle:
	                written = json.load(handle)
	            stored_total = written.get('metadata', {}).get('total_facts', 0)
	            report.append(f"• Facts in backup: {stored_total}\n")
	            report.append(f"• Backup metadata: {written.get('metadata', {})}\n")
	    except Exception as e:
	        report.append(f"• Backup creation: Error: {e}\n")

	    return "".join(report)

	def show_storage_info():
	    """Describe the storage files: which exist, the pickle's size, and the fact count.

	    Returns:
	        The description as a single string.
	    """
	    def _presence(found):
	        return ' Exists' if found else ' Not found'

	    pickle_present = os.path.exists(KNOWLEDGE_FILE)
	    backup_present = os.path.exists(BACKUP_FILE)
	    readable_present = os.path.exists('knowledge_readable.txt')

	    sections = [
	        "📁 Storage Information:\n\n",
	        f"Primary Storage: `{KNOWLEDGE_FILE}` {_presence(pickle_present)}\n",
	        f"Backup Storage: `{BACKUP_FILE}` {_presence(backup_present)}\n",
	        f"Readable Backup: `knowledge_readable.txt` {_presence(readable_present)}\n\n",
	    ]

	    # Size is only reported for the primary pickle file
	    if pickle_present:
	        size_in_bytes = os.path.getsize(KNOWLEDGE_FILE)
	        sections.append(f"File Size: {size_in_bytes:,} bytes\n")

	    sections.append(f"Total Facts: {len(graph)}\n\n")

	    sections.extend([
	        "How to Access:\n",
	        "• On Hugging Face Spaces: Files are in `/home/user/app/`\n",
	        "• On Local Machine: Files are in your project folder\n",
	        "• Use ' Download Knowledge' button to get the JSON backup\n",
	    ])

	    return "".join(sections)


	def extract_triples(text):
	    """Extract (subject, predicate, object) string triples from free text.

	    Combines four sources, in this order: an ``(entity, 'type', 'entity')``
	    triple per detected entity, structured key/value extraction, the
	    improved sentence patterns, and the original sentence patterns.

	    Triples are stripped, validated (subject and object longer than 2
	    characters, predicate longer than 1), truncated to 100/50/200
	    characters and de-duplicated while preserving first-seen order.

	    Args:
	        text: The text to mine.

	    Returns:
	        A list of unique ``(subject, predicate, object)`` tuples.
	    """
	    print(f"Extracting knowledge from {len(text)} characters...")

	    # Extract entities (people, organizations, locations, dates)
	    entities = extract_entities(text)
	    triples = [(entity, 'type', 'entity') for entity in entities]

	    # Extract structured data (key-value pairs)
	    triples.extend(extract_structured_triples(text))

	    # Extract regular sentences with improved parsing
	    triples.extend(extract_regular_triples_improved(text, entities))

	    # Also try original extraction as backup for coverage
	    triples.extend(extract_regular_triples(text))

	    unique_triples = []
	    seen = set()  # O(1) membership instead of scanning the result list
	    for s, p, o in triples:
	        if not (s and p and o):
	            continue
	        # Strip first so whitespace padding cannot satisfy the length checks
	        s, p, o = s.strip(), p.strip(), o.strip()
	        if len(s) <= 2 or len(p) <= 1 or len(o) <= 2:
	            continue
	        candidate = (s[:100], p[:50], o[:200])  # Limit length
	        if candidate not in seen:
	            seen.add(candidate)
	            unique_triples.append(candidate)

	    print(f"Total extracted {len(unique_triples)} unique triples")
	    for i, (s, p, o) in enumerate(unique_triples[:10]):
	        print(f" {i+1}. {s} {p} {o}")

	    return unique_triples

	def extract_entities(text):
	    """Extract candidate named entities from text using regex heuristics.

	    Collects, in this order: runs of two or three capitalized words, text
	    preceding an organization suffix (Inc, Ltd, University, ...), a
	    capitalized word or pair following "in ", "at ", "near " or "from ",
	    and dates (``d/m/y``-style or a bare four-digit year).

	    Args:
	        text: The text to scan.

	    Returns:
	        Up to 50 distinct entity strings longer than 3 characters, in
	        first-seen order.
	    """
	    found = []

	    # Capitalized word patterns (likely proper nouns)
	    found.extend(re.findall(r'\b[A-Z][a-z]+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b', text))

	    # Organizations: the alternation must use a bare '|'; an escaped '\|'
	    # matches a literal pipe character and so never matches normal prose.
	    org_patterns = (
	        r'([A-Z][a-zA-Z\s]+)\s+(Inc|Ltd|LLC|Corp|Corporation|Company|Co\.|Ltd\.)',
	        r'([A-Z][a-zA-Z\s]+)\s+(University|Institute|Lab|Laboratory)',
	    )
	    for pattern in org_patterns:
	        found.extend(name.strip() for name, _suffix in re.findall(pattern, text))

	    # Locations (cities, countries) introduced by a preposition.
	    # Raw f-string so '\s' reaches the regex engine without an escape warning.
	    for keyword in ('in ', 'at ', 'near ', 'from '):
	        found.extend(re.findall(rf'{keyword}([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)', text))

	    # Dates
	    found.extend(re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{4}\b', text))

	    # De-duplicate with dict.fromkeys so order is deterministic; a set would
	    # make the 50-item cut-off keep an arbitrary subset.
	    cleaned = (candidate.strip() for candidate in found)
	    unique = list(dict.fromkeys(c for c in cleaned if len(c) > 3))
	    return unique[:50]

	def extract_regular_triples_improved(text, entities):
	    """Extract triples from sentences using verb-family and relative-clause patterns.

	    For each sentence of at least 15 characters, the first verb-family
	    pattern that yields a subject and object longer than 3 characters
	    contributes one triple with that family's normalized predicate.
	    Independently, every matching "which"/"that"/"who" clause contributes a
	    ``'has property'`` triple whose description is capped at 150 characters.

	    Args:
	        text: The text to mine.
	        entities: Accepted for interface compatibility; not used here.

	    Returns:
	        A list of ``(subject, predicate, object)`` string tuples.
	    """
	    ignore_case = re.IGNORECASE

	    # Built once per call instead of once per sentence. Alternations use a
	    # bare '|': an escaped '\|' matches a literal pipe, so nothing matched.
	    verb_rules = [
	        # Subject-Verb-Object patterns
	        (re.compile(r'([A-Z][a-zA-Z\s]+(?:,\s+[A-Z][a-zA-Z\s]+)*)\s+(is|are|was|were|becomes|represents|means|refers to|denotes)\s+(.+)', ignore_case), 'relates to'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(uses|employs|utilizes|applies)\s+(.+)', ignore_case), 'uses'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(develops|created|designed|implemented)\s+(.+)', ignore_case), 'creates'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(requires|needs|demands)\s+(.+)', ignore_case), 'requires'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(enables|allows|permits)\s+(.+)', ignore_case), 'enables'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(affects|impacts|influences)\s+(.+)', ignore_case), 'affects'),

	        # Research/technical patterns
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(found|discovered|identified|observed|detected)\s+(.+)', ignore_case), 'discovered'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(studies|analyzes|examines|investigates)\s+(.+)', ignore_case), 'studies'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(proposes|suggests|recommends)\s+(.+)', ignore_case), 'proposes'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(results in|leads to|causes)\s+(.+)', ignore_case), 'causes'),

	        # Relationships
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(works with|collaborates with|partnered with)\s+(.+)', ignore_case), 'works with'),
	        (re.compile(r'([A-Z][a-zA-Z\s]+)\s+(located in|based in|situated in)\s+(.+)', ignore_case), 'located in'),
	    ]

	    # Relative clauses are matched case-sensitively: subject must start uppercase
	    clause_rules = [
	        re.compile(r'([A-Z][a-zA-Z\s]+)\s+which\s+(.+)'),
	        re.compile(r'([A-Z][a-zA-Z\s]+)\s+that\s+(.+)'),
	        re.compile(r'([A-Z][a-zA-Z\s]+)\s+who\s+(.+)'),
	    ]

	    leading_article = re.compile(r'^(the|a|an)\s+', ignore_case)

	    triples = []
	    for raw_sentence in re.split(r'[.!?\n]+', text):
	        sentence = raw_sentence.strip()
	        if len(sentence) < 15:  # Skip very short sentences
	            continue

	        for regex, predicate in verb_rules:
	            match = regex.search(sentence)
	            if not match:
	                continue
	            subject = leading_article.sub('', match.group(1).strip()).strip()
	            object_val = leading_article.sub('', match.group(3).strip()).strip()
	            if subject and object_val and len(subject) > 3 and len(object_val) > 3:
	                triples.append((subject, predicate, object_val))
	                break  # one verb-family triple per sentence

	        # Also extract simple clauses with 'that', 'which', 'who'
	        for regex in clause_rules:
	            match = regex.search(sentence)
	            if not match:
	                continue
	            subject = match.group(1).strip()
	            description = match.group(2).strip()
	            if subject and description and len(subject) > 3 and len(description) > 3:
	                triples.append((subject, 'has property', description[:150]))

	    return triples

	def extract_structured_triples(text):
	    """Extract ``(key, 'is', value)`` triples from key/value style lines.

	    Three passes run over the lines of ``text`` and their results are
	    concatenated in pass order:
	    1. Field-specific patterns (dates, ids, names, contacts, amounts, ...);
	       at most one triple per line.
	    2. Generic ``key: value``, ``key = value`` and ``key - value`` patterns,
	       each tried on every line.
	    3. A plain split on the first ``:`` for lines longer than 10 characters.

	    Args:
	        text: The text to scan, one candidate pair per line.

	    Returns:
	        A list of ``(subject, predicate, value)`` string tuples; duplicates
	        across passes are not removed here.
	    """
	    rows = text.split('\n')
	    found = []

	    # Pass 1: (regex, subject, predicate) rules for well-known field names
	    field_rules = [
	        # Date patterns
	        (r'date\s:?\s([0-9\/\-\.]+)', 'date', 'is'),
	        (r'time\s:?\s([0-9:]+)', 'time', 'is'),
	        (r'created\s:?\s([0-9\/\-\.]+)', 'created_date', 'is'),
	        (r'modified\s:?\s([0-9\/\-\.]+)', 'modified_date', 'is'),

	        # ID and reference patterns
	        (r'id\s:?\s([A-Z0-9\-]+)', 'id', 'is'),
	        (r'number\s:?\s([A-Z0-9\-]+)', 'number', 'is'),
	        (r'code\s:?\s([A-Z0-9\-]+)', 'code', 'is'),
	        (r'reference\s:?\s([A-Z0-9\-]+)', 'reference', 'is'),

	        # Name and title patterns
	        (r'name\s:?\s([A-Za-z\s&.,]+)', 'name', 'is'),
	        (r'title\s:?\s([A-Za-z\s&.,]+)', 'title', 'is'),
	        (r'company\s:?\s([A-Za-z\s&.,]+)', 'company', 'is'),
	        (r'organization\s:?\s([A-Za-z\s&.,]+)', 'organization', 'is'),

	        # Contact patterns
	        (r'email\s:?\s([A-Za-z0-9@\.\-]+)', 'email', 'is'),
	        (r'phone\s:?\s([0-9\s\-\+]+)', 'phone', 'is'),
	        (r'address\s:?\s([A-Za-z0-9\s\-\.,]+)', 'address', 'is'),

	        # Description patterns
	        (r'description\s:?\s([A-Za-z0-9\s\-\.,]+)', 'description', 'is'),
	        (r'type\s:?\s([A-Za-z0-9\s\-\.,]+)', 'type', 'is'),
	        (r'category\s:?\s([A-Za-z0-9\s\-\.,]+)', 'category', 'is'),
	        (r'status\s:?\s([A-Za-z0-9\s\-\.,]+)', 'status', 'is'),

	        # Location patterns
	        (r'location\s:?\s([A-Za-z0-9\s\-\.,]+)', 'location', 'is'),
	        (r'department\s:?\s([A-Za-z0-9\s\-\.,]+)', 'department', 'is'),
	        (r'section\s:?\s([A-Za-z0-9\s\-\.,]+)', 'section', 'is'),

	        # Amount patterns
	        (r'amount\s:?\s\$?([0-9,]+\.?[0-9]*)', 'amount', 'is'),
	        (r'total\s:?\s\$?([0-9,]+\.?[0-9]*)', 'total', 'is'),
	        (r'price\s:?\s\$?([0-9,]+\.?[0-9]*)', 'price', 'is'),
	        (r'cost\s:?\s\$?([0-9,]+\.?[0-9]*)', 'cost', 'is'),
	    ]

	    for row in rows:
	        stripped = row.strip()
	        if len(stripped) < 5:
	            continue
	        for regex, subject, predicate in field_rules:
	            hit = re.search(regex, stripped, re.IGNORECASE)
	            if hit is None:
	                continue
	            captured = hit.group(1).strip()
	            if captured and len(captured) > 1:
	                found.append((subject, predicate, captured))
	                break  # Only one match per line

	    # Pass 2: generic separators, applied to the raw (unstripped) lines
	    separator_rules = [
	        # Standard colon format
	        r'([A-Za-z\s]+):\s*([A-Za-z0-9\s\$\-\.\/,]+)',
	        # Equals format
	        r'([A-Za-z\s]+)\s=\s([A-Za-z0-9\s\$\-\.\/,]+)',
	        # Dash format
	        r'([A-Za-z\s]+)\s-\s([A-Za-z0-9\s\$\-\.\/,]+)',
	    ]

	    for row in rows:
	        for regex in separator_rules:
	            hit = re.search(regex, row)
	            if hit is None:
	                continue
	            label = hit.group(1).strip().lower().replace(' ', '_')
	            captured = hit.group(2).strip()
	            if len(label) > 2 and len(captured) > 1:
	                found.append((label, 'is', captured))

	    # Pass 3: anything shaped like "Label: Value", split on the first colon
	    for row in rows:
	        stripped = row.strip()
	        if ':' not in stripped or len(stripped) <= 10:
	            continue
	        left, _, right = stripped.partition(':')
	        label = left.strip()
	        captured = right.strip()
	        if len(label) > 2 and len(captured) > 1 and not label.isdigit():
	            # Reduce the label to lowercase alphanumerics joined by underscores
	            normalized = re.sub(r'[^A-Za-z0-9\s]', '', label).strip().lower().replace(' ', '_')
	            if normalized:
	                found.append((normalized, 'is', captured))

	    print(f"Structured extraction found {len(found)} triples")
	    return found

	def extract_regular_triples(text):
	    """Extract triples by splitting each sentence at the first known verb.

	    Sentences are obtained by splitting on ``.``, ``?``, ``!`` and newlines;
	    those shorter than 10 characters are skipped. Verb families are tried
	    in order and the first one that splits the sentence into a subject and
	    object longer than 2 characters wins. A leading "the"/"a"/"an" is
	    removed from subject and object. Verb matching is case-sensitive.

	    Args:
	        text: The text to mine.

	    Returns:
	        A list of ``(subject, verb, object)`` string tuples, at most one per
	        sentence.
	    """
	    # Clean and split text into sentences
	    sentences = re.split(r"[.?!\n]", text)
	    print(f" Found {len(sentences)} sentences for regular extraction")

	    # Verb families, tried in this order. Joined with a bare '|' below: an
	    # escaped '\|' is a literal pipe and would never match ordinary prose.
	    verb_families = (
	        "is|are|was|were",
	        "has|have|had",
	        "uses|used|using",
	        "creates|created|creating",
	        "develops|developed|developing",
	        "leads|led|leading",
	        "affects|affected|affecting",
	        "contains|contained|containing",
	        "includes|included|including",
	        "involves|involved|involving",
	        "requires|required|requiring",
	        "produces|produced|producing",
	        "causes|caused|causing",
	        "results|resulted|resulting",
	        "enables|enabled|enabling",
	        "provides|provided|providing",
	        "supports|supported|supporting",
	        "allows|allowed|allowing",
	        "helps|helped|helping",
	        "improves|improved|improving",
	        "located|situated|found",
	        "consists|composed|made",
	        "operates|functions|works",
	        "generates|creates|produces",
	        "transforms|converts|changes",
	        "connects|links|relates",
	        "influences|impacts|affects",
	        "depends|relies|based",
	        "represents|symbolizes|stands",
	        "describes|explains|defines",
	        "refers|referring|referenced",
	        "concerns|concerning|concerned",
	        "relates|relating|related",
	        "analyzes|analyzing|analyzed",
	        "examines|examining|examined",
	        "studies|studying|studied",
	        "checks|checking|checked",
	        "manages|managing|managed",
	        "organizes|organizing|organized",
	        "coordinates|coordinating|coordinated",
	    )
	    # One capture group per pattern so re.split returns [subject, verb, object]
	    splitters = [re.compile(rf"\s+({family})\s+") for family in verb_families]
	    leading_article = re.compile(r'^(the|a|an)\s+', re.IGNORECASE)

	    triples = []
	    for sentence in sentences:
	        sentence = sentence.strip()
	        if len(sentence) < 10:  # Skip very short sentences
	            continue

	        for splitter in splitters:
	            parts = splitter.split(sentence, maxsplit=1)
	            if len(parts) != 3:
	                continue
	            subj, pred, obj = (part.strip() for part in parts)

	            if subj and pred and obj and len(subj) > 2 and len(obj) > 2:
	                # Remove common leading articles
	                subj = leading_article.sub('', subj)
	                obj = leading_article.sub('', obj)
	                triples.append((subj, pred, obj))
	                break  # Found a match, move to next sentence

	    print(f"Regular extraction found {len(triples)} triples")
	    return triples


	def add_to_graph(text):
	    """Extract triples from ``text``, add them to the RDF graph, and save.

	    Subjects and predicates become ``urn:`` URI references; objects become
	    literals.

	    Args:
	        text: The text to mine for facts.

	    Returns:
	        A status string with the number of extracted triples, the graph's
	        total size, and the save result on a second line.
	    """
	    extracted = extract_triples(text)
	    for subj, pred, obj in extracted:
	        triple = (
	            rdflib.URIRef(f"urn:{subj}"),
	            rdflib.URIRef(f"urn:{pred}"),
	            rdflib.Literal(obj),
	        )
	        graph.add(triple)

	    # Persist immediately so new knowledge is written to storage
	    persisted = save_knowledge_graph()

	    return f" Added {len(extracted)} new triples. Total facts stored: {len(graph)}.\n{persisted}"


	def retrieve_context(question, limit=10):
	    """Find the stored facts most relevant to a question.

	    The question is lower-cased, split on whitespace, and filtered of stop
	    words and words of two characters or fewer. Each fact scores one point
	    per keyword contained in its text, plus two when the keyword equals the
	    fact's subject or predicate.

	    Args:
	        question: The user's question.
	        limit: Maximum number of facts to include.

	    Returns:
	        A numbered "Relevant Knowledge" listing, or a hint message when no
	        fact scores above zero.
	    """
	    # Common words that don't add meaning
	    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'what', 'how', 'when', 'where', 'why', 'who'}
	    keywords = [
	        token
	        for token in question.lower().split()
	        if token not in stop_words and len(token) > 2
	    ]

	    print(f"Searching for: {keywords}")

	    def _short(term):
	        text = str(term)
	        return text.split(':')[-1] if ':' in text else text

	    ranked = []
	    for s, p, o in graph:
	        subject = _short(s)
	        predicate = _short(p)
	        rendered = f"{subject} {predicate} {str(o)}"
	        haystack = rendered.lower()
	        exact_targets = (subject.lower(), predicate.lower())

	        relevance = 0
	        for keyword in keywords:
	            if keyword in haystack:
	                relevance += 1
	            # Bonus for exact matches
	            if keyword in exact_targets:
	                relevance += 2

	        if relevance > 0:
	            ranked.append((relevance, rendered))

	    # Highest score first; the sort is stable, so ties keep graph order
	    ranked = sorted(ranked, key=lambda pair: pair[0], reverse=True)
	    top_facts = [rendered for _, rendered in ranked[:limit]]

	    print(f"Found {len(top_facts)} relevant facts")

	    if not top_facts:
	        return "No directly relevant facts found.\n\nTry asking about topics that might be in your knowledge base, or add more knowledge first!"

	    numbered = [f"{rank}. {fact}\n" for rank, fact in enumerate(top_facts, 1)]
	    return "Relevant Knowledge:\n" + "".join(numbered)

	def handle_add_knowledge(text):
	    """Gradio callback: extract knowledge from typed text.

	    Args:
	        text: Contents of the input box.

	    Returns:
	        A ``(status, "")`` pair; the empty string clears the input box.
	    """
	    # Nothing to do for missing or whitespace-only input
	    if not text or not text.strip():
	        return "Please enter some text to extract knowledge from.", ""

	    print(f"Adding knowledge from text input: {text[:1000]}...")
	    outcome = kb_add_to_graph(text)
	    print(f"Knowledge added: {outcome}")

	    # Report the extraction result together with the current size of the knowledge base
	    fact_total = len(kb_graph)
	    summary = f"Knowledge Extracted Successfully!\n\n{outcome}\n\nCurrent Knowledge Base: {fact_total} facts"

	    return summary, ""

	def show_graph_contents():
	    """Render the graph as text: an overview, facts grouped by subject, then a flat list.

	    At most 10 subjects and 20 individual facts are shown; the rest are
	    summarized by count. An empty graph yields getting-started guidance.

	    Returns:
	        The rendered text.
	    """
	    print(f"Showing graph contents. Total triples: {len(graph)}")

	    if len(graph) == 0:
	        return (
	            "Knowledge Graph Status: EMPTY\n\n"
	            "How to build your knowledge base:\n"
	            "1. Add text directly - Paste any text in the 'Add Knowledge from Text' box above\n"
	            "2. Upload documents - Use the file upload to process PDF, DOCX, TXT, CSV files\n"
	            "3. Extract facts - The system will automatically extract knowledge from your content\n"
	            "4. Build knowledge - Add more text or files to expand your knowledge base\n"
	            "5. Save knowledge - Use 'Save Knowledge' to persist your data\n\n"
	            "Start by adding some text or uploading a document!"
	        )

	    def _short(term):
	        text = str(term)
	        return text.split(':')[-1] if ':' in text else text

	    # One pass builds both the per-subject grouping and the flat list
	    grouped = {}
	    flat = []
	    for s, p, o in graph:
	        subject = _short(s)
	        predicate = _short(p)
	        object_val = str(o)
	        flat.append(f"{subject} {predicate} {object_val}")
	        grouped.setdefault(subject, []).append(f"{predicate} {object_val}")

	    out = [
	        "Knowledge Graph Overview\n",
	        f"Total Facts: {len(graph)}\n",
	        f"Unique Subjects: {len(grouped)}\n\n",
	        "## Knowledge by Subject:\n\n",
	    ]

	    for position, (subject, facts) in enumerate(grouped.items()):
	        if position >= 10:  # Only the first 10 subjects are expanded
	            out.append(f"... and {len(grouped) - 10} more subjects\n")
	            break
	        out.append(f"{subject}:\n")
	        out.extend(f" • {fact}\n" for fact in facts)
	        out.append("\n")

	    out.append("## All Facts:\n\n")
	    out.extend(f"{number}. {fact}\n" for number, fact in enumerate(flat[:20], start=1))

	    hidden = len(flat) - 20
	    if hidden > 0:
	        out.append(f"\n... and {hidden} more facts")

	    # Search suggestions are intentionally omitted to keep the view focused on facts
	    return "".join(out)

	def list_facts_for_editing():
	    """Rebuild the fact index and return dropdown choices for the editor.

	    Every triple in ``kb_graph`` gets a 1-based id; the label shown to the
	    user is ``"<id>. <subject> <predicate> <object>"`` and ``fact_index``
	    maps the id back to the original triple.

	    Returns:
	        A ``(dropdown_update, status)`` pair: a ``gr.update`` carrying the
	        new choices with the selection cleared, and a status string.
	    """
	    from knowledge import fact_index

	    # Start from an empty index: ids are positional, so entries left over
	    # from a previous, longer listing would point at stale triples.
	    fact_index.clear()

	    options = []
	    # Snapshot the graph so ids and labels come from one consistent ordering
	    for i, (s, p, o) in enumerate(list(kb_graph), start=1):
	        subject = str(s).split(':')[-1]
	        predicate = str(p).split(':')[-1]
	        object_val = str(o)
	        options.append(f"{i}. {subject} {predicate} {object_val}")
	        fact_index[i] = (s, p, o)

	    status = f"Loaded {len(options)} facts for editing"
	    return gr.update(choices=options, value=None), status

	def load_fact_fields(fact_label):
	    """Turn a dropdown label into editable (subject, predicate, object) strings.

	    Args:
	        fact_label: The label selected in the facts dropdown.

	    Returns:
	        A 3-tuple of strings; all empty when no label is given or the
	        lookup finds nothing. Subject and predicate are shortened to the
	        text after their last ``:``.
	    """
	    from knowledge import load_fact_by_label

	    blank = ("", "", "")
	    if not fact_label:
	        return blank

	    found = load_fact_by_label(fact_label)
	    if not found:
	        return blank

	    def _short(term):
	        text = str(term)
	        return text.split(':')[-1] if ':' in text else text

	    s, p, o = found
	    return _short(s), _short(p), str(o)

	def update_fact(fact_label, new_subject, new_predicate, new_object):
	    """Replace one fact with edited values and persist the graph.

	    The new triple is validated and built before the old one is removed, so
	    a rejected edit leaves the graph untouched.

	    Args:
	        fact_label: Dropdown label of the form ``"<id>. ..."``.
	        new_subject: Replacement subject text (required).
	        new_predicate: Replacement predicate text (required).
	        new_object: Replacement object text (may be empty).

	    Returns:
	        A ``(status, dropdown_update)`` pair. The dropdown update carries
	        refreshed choices on success and is a no-op ``gr.update()`` otherwise.
	    """
	    from knowledge import fact_index
	    if not fact_label:
	        return "⚠️ Select a fact first.", gr.update()
	    try:
	        fact_id = int(fact_label.split('.', 1)[0].strip())
	        old = fact_index.get(fact_id)
	        if not old:
	            return "⚠️ Fact not found. Click Refresh Facts and try again.", gr.update()

	        # Normalize inputs first; a cleared textbox may arrive as None
	        subject_text = (new_subject or "").strip()
	        predicate_text = (new_predicate or "").strip()
	        object_text = (new_object or "").strip()
	        if not subject_text or not predicate_text:
	            return "⚠️ Subject and predicate cannot be empty.", gr.update()

	        replacement = (
	            rdflib.URIRef(f"urn:{subject_text}"),
	            rdflib.URIRef(f"urn:{predicate_text}"),
	            rdflib.Literal(object_text),
	        )

	        # Only now touch the graph: remove the old triple, add the new one
	        kb_graph.remove(tuple(old))
	        kb_graph.add(replacement)
	        # Persist
	        kb_save_knowledge_graph()
	        # Refresh list
	        options_update, _ = list_facts_for_editing()
	        return "✅ Fact updated and saved.", options_update
	    except Exception as e:
	        return f"❌ Update failed: {e}", gr.update()

	def delete_fact(fact_label):
	    """Remove the fact behind a dropdown label and persist the graph.

	    Returns:
	        tuple: ``(status_message, dropdown_update)``; the update carries the
	        refreshed choices on success and is an empty ``gr.update()`` otherwise.
	    """
	    from knowledge import fact_index

	    if not fact_label:
	        return "⚠️ Select a fact first.", gr.update()

	    try:
	        # The label starts with "<id>." - everything before the first dot is the ID.
	        id_text = fact_label.partition('.')[0]
	        triple = fact_index.get(int(id_text.strip()))
	        if triple:
	            kb_graph.remove(triple)
	            kb_save_knowledge_graph()
	            refreshed_choices, _ = list_facts_for_editing()
	            return "🗑️ Fact deleted.", refreshed_choices
	        return "⚠️ Fact not found. Click Refresh Facts and try again.", gr.update()
	    except Exception as err:
	        return f"❌ Delete failed: {err}", gr.update()

	def visualize_knowledge_graph():
	    """Render the triples in ``graph`` as an HTML/SVG network diagram.

	    Subjects and objects become nodes and predicates become edge labels.
	    When there are more than 40 nodes only the 40 with the highest degree
	    are drawn. Positions come from a seeded NetworkX spring layout, scaled
	    into the SVG canvas.

	    All text taken from the graph is escaped before being placed into the
	    markup (attributes, text nodes and the inline ``onclick`` call), so a
	    quote, ``<`` or ``&`` inside a fact cannot break the SVG or inject script.

	    Returns:
	        str: An HTML snippet. A short ``<p>`` message is returned instead
	        when the graph is empty or when rendering fails.
	    """
	    import json
	    from html import escape

	    if len(graph) == 0:
	        return "<p>No knowledge in graph. Add some text or upload a document first!</p>"

	    try:
	        print(f"Creating interactive network visualization for {len(graph)} facts...")

	        # Build an undirected NetworkX graph from the RDF triples.
	        G = nx.Graph()
	        for s, p, o in graph:
	            subject = str(s).split(':')[-1] if ':' in str(s) else str(s)
	            predicate = str(p).split(':')[-1] if ':' in str(p) else str(p)
	            object_val = str(o)

	            # Shortened names are used for on-canvas labels only.
	            subject_short = (subject[:30] + "...") if len(subject) > 30 else subject
	            object_short = (object_val[:30] + "...") if len(object_val) > 30 else object_val

	            if subject not in G:
	                G.add_node(subject, display=subject_short, node_type='subject')
	            if object_val not in G:
	                G.add_node(object_val, display=object_short, node_type='object')

	            G.add_edge(subject, object_val, label=predicate)

	        total_nodes = len(G.nodes())
	        print(f"NetworkX graph created with {total_nodes} nodes")

	        # Keep the picture readable: only the 40 best-connected nodes.
	        if total_nodes > 40:
	            degrees = dict(G.degree())
	            top_nodes = sorted(degrees.items(), key=lambda x: x[1], reverse=True)[:40]
	            top_node_names = [node[0] for node in top_nodes]
	            G = G.subgraph(top_node_names)
	            print(f"Showing top 40 nodes out of {total_nodes} total")

	        # Fixed seed keeps the layout stable between refreshes.
	        pos = nx.spring_layout(G, k=2, iterations=100, seed=42)

	        x_positions = [pos[n][0] for n in G.nodes()]
	        y_positions = [pos[n][1] for n in G.nodes()]
	        x_min, x_max = min(x_positions), max(x_positions)
	        y_min, y_max = min(y_positions), max(y_positions)

	        # Scale layout coordinates to the canvas; fall back to a fixed
	        # factor when all nodes share an x or y coordinate.
	        scale = min(500 / (x_max - x_min), 400 / (y_max - y_min)) if (x_max - x_min) > 0 and (y_max - y_min) > 0 else 50
	        offset_x = 350
	        offset_y = 300

	        # Edges first so that they are painted behind the nodes.
	        svg_elements = []
	        for source, target, edge_attrs in G.edges(data=True):
	            x1 = pos[source][0] * scale + offset_x
	            y1 = pos[source][1] * scale + offset_y
	            x2 = pos[target][0] * scale + offset_x
	            y2 = pos[target][1] * scale + offset_y

	            label = escape(str(edge_attrs.get('label', 'has')), quote=True)

	            svg_elements.append(f"""
	            <line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}"
	            stroke="#999" stroke-width="2" opacity="0.5"
	            data-label="{label}"
	            onmouseover="this.style.stroke='#2196F3'; this.style.strokeWidth='3'; this.style.opacity='0.8'"
	            onmouseout="this.style.stroke='#999'; this.style.strokeWidth='2'; this.style.opacity='0.5'">
	            <title>{label}</title>
	            </line>
	            """)

	        type_colors = {'subject': '#4CAF50', 'object': '#2196F3'}
	        node_info = []
	        for i, node in enumerate(G.nodes()):
	            x = pos[node][0] * scale + offset_x
	            y = pos[node][1] * scale + offset_y
	            display_name = str(G.nodes[node].get('display', node))
	            node_type = G.nodes[node].get('node_type', 'unknown')
	            color = type_colors.get(node_type, '#546E7A')  # blue-grey for unknown

	            neighbor_count = len(list(G.neighbors(node)))

	            # NOTE(review): this expression always evaluates to 40 (the
	            # bounds look swapped); kept as is to leave the rendering unchanged.
	            radius = max(40, min(30, neighbor_count * 2 + 20))

	            safe_name = escape(display_name, quote=True)
	            safe_label = escape(display_name[:15])
	            # JSON gives valid JS string literals; escaping that result makes
	            # it safe inside the double-quoted HTML attribute.
	            click_args = escape(
	                f"{json.dumps(str(node))}, {json.dumps(display_name)}, {neighbor_count}",
	                quote=True,
	            )

	            node_info.append(f"""
	            <circle cx="{x}" cy="{y}" r="{radius}"
	            fill="{color}" stroke="#fff" stroke-width="2"
	            data-node="{i}" data-name="{safe_name}" data-count="{neighbor_count}"
	            onmouseover="showNodeInfo(this)"
	            onmouseout="hideNodeInfo(this)"
	            onclick="showNodeDetails({click_args})">
	            <title>{safe_name} ({neighbor_count} connections)</title>
	            </circle>
	            <text x="{x}" y="{y+6}" text-anchor="middle" font-size="15" font-weight="bold" fill="#000"
	            pointer-events="none">{safe_label}</text>
	            """)

	        svg_content = '\n'.join(svg_elements + node_info)

	        # Braces that belong to the embedded JavaScript are doubled because
	        # this is an f-string.
	        html = f"""
	        <div style="width: 100%; min-height: 700px; max-height: 800px; background: white; border: 2px solid #ddd; border-radius: 10px; padding: 20px; position: relative; overflow: auto;">
	        <div style="position: absolute; top: 10px; left: 10px; background: white; padding: 10px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); z-index: 10;">
	        <h3 style="margin: 0; font-size: 14px;">📊 Knowledge Network</h3>
	        <p style="margin: 5px 0; font-size: 11px; color: #666;">Facts: {len(graph)} | Nodes: {len(G.nodes())} | Links: {len(G.edges())}</p>
	        </div>

	        <div id="nodeInfo" style="position: absolute; top: 10px; right: 10px; background: white; padding: 10px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); z-index: 10; display: none; max-width: 250px;">
	        <div id="nodeInfoContent"></div>
	        </div>

	        <div style="position: absolute; bottom: 10px; left: 10px; background: #e3f2fd; padding: 8px; border-radius: 5px; font-size: 11px;">
	        💡 Hover over nodes for details | Click to explore relationships
	        </div>

	        <div style="position: absolute; bottom: 50px; left: 10px; background: white; padding: 10px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); font-size: 10px; min-width: 150px;">
	        <strong>Node Colors:</strong><br>
	        <span style="color: #4CAF50;">●</span> Green = Subjects<br>
	        <span style="color: #2196F3;">●</span> Blue = Objects<br>
	        <span style="color: #546E7A;">●</span> Blue-Grey = Unknown
	        </div>

	        <svg width="100%" height="550" style="border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; display: block;">
	        {svg_content}
	        </svg>

	        <script>
	        function showNodeInfo(element) {{
	        var name = element.getAttribute('data-name');
	        var count = element.getAttribute('data-count');
	        var infoDiv = document.getElementById('nodeInfo');
	        var infoContent = document.getElementById('nodeInfoContent');

	        // Build the tooltip from text nodes so the name is never parsed as HTML.
	        infoContent.textContent = '';
	        var strong = document.createElement('strong');
	        strong.textContent = name;
	        infoContent.appendChild(strong);
	        infoContent.appendChild(document.createElement('br'));
	        infoContent.appendChild(document.createTextNode('Connections: ' + count));
	        infoDiv.style.display = 'block';
	        }}

	        function hideNodeInfo(element) {{
	        document.getElementById('nodeInfo').style.display = 'none';
	        }}

	        function showNodeDetails(nodeName, displayName, count) {{
	        var fullText = nodeName.length > 100 ? nodeName.substring(0, 150) + '...' : nodeName;
	        alert('📊 Research Entity: ' + displayName + '\\n🔗 Relationships: ' + count + '\\n\\n📝 Full Entity: ' + fullText);
	        }}
	        </script>
	        </div>
	        """

	        print(f" Interactive network visualization created successfully")
	        return html

	    except Exception as e:
	        print(f" Error creating visualization: {e}")
	        import traceback
	        traceback.print_exc()
	        return f"<p style='color: red; padding: 20px;'>Error creating visualization: {escape(str(e))}</p>"

	# =========================================================
	# 📁 File Processing Functions
	# =========================================================

	def extract_text_from_pdf(file_path):
	    """Read every page of a PDF via PyPDF2 and return the combined text.

	    Page counts and sizes are printed as progress output. The function
	    never raises: on any failure it prints and returns a string of the
	    form ``"Error reading PDF: <reason>"``.
	    """
	    try:
	        with open(file_path, 'rb') as pdf_handle:
	            reader = PyPDF2.PdfReader(pdf_handle)
	            page_chunks = []
	            print(f" PDF has {len(reader.pages)} pages")

	            for page_number, page in enumerate(reader.pages, start=1):
	                page_text = page.extract_text()
	                page_chunks.append(page_text + "\n")
	                print(f" Page {page_number}: {len(page_text)} characters")

	            extracted_text = "".join(page_chunks).strip()
	            print(f" Total extracted: {len(extracted_text)} characters")
	            print(f" First 200 chars: {extracted_text[:200]}...")
	            return extracted_text
	    except Exception as exc:
	        error_msg = f"Error reading PDF: {exc}"
	        print(f" {error_msg}")
	        return error_msg

	def extract_text_from_docx(file_path):
	    """Return the paragraphs of a DOCX file joined by newlines.

	    Never raises: failures come back as ``"Error reading DOCX: <reason>"``.
	    """
	    try:
	        paragraphs = Document(file_path).paragraphs
	        combined = "".join(para.text + "\n" for para in paragraphs)
	        return combined.strip()
	    except Exception as exc:
	        return f"Error reading DOCX: {exc}"

	def extract_text_from_txt(file_path):
	    """Return the stripped content of a UTF-8 text file.

	    Never raises: failures come back as ``"Error reading TXT: <reason>"``.
	    """
	    try:
	        with open(file_path, 'r', encoding='utf-8') as handle:
	            content = handle.read()
	    except Exception as exc:
	        return f"Error reading TXT: {exc}"
	    return content.strip()

	def extract_text_from_csv(file_path):
	    """Describe a CSV file as text: dimensions, column names, first five rows.

	    Never raises: failures come back as ``"Error reading CSV: <reason>"``.
	    """
	    try:
	        frame = pd.read_csv(file_path)
	        pieces = [
	            f"CSV Data with {len(frame)} rows and {len(frame.columns)} columns:\n\n",
	            f"Columns: {', '.join(frame.columns)}\n\n",
	            "Sample data:\n",
	        ]
	        # One line per sample row, numbered from the row's index label.
	        pieces.extend(
	            f"Row {idx+1}: {dict(record)}\n"
	            for idx, record in frame.head(5).iterrows()
	        )
	        return "".join(pieces).strip()
	    except Exception as exc:
	        return f"Error reading CSV: {exc}"

	def process_uploaded_file(file):
	    """Extract text from one uploaded file and add it to the knowledge graph.

	    Args:
	        file: Either a filesystem path (``str``) or an upload object that
	            exposes the path as ``.name``. ``handle_file_upload`` forwards
	            both kinds, so both are accepted here.

	    Returns:
	        str: A status message - a success summary including a text preview
	        and the result of ``add_to_graph``, or a message describing why the
	        file was not processed (nothing uploaded, unsupported type,
	        extraction error).
	    """
	    if file is None:
	        return "No file uploaded."

	    file_path = file if isinstance(file, str) else file.name
	    file_extension = os.path.splitext(file_path)[1].lower()

	    print(f"📁 Processing file: {file_path} (type: {file_extension})")

	    # Pick the extractor by extension.
	    if file_extension == '.pdf':
	        extracted_text = extract_text_from_pdf(file_path)
	    elif file_extension == '.docx':
	        extracted_text = extract_text_from_docx(file_path)
	    elif file_extension == '.txt':
	        extracted_text = extract_text_from_txt(file_path)
	    elif file_extension == '.csv':
	        extracted_text = extract_text_from_csv(file_path)
	    else:
	        return f" Unsupported file type: {file_extension}\n\nSupported formats: PDF, DOCX, TXT, CSV"

	    # The extractors report failure as "Error reading <TYPE>: ...". Match
	    # that exact prefix so a document that merely starts with the word
	    # "Error" is not mistaken for a failed extraction.
	    error_prefix = f"Error reading {file_extension[1:].upper()}:"
	    if extracted_text.startswith(error_prefix):
	        return f" {extracted_text}"

	    # Keep the raw text around for the "show extracted text" debug view.
	    update_extracted_text(extracted_text)

	    preview = extracted_text[:300] + "..." if len(extracted_text) > 300 else extracted_text
	    print(f" Extracted text preview: {preview}")

	    result = add_to_graph(extracted_text)

	    file_size = len(extracted_text)
	    return f" Successfully processed {os.path.basename(file_path)}!\n\n📊 File stats:\n• Size: {file_size:,} characters\n• Type: {file_extension.upper()}\n\n Text preview:\n{preview}\n\n{result}"

	def handle_file_upload(files):
	    """Process a batch of uploads and return a plain-text summary.

	    Each entry may be a path string or an upload object with ``.name``.
	    Files whose base name was already processed (earlier or in this same
	    batch) are skipped. A file only counts as successful - and is only
	    remembered in ``processed_files`` - when ``process_uploaded_file``
	    reports success, so failed or unsupported files can be uploaded again.

	    Args:
	        files: Iterable of uploads; ``None`` entries are ignored.

	    Returns:
	        str: Summary with counters followed by one result line per file.
	    """
	    global processed_files

	    if not files:
	        return "Please select at least one file to process."

	    results = []
	    new_processed = []
	    # Names handled so far, including ones accepted earlier in this batch.
	    seen_names = {f['name'] for f in processed_files}

	    for file in files:
	        if file is None:
	            continue

	        try:
	            file_path = file if isinstance(file, str) else file.name
	            file_name = os.path.basename(file_path)

	            if file_name in seen_names:
	                results.append(f"SKIP: {file_name} - Already processed, skipping")
	                continue

	            facts_before = len(graph)
	            result = process_uploaded_file(file)

	            # process_uploaded_file reports problems through its return
	            # value instead of raising; only its success message contains
	            # this marker.
	            if "Successfully processed" not in result:
	                results.append(f"ERROR: {file_name} - {result.strip()}")
	                continue

	            results.append(f"SUCCESS: {file_name} - {result}")
	            seen_names.add(file_name)
	            new_processed.append({
	                'name': file_name,
	                'size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
	                'processed_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
	                # Growth of the graph caused by this file alone.
	                'facts_added': len(graph) - facts_before,
	            })

	        except Exception as e:
	            # file_name may not have been set yet, so derive it again.
	            if isinstance(file, str):
	                file_name = os.path.basename(file)
	            else:
	                file_name = os.path.basename(file.name) if hasattr(file, 'name') else str(file)

	            error_msg = f"ERROR: {file_name} - Error: {e}"
	            print(error_msg)
	            results.append(error_msg)

	    processed_files.extend(new_processed)

	    total_files = len(files)
	    successful = sum(1 for r in results if r.startswith("SUCCESS"))
	    skipped = sum(1 for r in results if r.startswith("SKIP"))
	    failed = sum(1 for r in results if r.startswith("ERROR"))

	    summary_parts = [
	        "Upload Summary:\n",
	        f"• Total files: {total_files}\n",
	        f"• Successfully processed: {successful}\n",
	        f"• Already processed: {skipped}\n",
	        f"• Failed: {failed}\n",
	        f"• Total facts in knowledge base: {len(graph)}\n\n",
	        "File Results:\n",
	    ]
	    summary_parts.extend(f"{result}\n" for result in results)
	    return "".join(summary_parts)

	def show_processed_files():
	    """Format the ``processed_files`` list as a readable text report.

	    Returns usage instructions when nothing has been processed yet.
	    """
	    if not processed_files:
	        return "No files processed yet.\n\nStart building your knowledge base:\n1. Select one or more files (PDF, DOCX, TXT, CSV)\n2. Click 'Process Files' to extract knowledge\n3. View your processed files here\n4. Upload more files to expand your knowledge base!"

	    report = [f"Processed Files ({len(processed_files)}):\n\n"]
	    for position, entry in enumerate(processed_files, 1):
	        report.append(f"{position}. {entry['name']}\n")
	        report.append(f" • Size: {entry['size']:,} bytes\n")
	        report.append(f" • Processed: {entry['processed_at']}\n")
	        report.append(f" • Facts added: {entry.get('facts_added', 'Unknown')}\n\n")

	    report.append(f"Total Knowledge Base: {len(graph)} facts\n")
	    report.append(f"Ready for more uploads!")
	    return "".join(report)

	def clear_processed_files():
	    """Forget which files were processed so they can be uploaded again.

	    Rebinds the module-level ``processed_files`` to a fresh empty list.
	    """
	    global processed_files
	    confirmation = "Processed files list cleared. You can now re-upload previously processed files."
	    processed_files = []
	    return confirmation


	def simple_test():
	    """Smoke-test callback: log the call and return a fixed confirmation."""
	    confirmation = " Event handler is working! Button clicked successfully!"
	    print("🔔 Simple test function called!")
	    return confirmation

	# Text extracted from the most recently processed upload. Written by
	# update_extracted_text() and read by show_extracted_text().
	last_extracted_text: str = ""

	# One dict per file recorded by handle_file_upload(), with the keys
	# 'name', 'size', 'processed_at' and 'facts_added'. Used to skip files
	# that were already processed; reset by clear_processed_files().
	processed_files: list = []

	def show_extracted_text():
	    """Return a preview (at most 1000 characters) of ``last_extracted_text``.

	    A hint is returned instead when no file has been processed yet.
	    """
	    if not last_extracted_text:
	        return " No file has been processed yet.\n\nUpload a file and process it to see the extracted text here."

	    limit = 1000
	    was_cut = len(last_extracted_text) > limit
	    preview = last_extracted_text[:limit]
	    if was_cut:
	        preview = preview + "\n\n... (truncated, showing first 1000 characters)"

	    return f" Last Extracted Text:\n\n{preview}"

	def update_extracted_text(text):
	    """Remember ``text`` as the most recently extracted file content.

	    Stores it in the module-level ``last_extracted_text``; returns nothing.
	    """
	    global last_extracted_text
	    last_extracted_text = text
	    return None

	def delete_all_knowledge():
	    """Replace the module-level graph with an empty one and save it.

	    Returns:
	        str: Confirmation naming how many facts the old graph held.
	    """
	    global graph
	    removed_total = len(graph)
	    # Rebind to a brand-new graph, then persist the now-empty state.
	    graph = rdflib.Graph()
	    save_knowledge_graph()
	    return f"🗑️ Deleted all {removed_total} facts from the knowledge graph. Graph is now empty."

	def handle_delete_all(confirm_text):
	    """Delete all knowledge, but only if the user typed DELETE.

	    The confirmation is compared case-insensitively after trimming
	    whitespace; anything else yields a warning and deletes nothing.
	    """
	    typed = (confirm_text or "").strip().upper()
	    if typed == "DELETE":
	        return kb_delete_all_knowledge()
	    return "⚠️ Type DELETE to confirm full deletion."

	def delete_knowledge_by_keyword(keyword):
	    """Remove every fact whose text contains ``keyword`` (case-insensitive).

	    A fact's text is its subject, predicate and object joined by spaces.
	    The graph is saved only when at least one fact was removed.

	    Returns:
	        str: Status message with the number of deleted facts, or a notice
	        that nothing matched / no keyword was given.
	    """
	    if not keyword or not keyword.strip():
	        return "⚠️ Please enter a keyword to search for."

	    needle = keyword.strip().lower()

	    # Collect matches first; the graph must not change while iterating it.
	    matches = [
	        triple for triple in graph
	        if needle in "{} {} {}".format(*triple).lower()
	    ]
	    if not matches:
	        return f"ℹ️ No facts found containing '{needle}'"

	    for triple in matches:
	        graph.remove(triple)
	    save_knowledge_graph()
	    return f"🗑️ Deleted {len(matches)} facts containing '{needle}'"

	def delete_recent_knowledge(count=5):
	    """Delete up to ``count`` facts from the end of the graph's iteration order.

	    ``count`` is validated first: previously ``count=0`` sliced the whole
	    list (``facts[-0:]``) and wiped the graph, and negative or non-integer
	    values behaved unpredictably.

	    NOTE(review): "most recent" relies on ``list(graph)`` yielding triples
	    in insertion order; confirm that the graph store guarantees this.

	    Args:
	        count: Number of facts to delete; must convert to an int >= 1.

	    Returns:
	        str: Status message describing what was (or was not) deleted.
	    """
	    try:
	        count = int(count)
	    except (TypeError, ValueError):
	        return "⚠️ Count must be a whole number."
	    if count <= 0:
	        return "ℹ️ No facts deleted: count must be at least 1."

	    if len(graph) == 0:
	        return "ℹ️ Knowledge graph is already empty."

	    # Snapshot first; the graph must not change while it is iterated.
	    # With count >= 1 the slice is the whole list when count exceeds it.
	    facts_to_remove = list(graph)[-count:]
	    for fact in facts_to_remove:
	        graph.remove(fact)

	    save_knowledge_graph()  # Save after deletion
	    return f"🗑️ Deleted {len(facts_to_remove)} most recent facts"


	# =========================================================
	# 🤖 2. Intelligent Response Generation
	# =========================================================

	def generate_intelligent_response(message, context, system_message):
	    """Route a question to the matching rule-based answer generator.

	    Checks are made on the lower-cased message, in this order: document
	    summary phrases, then a leading what/who/when/where, then amount
	    keywords, and finally the general fallback. ``system_message`` is
	    accepted for interface compatibility and not used.
	    """
	    lowered = message.lower()

	    summary_phrases = (
	        'what is the document about', 'whats the document about',
	        'what is this about', 'whats this about',
	        'describe the document', 'summarize the document',
	        'what does this contain',
	    )
	    if any(phrase in lowered for phrase in summary_phrases):
	        return generate_document_summary(context)

	    # Question-word prefixes, checked in the original order.
	    if lowered.startswith('what'):
	        return generate_what_response(message, context)
	    if lowered.startswith('who'):
	        return generate_who_response(message, context)
	    if lowered.startswith('when'):
	        return generate_when_response(message, context)
	    if lowered.startswith('where'):
	        return generate_where_response(message, context)

	    amount_phrases = ('how much', 'amount', 'total', 'cost', 'price')
	    if any(phrase in lowered for phrase in amount_phrases):
	        return generate_amount_response(message, context)

	    return generate_general_response(message, context)

	def generate_document_summary(context):
	    """Summarise what the document seems to be, based on retrieved facts.

	    Facts are the non-empty lines of ``context`` that do not start with
	    ``**``. Each fact is checked for document-type keywords (invoice,
	    contract, report; the last matching fact wins) and, independently,
	    for key-detail keywords. Checking the two independently means a fact
	    such as "Invoice total: 500" both sets the type and is listed as a
	    detail, instead of being dropped from the details.

	    Args:
	        context: Newline-separated facts retrieved from the knowledge base.

	    Returns:
	        str: The summary, listing at most five key details, or a notice
	        that there is not enough information.
	    """
	    if not context or "No directly relevant facts found" in context:
	        return "I don't have enough information about this document to provide a summary. Please add more knowledge to the knowledge base first."

	    facts = [
	        line.strip()
	        for line in context.split('\n')
	        if line.strip() and not line.startswith('**')
	    ]

	    type_keywords = (
	        ("invoice", ("invoice", "bill")),
	        ("contract", ("contract", "agreement")),
	        ("report", ("report", "analysis")),
	    )
	    detail_keywords = (
	        'company', 'organization', 'name',
	        'amount', 'total', 'cost', 'price',
	        'date', 'time',
	        'address', 'location',
	        'description', 'type',
	        'id', 'number', 'code',
	    )

	    document_type = "document"
	    key_info = []
	    for fact in facts:
	        fact_lower = fact.lower()
	        for candidate, keywords in type_keywords:
	            if any(keyword in fact_lower for keyword in keywords):
	                document_type = candidate
	                break
	        if any(keyword in fact_lower for keyword in detail_keywords):
	            key_info.append(fact)

	    # Avoid "a document document" and "a invoice document".
	    if document_type == "document":
	        description = "a document"
	    else:
	        article = "an" if document_type[0] in "aeiou" else "a"
	        description = f"{article} {document_type} document"

	    summary = f"Based on the information in my knowledge base, this appears to be {description}. "

	    if key_info:
	        summary += "Here are the key details I found:\n\n"
	        summary += "".join(f"• {info}\n" for info in key_info[:5])
	    else:
	        summary += "However, I don't have enough specific details to provide a comprehensive summary."

	    return summary

	def generate_what_response(message, context):
	    """Answer a "what" question with the first three facts from ``context``.

	    Facts are the non-empty lines of ``context`` that do not start with
	    ``**``. ``message`` is accepted for interface parity and not inspected.
	    """
	    if not context or "No directly relevant facts found" in context:
	        return "I don't have information about that topic in my knowledge base. Try asking about specific details that might be in the document."

	    facts = [
	        raw.strip()
	        for raw in context.split('\n')
	        if raw.strip() and not raw.startswith('**')
	    ]
	    if not facts:
	        return "I don't have specific information about that in my knowledge base."

	    shown_limit = 3
	    bullets = "".join(f"• {fact}\n" for fact in facts[:shown_limit])
	    response = "Based on my knowledge base, here's what I can tell you:\n\n" + bullets
	    if len(facts) > shown_limit:
	        response += f"\nI have {len(facts)} total facts about this topic in my knowledge base."
	    return response

	def generate_who_response(message, context):
	    """Answer a "who" question with facts that mention people or companies.

	    A fact qualifies when its lower-cased line contains one of the
	    English or Greek person/company keywords. ``message`` is not inspected.
	    """
	    if not context or "No directly relevant facts found" in context:
	        return "I don't have information about people or entities in my knowledge base."

	    entity_keywords = ['company', 'name', 'person', 'επωνυμία', 'εταιρεία']

	    def _mentions_entity(raw_line):
	        lowered = raw_line.lower()
	        return any(keyword in lowered for keyword in entity_keywords)

	    facts = [
	        raw.strip()
	        for raw in context.split('\n')
	        if raw.strip() and not raw.startswith('**') and _mentions_entity(raw)
	    ]
	    if not facts:
	        return "I don't have specific information about people or companies in my knowledge base."

	    return "Here's what I know about people/entities:\n\n" + "".join(
	        f"• {fact}\n" for fact in facts
	    )

	def generate_when_response(message, context):
	    """Answer a "when" question with facts that mention dates or deadlines.

	    A fact qualifies when its lower-cased line contains one of the
	    English or Greek date keywords. ``message`` is not inspected.
	    """
	    if not context or "No directly relevant facts found" in context:
	        return "I don't have date information in my knowledge base."

	    date_keywords = ['date', 'ημερομηνία', 'due', 'προθεσμία']

	    def _mentions_date(raw_line):
	        lowered = raw_line.lower()
	        return any(keyword in lowered for keyword in date_keywords)

	    facts = [
	        raw.strip()
	        for raw in context.split('\n')
	        if raw.strip() and not raw.startswith('**') and _mentions_date(raw)
	    ]
	    if not facts:
	        return "I don't have specific date information in my knowledge base."

	    return "Here's the date information I have:\n\n" + "".join(
	        f"• {fact}\n" for fact in facts
	    )

	def generate_where_response(message, context):
	    """Answer a "where" question with facts that mention addresses or places.

	    A fact qualifies when its lower-cased line contains one of the
	    English or Greek location keywords. ``message`` is not inspected.
	    """
	    if not context or "No directly relevant facts found" in context:
	        return "I don't have location information in my knowledge base."

	    location_keywords = ['address', 'διεύθυνση', 'location', 'place']

	    def _mentions_location(raw_line):
	        lowered = raw_line.lower()
	        return any(keyword in lowered for keyword in location_keywords)

	    facts = [
	        raw.strip()
	        for raw in context.split('\n')
	        if raw.strip() and not raw.startswith('**') and _mentions_location(raw)
	    ]
	    if not facts:
	        return "I don't have specific location information in my knowledge base."

	    return "Here's the location information I have:\n\n" + "".join(
	        f"• {fact}\n" for fact in facts
	    )

	def generate_amount_response(message, context):
	    """Answer an amount/money question with facts that mention sums or prices.

	    A fact qualifies when its lower-cased line contains one of the
	    English or Greek money keywords or a currency sign. ``message`` is
	    not inspected.
	    """
	    if not context or "No directly relevant facts found" in context:
	        return "I don't have financial information in my knowledge base."

	    money_keywords = ['amount', 'total', 'price', 'cost', 'σύνολο', 'φόρος', '€', '$']

	    def _mentions_money(raw_line):
	        lowered = raw_line.lower()
	        return any(keyword in lowered for keyword in money_keywords)

	    facts = [
	        raw.strip()
	        for raw in context.split('\n')
	        if raw.strip() and not raw.startswith('**') and _mentions_money(raw)
	    ]
	    if not facts:
	        return "I don't have specific financial information in my knowledge base."

	    return "Here's the financial information I have:\n\n" + "".join(
	        f"• {fact}\n" for fact in facts
	    )

	def generate_general_response(message, context):
	    """Fallback answer: list the first four facts found in ``context``.

	    Facts are the non-empty lines of ``context`` that do not start with
	    ``**``. ``message`` is accepted for interface parity and not inspected.
	    """
	    if not context or "No directly relevant facts found" in context:
	        return "I don't have specific information about that topic in my knowledge base. Try asking about details that might be in the uploaded document, like company names, dates, amounts, or addresses."

	    facts = [
	        raw.strip()
	        for raw in context.split('\n')
	        if raw.strip() and not raw.startswith('**')
	    ]
	    if not facts:
	        return "I don't have relevant information about that in my knowledge base."

	    shown_limit = 4
	    bullets = "".join(f"• {fact}\n" for fact in facts[:shown_limit])
	    response = "Based on my knowledge base, here's what I can tell you:\n\n" + bullets
	    if len(facts) > shown_limit:
	        response += f"\nI have {len(facts)} total relevant facts about this topic."
	    return response

	# =========================================================
	# 🤖 3. Reasoning Function (LLM + Symbolic Context)
	# =========================================================
	def respond(message, history, system_message="You are an intelligent assistant that answers questions based on factual information from a knowledge base. You provide clear, accurate, and helpful responses. When you have relevant information, you share it directly. When you don't have enough information, you clearly state this limitation. You always stay grounded in the facts provided and never hallucinate information.", max_tokens=256, temperature=0.7, top_p=0.9):
	    """Answer a chat message from the knowledge base, with LLM fallbacks.

	    Order of attempts:
	      1. Rule-based answer via ``generate_intelligent_response``; its
	         result is returned directly unless it raises.
	      2. Hosted models through ``InferenceClient.text_generation``, tried
	         one after another (token-gated models first when a Hugging Face
	         token is present in the environment).
	      3. ``generate_mock_response`` when every model failed. The
	         rule-based generator is not called again here: this point is only
	         reached after it has already raised for the same input.
	      4. A fixed apology containing the retrieved context if anything else
	         goes wrong.

	    Args:
	        message: The user's question.
	        history: Chat history supplied by the chat UI; not used here.
	        system_message: Instruction text placed at the top of the LLM prompt.
	        max_tokens: Requested generation length (capped at 150).
	        temperature: Sampling temperature (capped at 0.8).
	        top_p: Nucleus sampling value (capped at 0.9).

	    Returns:
	        str: The answer text.
	    """
	    # Step 1: retrieve context from symbolic KB
	    context = retrieve_context(message)

	    # Step 2: try the rule-based answer first
	    try:
	        intelligent_response = generate_intelligent_response(message, context, system_message)
	        print(f"🧠 Generated intelligent response for: {message[:50]}...")
	        return intelligent_response
	    except Exception as e:
	        print(f"⚠️ Intelligent response failed: {e}")
	        # Fall through to the hosted models below.

	    # Step 3: fall back to hosted models
	    prompt = (
	        f"{system_message}\n\n"
	        f"Context from knowledge base:\n{context}\n\n"
	        f"User Question: {message}\n\n"
	        f"Instructions:\n"
	        f"- Answer based ONLY on the facts provided above\n"
	        f"- Be specific and factual\n"
	        f"- If you don't have enough information, say so clearly\n"
	        f"- Provide a helpful and informative response\n"
	        f"- Keep your answer concise but complete\n\n"
	        f"Answer:"
	    )

	    try:
	        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")

	        # Models usable without a token, in order of preference.
	        models_to_try = [
	            ("microsoft/DialoGPT-medium", None),
	            ("facebook/blenderbot-400M-distill", None),
	            ("microsoft/DialoGPT-small", None),
	            ("distilgpt2", None),
	            ("gpt2", None),
	        ]

	        # With a token, try the authenticated models before the open ones.
	        if hf_token:
	            authenticated_models = [
	                ("HuggingFaceH4/zephyr-7b-beta", hf_token),
	                ("microsoft/DialoGPT-large", hf_token),
	                ("facebook/blenderbot-1B-distill", hf_token),
	                ("EleutherAI/gpt-neo-125M", hf_token),
	            ]
	            models_to_try = authenticated_models + models_to_try

	        for model, token in models_to_try:
	            try:
	                print(f"🔄 Attempting to use model: {model}")

	                # token=None is the client's default, i.e. anonymous access.
	                client = InferenceClient(model=model, token=token)

	                result = client.text_generation(
	                    prompt=prompt,
	                    max_new_tokens=min(int(max_tokens), 150),
	                    temperature=min(float(temperature), 0.8),
	                    top_p=min(float(top_p), 0.9),
	                    repetition_penalty=1.1,
	                    do_sample=True,
	                    stream=False,
	                    return_full_text=False,
	                )

	                print(f" Successfully generated response using: {model}")
	                return result.strip()

	            except Exception as model_error:
	                print(f" Model {model} failed: {model_error}")
	                continue  # Try next model

	        # Every model failed. Use the keyword-based mock answer, which only
	        # formats the retrieved context and does not depend on the
	        # generator that already failed in step 2.
	        print("⚠️ All models failed, providing intelligent fallback")
	        return generate_mock_response(message, context, system_message)

	    except Exception as e:
	        # Ultimate fallback - even if everything fails
	        print(f"💥 Complete failure: {e}")
	        return f"🤖 I'm having trouble connecting to AI models right now, but I can still help!\n\nBased on your knowledge graph, I found these relevant facts:\n{context}\n\nFor your question '{message}', I'd suggest checking the facts above. Try adding more information to the knowledge graph or check back later when the AI models are working properly."

	def generate_mock_response(message, context, system_message):
	    """Build a canned, keyword-driven reply for when no AI model is available.

	    The message is split into lower-case words and compared word by word.
	    Plain substring checks made "this" or "they" look like the greetings
	    "hi"/"hey" and "show" look like the question word "how".

	    Args:
	        message: The user's message.
	        context: Facts retrieved from the knowledge base, embedded verbatim.
	        system_message: Accepted for interface parity; not used.

	    Returns:
	        str: One of four templates (greeting, question, help request, other).
	    """
	    words = set(re.findall(r"[a-z]+", message.lower()))

	    def _mentions(*keywords):
	        return not words.isdisjoint(keywords)

	    if _mentions('hello', 'hi', 'hey', 'greetings'):
	        return f"👋 Hello! I'm your reasoning assistant. I found these facts in your knowledge base:\n\n{context}\n\nHow can I help you today?"

	    if _mentions('what', 'who', 'when', 'where', 'how', 'why'):
	        return f"🤔 Great question! Based on your knowledge graph, here's what I found:\n\n{context}\n\nWhile I can't provide a full AI-generated answer right now, these facts from your knowledge base should help you understand the topic better."

	    if _mentions('help', 'assist', 'support'):
	        return f"🆘 I'm here to help! Your knowledge graph contains:\n\n{context}\n\nYou can:\n• Add more information to the knowledge graph\n• Ask specific questions about the facts\n• Try again later when AI models are working"

	    return f"💭 Interesting question! From your knowledge base, I found:\n\n{context}\n\nWhile I'm having technical difficulties with AI models, I can still help you explore the information you've added to the knowledge graph. Try asking more specific questions or adding more context!"

	# =========================================================
	# 💬 3. Gradio Interface Definition
	# =========================================================

	def save_and_backup():
	    """Persist the knowledge graph, then write a comprehensive backup.

	    Returns:
	        tuple: ``(BACKUP_FILE, save_status)`` - the backup file path and
	        whatever the save call reported.
	    """
	    save_status = kb_save_knowledge_graph()
	    kb_create_comprehensive_backup()
	    return (BACKUP_FILE, save_status)

	def refresh_visualization(*args):
	    """Re-render the knowledge graph view.

	    Any positional arguments passed along by a preceding handler are
	    accepted and ignored.
	    """
	    rendered_view = kb_visualize_knowledge_graph()
	    return rendered_view

	# =========================================================
	# 🧩 4. Interface Layout
	# =========================================================
	with gr.Blocks(title="Research Brain") as demo:
	    # Custom blue-grey theme: overrides button, input, chat and file-upload
	    # colours so that no orange accent remains.
	    #
	    # The embedded script repaints the upload area and the chat send button and
	    # hides window-control style buttons on a 100 ms timer. hideWindowControls()
	    # is invoked on every tick of that timer, so it must not register a further
	    # timer of its own (doing so would add a new, never-cleared interval on
	    # every tick).
	    #
	    # NOTE(review): the value below wraps the rules in <style>/<script> tags.
	    # Gradio documents `css` as plain CSS (with `js`/`head` for scripts), so the
	    # wrappers and the script are presumably not interpreted as HTML - confirm
	    # against the Gradio version in use before relying on the script running.
	    demo.css = """
	<style>
	/* Hide window control buttons (minimize, maximize, close) */
	button[aria-label*="close"],
	button[aria-label*="Close"],
	button[aria-label*="minimize"],
	button[aria-label*="Minimize"],
	button[aria-label*="maximize"],
	button[aria-label*="Maximize"],
	.gradio-container button[aria-label],
	.gradio-container .toolbar button,
	.gradio-container header button,
	div[class*="window-controls"],
	div[class*="title-bar"] button,
	.control-buttons,
	.window-controls {
	display: none !important;
	visibility: hidden !important;
	}

	/* Override all button colors - remove orange completely */
	button {
	background-color: #546E7A !important;
	border-color: #546E7A !important;
	color: white !important;
	}

	button.primary {
	background-color: #546E7A !important;
	border-color: #546E7A !important;
	color: white !important;
	}

	button.primary:hover,
	button:hover {
	background-color: #455A64 !important;
	border-color: #455A64 !important;
	}

	button.secondary {
	background-color: #78909C !important;
	border-color: #78909C !important;
	color: white !important;
	}

	button.secondary:hover {
	background-color: #607D8B !important;
	border-color: #607D8B !important;
	}

	button:focus,
	button.primary:focus {
	border-color: #546E7A !important;
	box-shadow: 0 0 0 2px rgba(84, 110, 122, 0.2) !important;
	}

	/* Make white boxes light grey */
	textarea,
	input[type="text"],
	.wrap,
	.output-text,
	.panel,
	.chatbot,
	.chat,
	.message {
	background-color: #F5F5F5 !important;
	}

	textarea:focus,
	input[type="text"]:focus,
	textarea:focus {
	background-color: #FFFFFF !important;
	border-color: #546E7A !important;
	}

	/* Chat interface styling */
	.chatbot-message,
	.conversation-box,
	[class*="chat"],
	[class*="message"] {
	background-color: #F8F8F8 !important;
	}

	/* File upload drag area - ALWAYS light grey background */
	[class*="file-drop"],
	[class*="upload"],
	input[type="file"],
	.gradio-file,
	.gradio-file [class*="component"],
	[class*="file-drop"]:hover,
	[class*="upload"]:hover,
	[class*="file-drop"]:focus,
	[class*="upload"]:focus,
	[class*="file-drop"]:active,
	[class*="upload"]:active,
	div[role="button"],
	[data-testid="file-upload"],
	[class*="drag"],
	div[class="wrap"][class="file"],
	div[class="component"][class="file"] {
	background-color: #F5F5F5 !important;
	background: #F5F5F5 !important;
	border-color: #546E7A !important;
	}

	/* Force light grey background for Gradio file components */
	div[class*="wrap"].gradio-file,
	div[class="wrap"][class="file"],
	[class="wrap"][class="gradio"] {
	background-color: #F5F5F5 !important;
	background: #F5F5F5 !important;
	}

	[class="file-drop"]:hover ,
	[class="upload"]:hover ,
	[class="file-drop"]:focus ,
	[class="upload"]:focus {
	background-color: #F5F5F5 !important;
	}

	/* Make download file component compact */
	.gradio-file .component {
	min-height: 60px !important;
	max-height: 70px !important;
	}

	.gradio-file button {
	padding: 8px !important;
	font-size: 12px !important;
	}

	/* Specifically target Gradio file upload components */
	.upload-component,
	[class*="component"],
	.rounded,
	.border,
	div.wrap,
	div[class*="wrap"] {
	transition: none !important;
	}

	.upload-component:hover,
	[class*="component"]:hover,
	div.wrap:hover,
	div[class*="wrap"]:hover {
	background-color: #F5F5F5 !important;
	border-color: #546E7A !important;
	}

	/* Target all child elements within file upload */
	[class*="file"]:hover,
	[class*="File"]:hover,
	[id*="upload"]:hover,
	[id*="Upload"]:hover {
	background-color: #F5F5F5 !important;
	}

	/* Override any SVG or icon colors */
	[class*="file"] svg:hover,
	[class*="upload"] svg:hover {
	fill: #546E7A !important;
	}

	/* Nuclear option - target EVERYTHING that could be file upload */
	div[id*="file"],
	div[id*="File"],
	div[id*="upload"],
	div[id*="Upload"],
	.file,
	div.file,
	div.upload,
	.gradio-file *,
	div.w-full *,
	div[class="wrap"] ,
	div[class="wrap"][class="file"] * {
	background: #F5F5F5 !important;
	background-color: #F5F5F5 !important;
	}

	/* Remove hover transitions completely for file upload */
	div:hover[class*="file"],
	div:hover[class*="upload"],
	div:hover[id*="file"],
	div:hover[id*="upload"] {
	background: #F5F5F5 !important;
	background-color: #F5F5F5 !important;
	}

	/* Force no background change on hover for upload areas specifically */
	div[class*="upload"]:hover,
	span[class*="upload"]:hover,
	button[class*="upload"]:hover,
	form[class*="upload"]:hover,
	div[role="button"][class*="file"]:hover,
	div[class="wrap"][class="file"]:hover {
	background-color: #F5F5F5 !important;
	border-color: #546E7A !important;
	opacity: 1 !important;
	}

	/* Disable hover animation for file upload */
	div[class*="file"]:hover,
	div[class*="upload"]:hover {
	transform: none !important;
	box-shadow: 0 1px 2px rgba(0,0,0,0.1) !important;
	}

	/* Remove any orange/red accent colors */
	* {
	--tw-gradient-from: transparent !important;
	--tw-gradient-to: transparent !important;
	}

	/* Override any Gradio default colors */
	.gradio-container,
	.gradio-button-primary,
	button[class*="primary"],
	button[class*="button"] {
	background-color: #546E7A !important;
	border-color: #546E7A !important;
	color: white !important;
	}

	button[class*="primary"]:hover,
	button[class*="button"]:hover {
	background-color: #455A64 !important;
	border-color: #455A64 !important;
	}

	/* Style chat send button - blue-grey instead of grey */
	button[aria-label*="send"],
	[role="button"][aria-label*="Send"],
	button[class*="send-button"],
	button[aria-label*="Send message"],
	svg[class*="send"],
	button svg,
	.chat button svg {
	background-color: #546E7A !important;
	color: white !important;
	}

	/* Remove grey container around chat send button */
	button[aria-label*="send"],
	[role="button"][aria-label*="Send"],
	.chat input[type="submit"],
	.chat button {
	border: none !important;
	box-shadow: none !important;
	}

	/* Style the circular send button background */
	button[aria-label*="send"],
	button[aria-label*="Send message"] {
	background-color: #546E7A !important;
	border: none !important;
	}

	button[aria-label*="send"]:hover,
	button[aria-label*="Send message"]:hover {
	background-color: #455A64 !important;
	}

	/* Make Save Knowledge button compact */
	button[data-testid*="save"],
	.gradio-button {
	max-width: 200px !important;
	}
	</style>

	<script>
	// Force file upload area to stay light grey
	setInterval(function() {
	var fileElements = document.querySelectorAll('[class="file"], [class="upload"], [id="file"], [id="upload"], input[type="file"]');
	fileElements.forEach(function(el) {
	var parent = el.closest('div');
	if (parent) {
	parent.style.backgroundColor = '#F5F5F5';
	parent.style.background = '#F5F5F5';
	}
	});

	// Style chat send button with blue-grey
	var chatButtons = document.querySelectorAll('button[aria-label="send"], button[aria-label="Send"], button[class*="send-button"]');
	chatButtons.forEach(function(btn) {
	btn.style.border = 'none';
	btn.style.background = '#546E7A';
	btn.style.backgroundColor = '#546E7A';
	btn.style.color = 'white';
	btn.style.boxShadow = 'none';

	// Style on hover
	if (!btn.hasAttribute('data-styled')) {
	btn.setAttribute('data-styled', 'true');
	btn.addEventListener('mouseenter', function() {
	this.style.backgroundColor = '#455A64';
	});
	btn.addEventListener('mouseleave', function() {
	this.style.backgroundColor = '#546E7A';
	});
	}
	});

	// Hide window control buttons
	var hideWindowControls = function() {
	var controls = document.querySelectorAll('button[aria-label="close"], button[aria-label="Close"], button[aria-label="minimize"], button[aria-label="Minimize"], button[aria-label="maximize"], button[aria-label="Maximize"], div[class="window-controls"], div[class="title-bar"] button');
	controls.forEach(function(el) {
	el.style.display = 'none';
	el.style.visibility = 'hidden';
	});

	// Hide any buttons in the top-right corner that look like window controls
	var headerButtons = document.querySelectorAll('header button, .gradio-container > div:first-child button');
	headerButtons.forEach(function(btn) {
	if (btn.offsetParent !== null && (btn.getAttribute('aria-label') || btn.textContent.trim() === '')) {
	var rect = btn.getBoundingClientRect();
	if (rect.top < 50 && rect.right > window.innerWidth - 100) {
	btn.style.display = 'none';
	btn.style.visibility = 'hidden';
	}
	}
	});
	};

	hideWindowControls();
	}, 100);
	</script>
	"""

	    # Header with logo in top right: use the first logo_G.<ext> file that
	    # exists in the working directory, if any.
	    logo_path = None
	    for ext in [".jpeg", ".jpg", ".png"]:
	        path = f"logo_G{ext}"
	        if os.path.exists(path):
	            logo_path = path
	            break

	    with gr.Row():
	        with gr.Column(scale=3):
	            gr.Markdown("## Research Brain\nBuild and explore knowledge graphs from research documents, publications, and datasets.")
	        with gr.Column(scale=1, min_width=100):
	            # The image is only added when a logo file was found above.
	            if logo_path:
	                gr.Image(value=logo_path, label="", show_label=False, container=False, min_width=100, height=100)

	    with gr.Row():
	        # Sidebar: all controls grouped in sections
	        with gr.Column(scale=1, min_width=320):
	            gr.Markdown("### Controls")
	            with gr.Accordion("Data Ingestion", open=True):
	                upload_box = gr.Textbox(
	                    lines=5,
	                    placeholder="Paste research text, abstracts, findings, or any content to extract knowledge...",
	                    label="Add Research Content",
	                )
	                add_button = gr.Button("Extract Knowledge", variant="primary")
	                file_upload = gr.File(
	                    label="Upload Research Documents (PDF, DOCX, TXT, CSV)",
	                    file_types=[".pdf", ".docx", ".txt", ".csv"],
	                    file_count="multiple"
	                )
	                upload_file_button = gr.Button("Process Documents", variant="primary")

	            with gr.Accordion("Knowledge Base Management", open=True):
	                save_button = gr.Button("Save Knowledge", variant="secondary")
	                # Output slot that receives the backup file path after a save.
	                download_button = gr.File(label="Download Backup", visible=True)
	                json_upload = gr.File(label="Upload Knowledge JSON", file_types=[".json"], file_count="single")
	                import_json_button = gr.Button("Import Knowledge JSON", variant="secondary")
	                delete_confirm = gr.Textbox(label="Type DELETE to confirm", placeholder="DELETE")
	                delete_all_btn = gr.Button("Delete All Knowledge", variant="secondary")
	                show_button = gr.Button("View Knowledge Base", variant="secondary")
	                graph_view = gr.Textbox(label="Knowledge Contents", visible=True, lines=3, max_lines=4)

	            with gr.Accordion("Edit or Remove Facts", open=False):
	                refresh_facts_btn = gr.Button("Refresh Facts", variant="secondary")
	                fact_selector = gr.Dropdown(label="Select Fact", choices=[], interactive=True, multiselect=False)
	                subj_box = gr.Textbox(label="Subject")
	                pred_box = gr.Textbox(label="Predicate")
	                obj_box = gr.Textbox(label="Object", lines=2)
	                with gr.Row():
	                    update_fact_btn = gr.Button("Update Fact", variant="primary")
	                    delete_fact_btn = gr.Button("Delete Fact", variant="secondary")
	                fact_edit_status = gr.Textbox(label="Edit Status", interactive=False)

	            # Shared status line written to by the ingestion, save, import and
	            # delete-all handlers below.
	            graph_info = gr.Textbox(label="Status", interactive=False, visible=True, lines=1, max_lines=2)

	        # Main content: Knowledge graph (large) and chat (smaller below)
	        with gr.Column(scale=3):
	            gr.Markdown("### Knowledge Graph Network")
	            graph_plot = gr.HTML(label="Knowledge Graph", visible=True, min_height=600)

	            gr.Markdown("### Research Assistant")
	            chatbot = gr.ChatInterface(
	                fn=lambda message, history: rqa_respond(message, history),
	                title="Query Knowledge Base",
	                description="Ask questions about your research data. Explore findings, relationships, and insights.",
	                examples=[
	                    "What are the key research findings?",
	                    "Summarize the methodologies",
	                    "What relationships exist in the data?",
	                    "What are the important timelines?",
	                    "What datasets were used?"
	                ]
	            )

	    # Auto-load visualization on page load
	    demo.load(
	        fn=kb_visualize_knowledge_graph,
	        inputs=[],
	        outputs=[graph_plot]
	    )

	    # Event handlers for simplified UI. Every handler that can change the
	    # graph is chained with .then(refresh_visualization) so the plot is
	    # re-rendered once the handler has finished.
	    add_button.click(
	        fn=handle_add_knowledge,
	        inputs=upload_box,
	        outputs=[graph_info, upload_box]
	    ).then(
	        fn=refresh_visualization,
	        outputs=[graph_plot]
	    )

	    upload_file_button.click(
	        fn=fp_handle_file_upload,
	        inputs=file_upload,
	        outputs=graph_info
	    ).then(
	        fn=refresh_visualization,
	        outputs=[graph_plot]
	    )

	    # Read-only view: no graph refresh needed.
	    show_button.click(
	        fn=kb_show_graph_contents,
	        inputs=[],
	        outputs=[graph_view]
	    )

	    save_button.click(
	        fn=save_and_backup,
	        outputs=[download_button, graph_info]
	    ).then(
	        fn=refresh_visualization,
	        outputs=[graph_plot]
	    )

	    import_json_button.click(
	        fn=kb_import_json,
	        inputs=json_upload,
	        outputs=graph_info
	    ).then(
	        fn=refresh_visualization,
	        outputs=[graph_plot]
	    )

	    delete_all_btn.click(
	        fn=handle_delete_all,
	        inputs=delete_confirm,
	        outputs=graph_info
	    ).then(
	        fn=refresh_visualization,
	        outputs=[graph_plot]
	    )

	    # Fact editor events
	    refresh_facts_btn.click(
	        fn=list_facts_for_editing,
	        outputs=[fact_selector, fact_edit_status]
	    )
	    # Selecting a fact fills the subject/predicate/object boxes.
	    fact_selector.change(
	        fn=load_fact_fields,
	        inputs=fact_selector,
	        outputs=[subj_box, pred_box, obj_box]
	    )
	    update_fact_btn.click(
	        fn=update_fact,
	        inputs=[fact_selector, subj_box, pred_box, obj_box],
	        outputs=[fact_edit_status, fact_selector]
	    ).then(
	        fn=refresh_visualization,
	        outputs=[graph_plot]
	    )
	    delete_fact_btn.click(
	        fn=delete_fact,
	        inputs=fact_selector,
	        outputs=[fact_edit_status, fact_selector]
	    ).then(
	        fn=refresh_visualization,
	        outputs=[graph_plot]
	    )

	# =========================================================
	# 🚀 5. Initialize Sample Data and Launch
	# =========================================================


	if __name__ == "__main__":
	    import sys

	    # Fix Windows console encoding issue with emojis.
	    # The streams are switched to UTF-8 in place rather than re-wrapped in a new
	    # TextIOWrapper: reconfigure() keeps the stream's existing buffering and
	    # error-handler settings, and the guards skip streams that are missing
	    # (None) or that do not support reconfiguration instead of raising.
	    for _stream in (sys.stdout, sys.stderr):
	        _encoding = (getattr(_stream, "encoding", None) or "").lower()
	        if _encoding != "utf-8" and hasattr(_stream, "reconfigure"):
	            _stream.reconfigure(encoding="utf-8")

	    # Initialize knowledge graph and load existing data
	    print("Initializing knowledge graph...")
	    load_result = kb_load_knowledge_graph()
	    print(f"Startup: {load_result}")
	    print(f"Knowledge graph ready with {len(kb_graph)} facts")

	    # Launch the Gradio app; the launch arguments depend on whether the
	    # SPACE_ID environment variable is set.
	    print("Launching Gradio application...")

	    # Check if we're in Hugging Face Spaces (has SPACE_ID env var)
	    is_hf_space = os.getenv("SPACE_ID") is not None

	    if is_hf_space:
	        # On Hugging Face Spaces, listen on all interfaces and pass no explicit
	        # port, leaving port selection to Gradio/the platform.
	        print("Detected Hugging Face Spaces environment")
	        demo.launch(server_name="0.0.0.0")
	    else:
	        # Local development - use explicit settings; PORT overrides the
	        # default of 7860.
	        port = int(os.getenv("PORT", 7860))
	        print(f"Local development mode - starting on http://127.0.0.1:{port}")
	        # Bind to loopback so browsers can open localhost directly
	        demo.launch(server_name="127.0.0.1", server_port=port, share=False)

	# """
	# Research Brain - STEP 6: Properly Exposed Gradio API
	# Uses correct /api/predict/{api_name} endpoints
	# """

	# import gradio as gr
	# import pickle
	# import os
	# import rdflib
	# import json

	# # Files for storing data
	# STORAGE_FILE = "knowledge_base.pkl"
	# RDF_FILE = "knowledge_graph.rdf"

	# # Initialize RDF graph
	# graph = rdflib.Graph()

	# # Load existing knowledge base
	# def load_knowledge():
	# global graph

	# facts = []
	# if os.path.exists(STORAGE_FILE):
	# try:
	# with open(STORAGE_FILE, 'rb') as f:
	# facts = pickle.load(f)
	# except:
	# facts = []

	# if os.path.exists(RDF_FILE):
	# try:
	# graph.parse(RDF_FILE, format="turtle")
	# except:
	# pass

	# return facts

	# # Save knowledge base
	# def save_knowledge(kb):
	# global graph
	# with open(STORAGE_FILE, 'wb') as f:
	# pickle.dump(kb, f)
	# try:
	# graph.serialize(destination=RDF_FILE, format="turtle")
	# except:
	# pass

	# # Initialize knowledge base
	# knowledge_base = load_knowledge()

	# # ==========================================================
	# # API FUNCTIONS - Return data directly (not JSON strings)
	# # ==========================================================

	# def api_get_knowledge_base():
	# """API: Get all facts"""
	# return {"facts": knowledge_base}

	# def api_create_fact(subject, predicate, obj, source="API"):
	# """API: Create a new fact"""
	# if not subject.strip() or not predicate.strip() or not obj.strip():
	# return {"success": False, "error": "Missing required fields"}

	# fact = {
	# "id": str(len(knowledge_base) + 1),
	# "subject": subject.strip(),
	# "predicate": predicate.strip(),
	# "object": obj.strip(),
	# "source": source
	# }
	# knowledge_base.append(fact)

	# # Add to RDF graph
	# subj_uri = rdflib.URIRef(f"urn:{subject.strip().replace(' ', '_')}")
	# pred_uri = rdflib.URIRef(f"urn:{predicate.strip().replace(' ', '_')}")
	# obj_literal = rdflib.Literal(obj.strip())
	# graph.add((subj_uri, pred_uri, obj_literal))

	# save_knowledge(knowledge_base)

	# return {"success": True, "fact": fact}

	# def api_update_fact(fact_id, subject="", predicate="", obj=""):
	# """API: Update a fact"""
	# for fact in knowledge_base:
	# if isinstance(fact, dict) and str(fact.get("id")) == str(fact_id):
	# if subject:
	# fact["subject"] = subject
	# if predicate:
	# fact["predicate"] = predicate
	# if obj:
	# fact["object"] = obj

	# # Rebuild RDF graph
	# global graph
	# graph = rdflib.Graph()
	# for f in knowledge_base:
	# if isinstance(f, dict):
	# s = rdflib.URIRef(f"urn:{f['subject'].replace(' ', '_')}")
	# p = rdflib.URIRef(f"urn:{f['predicate'].replace(' ', '_')}")
	# o = rdflib.Literal(f['object'])
	# graph.add((s, p, o))

	# save_knowledge(knowledge_base)
	# return {"success": True, "fact": fact}

	# return {"success": False, "error": "Fact not found"}

	# def api_delete_fact(fact_id):
	# """API: Delete a fact"""
	# global graph

	# for i, fact in enumerate(knowledge_base):
	# if isinstance(fact, dict) and str(fact.get("id")) == str(fact_id):
	# deleted_fact = knowledge_base.pop(i)

	# # Rebuild RDF graph
	# graph = rdflib.Graph()
	# for f in knowledge_base:
	# if isinstance(f, dict):
	# s = rdflib.URIRef(f"urn:{f['subject'].replace(' ', '_')}")
	# p = rdflib.URIRef(f"urn:{f['predicate'].replace(' ', '_')}")
	# o = rdflib.Literal(f['object'])
	# graph.add((s, p, o))

	# save_knowledge(knowledge_base)
	# return {"success": True, "deleted": deleted_fact}

	# return {"success": False, "error": "Fact not found"}

	# def api_get_graph():
	# """API: Get graph visualization data"""
	# nodes = []
	# edges = []
	# node_set = set()

	# for fact in knowledge_base:
	# if isinstance(fact, dict):
	# subj = fact.get("subject", "")
	# pred = fact.get("predicate", "")
	# obj = fact.get("object", "")

	# if subj and subj not in node_set:
	# nodes.append({"id": subj, "label": subj, "type": "concept"})
	# node_set.add(subj)

	# if obj and obj not in node_set:
	# nodes.append({"id": obj, "label": obj, "type": "entity"})
	# node_set.add(obj)

	# if subj and pred and obj:
	# edges.append({
	# "id": f"{subj}-{pred}-{obj}",
	# "source": subj,
	# "target": obj,
	# "label": pred
	# })

	# return {"nodes": nodes, "edges": edges}

	# # ==========================================================
	# # UI FUNCTIONS
	# # ==========================================================

	# def add_fact(subject, predicate, obj):
	# """Add fact via UI"""
	# result = api_create_fact(subject, predicate, obj, "UI")
	# if result["success"]:
	# fact = result["fact"]
	# return f"✅ Added fact #{fact['id']}! Total: {len(knowledge_base)} facts", "", "", ""
	# return f"⚠️ {result.get('error', 'Unknown error')}", subject, predicate, obj

	# def view_facts():
	# """View all facts"""
	# if not knowledge_base:
	# return "📭 No facts yet. Add some!"

	# result = f"📊 Knowledge Base ({len(knowledge_base)} facts, {len(graph)} RDF triples)\n\n"
	# for fact in knowledge_base:
	# if isinstance(fact, dict):
	# result += f"#{fact.get('id', '?')}: {fact.get('subject', '?')} → {fact.get('predicate', '?')} → {fact.get('object', '?')}\n"
	# return result

	# def view_rdf_graph():
	# """View RDF graph"""
	# if len(graph) == 0:
	# return "📭 RDF graph is empty"
	# try:
	# turtle_data = graph.serialize(format="turtle")
	# return f"🌐 RDF Graph ({len(graph)} triples)\n\n{turtle_data}"
	# except Exception as e:
	# return f"❌ Error: {e}"

	# def delete_all():
	# """Delete all knowledge"""
	# global graph
	# knowledge_base.clear()
	# graph = rdflib.Graph()
	# save_knowledge(knowledge_base)
	# return "🗑️ All knowledge deleted!"

	# def get_stats():
	# """Get statistics"""
	# if not knowledge_base:
	# return "No facts yet"

	# subjects = set()
	# predicates = set()
	# objects = set()

	# for fact in knowledge_base:
	# if isinstance(fact, dict):
	# subjects.add(fact.get('subject', ''))
	# predicates.add(fact.get('predicate', ''))
	# objects.add(fact.get('object', ''))

	# return f"""
	# 📊 Statistics:
	# - Total facts: {len(knowledge_base)}
	# - RDF triples: {len(graph)}
	# - Unique subjects: {len(subjects)}
	# - Unique predicates: {len(predicates)}
	# - Unique objects: {len(objects)}
	# """.strip()

	# # ==========================================================
	# # GRADIO INTERFACE
	# # ==========================================================

	# with gr.Blocks(title="Research Brain") as demo:
	# gr.Markdown("# 🧠 Research Brain - Step 6: Proper API Endpoints")
	# gr.Markdown("✅ Using /api/predict/{api_name} format!")

	# # Regular UI tabs
	# with gr.Tab("Add Fact"):
	# gr.Markdown("### Create a New Fact")

	# with gr.Row():
	# subject_input = gr.Textbox(label="Subject", placeholder="e.g., Machine Learning")
	# predicate_input = gr.Textbox(label="Predicate", placeholder="e.g., is part of")
	# object_input = gr.Textbox(label="Object", placeholder="e.g., Artificial Intelligence")

	# add_btn = gr.Button("Add Fact", variant="primary", size="lg")
	# status = gr.Textbox(label="Status", interactive=False)

	# add_btn.click(
	# fn=add_fact,
	# inputs=[subject_input, predicate_input, object_input],
	# outputs=[status, subject_input, predicate_input, object_input]
	# )

	# with gr.Tab("View Facts"):
	# with gr.Row():
	# view_btn = gr.Button("Refresh Facts", variant="secondary")
	# stats_btn = gr.Button("Show Statistics", variant="secondary")

	# output = gr.Textbox(label="Knowledge Base", lines=15)
	# stats_output = gr.Textbox(label="Statistics", lines=6)

	# view_btn.click(fn=view_facts, outputs=[output])
	# stats_btn.click(fn=get_stats, outputs=[stats_output])

	# with gr.Tab("RDF Graph"):
	# rdf_view_btn = gr.Button("View RDF Graph", variant="secondary")
	# rdf_output = gr.Textbox(label="RDF Graph (Turtle Format)", lines=15)

	# rdf_view_btn.click(fn=view_rdf_graph, outputs=[rdf_output])

	# with gr.Tab("🔌 API Testing"):
	# gr.Markdown("""
	# ### Test API Endpoints
	# These functions are exposed at `/api/predict/{api_name}`
	# """)

	# with gr.Accordion("Get Knowledge Base", open=True):
	# test_get_btn = gr.Button("Get All Facts")
	# test_get_output = gr.JSON(label="Result")
	# test_get_btn.click(
	# fn=api_get_knowledge_base,
	# outputs=[test_get_output],
	# api_name="get_knowledge_base"
	# )

	# with gr.Accordion("Create Fact", open=False):
	# with gr.Row():
	# test_subj = gr.Textbox(label="Subject", value="Test")
	# test_pred = gr.Textbox(label="Predicate", value="relates_to")
	# test_obj = gr.Textbox(label="Object", value="API")
	# test_source = gr.Textbox(label="Source", value="API", visible=False)
	# test_create_btn = gr.Button("Create Fact")
	# test_create_output = gr.JSON(label="Result")
	# test_create_btn.click(
	# fn=api_create_fact,
	# inputs=[test_subj, test_pred, test_obj, test_source],
	# outputs=[test_create_output],
	# api_name="create_fact"
	# )

	# with gr.Accordion("Update Fact", open=False):
	# with gr.Row():
	# test_update_id = gr.Textbox(label="Fact ID", value="1")
	# test_update_subj = gr.Textbox(label="Subject", value="Updated")
	# test_update_pred = gr.Textbox(label="Predicate", value="")
	# test_update_obj = gr.Textbox(label="Object", value="")
	# test_update_btn = gr.Button("Update Fact")
	# test_update_output = gr.JSON(label="Result")
	# test_update_btn.click(
	# fn=api_update_fact,
	# inputs=[test_update_id, test_update_subj, test_update_pred, test_update_obj],
	# outputs=[test_update_output],
	# api_name="update_fact"
	# )

	# with gr.Accordion("Delete Fact", open=False):
	# test_delete_id = gr.Textbox(label="Fact ID", value="1")
	# test_delete_btn = gr.Button("Delete Fact")
	# test_delete_output = gr.JSON(label="Result")
	# test_delete_btn.click(
	# fn=api_delete_fact,
	# inputs=[test_delete_id],
	# outputs=[test_delete_output],
	# api_name="delete_fact"
	# )

	# with gr.Accordion("Get Graph", open=False):
	# test_graph_btn = gr.Button("Get Graph Data")
	# test_graph_output = gr.JSON(label="Result")
	# test_graph_btn.click(
	# fn=api_get_graph,
	# outputs=[test_graph_output],
	# api_name="get_graph"
	# )

	# with gr.Tab("Manage"):
	# delete_btn = gr.Button("Delete All Facts", variant="stop")
	# delete_status = gr.Textbox(label="Status", interactive=False)
	# delete_btn.click(fn=delete_all, outputs=[delete_status])

	# print(f"📂 Loaded {len(knowledge_base)} facts")
	# print(f"🌐 RDF graph has {len(graph)} triples")
	# print(f"✅ API endpoints ready at /api/predict/{{api_name}}")

	# # Enable queue for better API handling (optional but recommended)
	# demo.queue()

	# # Launch
	# demo.launch()