Spaces:
Sleeping
Sleeping
| """ | |
| Gradio admin interface for content management | |
| Allows uploading documents, scraping URLs, and managing content | |
| """ | |
| import gradio as gr | |
| import os | |
| import html as html_lib | |
| from dotenv import load_dotenv | |
| from qdrant_client import QdrantClient, models | |
| from src.ingestion import ingest_document | |
| from src.scraper import process_and_store_webpage | |
| from src.config import load_settings, save_settings | |
# Load environment variables (QDRANT_URL, QDRANT_API_KEY, ...) from a .env file.
load_dotenv()

# Initialize Qdrant client
client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY")
)
# Collection holding the document chunks; name overridable via env var.
collection_name = os.getenv("QDRANT_COLLECTION", "hr-intervals")

# Create index for metadata.source to enable filtering
# (required by the delete-by-source filter used in delete_document below).
try:
    client.create_payload_index(
        collection_name=collection_name,
        field_name="metadata.source",
        field_schema=models.PayloadSchemaType.KEYWORD
    )
    print("β Payload index for metadata.source created successfully")
except Exception as e:
    # Index might already exist or collection not found
    print(f"βΉοΈ Index status: {str(e)}")
| # ==================== Functions ==================== | |
def list_all_documents():
    """
    List every document currently stored in the knowledge base.

    Scrolls through all points in the collection, groups chunks by their
    nested ``metadata.source`` field, and renders an HTML table with
    selectable text (name, type, upload date, chunk count).

    Returns:
        HTML string: the documents table, an empty-state notice, or an
        error box if anything goes wrong.
    """
    try:
        # Paginate through ALL points (Qdrant has 5800+ points)
        all_points = []
        offset = None
        while True:
            points, next_offset = client.scroll(
                collection_name=collection_name,
                limit=1000,
                offset=offset,
                with_payload=True
            )
            all_points.extend(points)
            if next_offset is None:
                break
            offset = next_offset

        # Group by source
        docs_dict = {}
        for point in all_points:
            # Metadata is nested inside payload
            metadata = point.payload.get("metadata", {})
            source = metadata.get("source", "Unknown")
            if source not in docs_dict:
                docs_dict[source] = {
                    "name": source,
                    "type": metadata.get("type", "Unknown"),
                    "date": metadata.get("upload_date", "Unknown"),
                    "chunks": 0
                }
            docs_dict[source]["chunks"] += 1

        # Nothing meaningful to show: either empty, or only untagged chunks.
        if not docs_dict or (len(docs_dict) == 1 and "Unknown" in docs_dict):
            return """
            <div style="padding: 20px; text-align: center; color: #666;">
                <p>π No documents yet</p>
            </div>
            """

        # Build the page as a list of fragments joined once at the end
        # (fix: repeated `html += ...` is quadratic in the number of rows).
        parts = ["""
        <style>
        .docs-table {
            width: 100%;
            border-collapse: collapse;
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
            user-select: text;
            -webkit-user-select: text;
            -moz-user-select: text;
            -ms-user-select: text;
        }
        .docs-table thead {
            background-color: #f8f9fa;
        }
        .docs-table th {
            padding: 12px;
            text-align: left;
            font-weight: 600;
            border-bottom: 2px solid #dee2e6;
            user-select: text;
        }
        .docs-table td {
            padding: 12px;
            border-bottom: 1px solid #dee2e6;
            user-select: text;
            cursor: text;
        }
        .docs-table tr:hover {
            background-color: #f8f9fa;
        }
        .doc-name {
            color: #0066cc;
            word-break: break-all;
        }
        </style>
        <table class="docs-table">
        <thead>
            <tr>
                <th>Document Name</th>
                <th>Type</th>
                <th>Upload Date</th>
                <th>Chunks</th>
            </tr>
        </thead>
        <tbody>
        """]
        for doc in docs_dict.values():
            # str() guards against non-string metadata values
            # (html.escape raises on anything that isn't a str).
            parts.append(f"""
            <tr>
                <td class="doc-name">{html_lib.escape(str(doc['name']))}</td>
                <td>{html_lib.escape(str(doc['type']))}</td>
                <td>{html_lib.escape(str(doc['date']))}</td>
                <td>{doc['chunks']}</td>
            </tr>
            """)
        parts.append("""
        </tbody>
        </table>
        """)
        return "".join(parts)
    except Exception as e:
        return f"""
        <div style="padding: 20px; color: #dc3545;">
            <p>β Error: {str(e)}</p>
        </div>
        """
def upload_document(file, doc_type="document"):
    """
    Ingest an uploaded PDF or DOCX file into the knowledge base.

    Args:
        file: Gradio file object (exposes its temp path as ``.name``),
            or None when nothing was selected.
        doc_type: Category label stored with the document.

    Returns:
        Human-readable status message describing the outcome.
    """
    if file is None:
        return "β Please select a file"
    try:
        path = file.name
        chunk_total = ingest_document(path, doc_type)
        return (
            f"β Success!\n\nFile: {os.path.basename(path)}"
            f"\nChunks created: {chunk_total}\nType: {doc_type}"
        )
    except Exception as err:
        return f"β Upload failed:\n{str(err)}"
def scrape_single_url(url):
    """
    Fetch and ingest a single web page.

    Args:
        url: Full URL of the page to scrape (including scheme).

    Returns:
        Status message with the number of chunks stored, or an error.
    """
    if not url:
        return "β Please enter a URL"
    try:
        chunk_total = process_and_store_webpage(url)
    except Exception as err:
        return f"β Scraping failed:\n{str(err)}"
    return f"β Success!\n\nURL: {url}\nChunks created: {chunk_total}"
def scrape_multiple_urls(urls_text):
    """
    Scrape several web pages, one URL per line.

    Each URL is processed independently; a failure on one does not stop
    the rest.

    Args:
        urls_text: Newline-separated URLs; blank lines are ignored.

    Returns:
        Summary line (success/failure counts) followed by a per-URL
        result list.
    """
    if not urls_text:
        return "β Please enter URLs (one per line)"
    urls = [url.strip() for url in urls_text.split('\n') if url.strip()]
    # Fix: whitespace-only input used to slip past the guard above and
    # report "0 succeeded, 0 failed"; treat it like empty input instead.
    if not urls:
        return "β Please enter URLs (one per line)"
    results = []
    success_count = 0
    fail_count = 0
    for url in urls:
        try:
            num_chunks = process_and_store_webpage(url)
            results.append(f"β {url}: {num_chunks} chunks")
            success_count += 1
        except Exception as e:
            results.append(f"β {url}: {str(e)}")
            fail_count += 1
    summary = f"π Summary: {success_count} succeeded, {fail_count} failed\n\n"
    return summary + "\n".join(results)
def delete_document(source_name):
    """
    Remove every stored chunk originating from one source.

    Args:
        source_name: Exact file name or URL the content was ingested
            under (must match ``metadata.source``).

    Returns:
        Status message describing success or failure.

    Note:
        Reports success even when no points matched the given source.
    """
    if not source_name:
        return "β Please enter document name or URL"
    try:
        # Select all points whose metadata.source equals the given name.
        selector = models.FilterSelector(
            filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="metadata.source",
                        match=models.MatchValue(value=source_name),
                    )
                ]
            )
        )
        client.delete(
            collection_name=collection_name,
            points_selector=selector,
        )
        return f"β Successfully deleted all content from:\n{source_name}"
    except Exception as err:
        return f"β Deletion failed:\n{str(err)}"
def get_current_settings():
    """Load saved chatbot settings as a tuple ordered for the settings UI
    (disclaimer, welcome message, avatar URL, primary/secondary color, font)."""
    current = load_settings()
    field_order = (
        "disclaimer",
        "welcome_message",
        "bot_avatar_url",
        "primary_color",
        "secondary_color",
        "font_family",
    )
    return tuple(current[key] for key in field_order)
def save_chatbot_settings(disclaimer, welcome_message, bot_avatar_url, primary_color, secondary_color, font_family):
    """Persist chatbot appearance/text settings to the config file.

    Returns a status message; changes apply after the chatbot restarts.
    """
    try:
        settings = load_settings()
        settings.update(
            disclaimer=disclaimer,
            welcome_message=welcome_message,
            bot_avatar_url=bot_avatar_url,
            primary_color=primary_color,
            secondary_color=secondary_color,
            font_family=font_family,
        )
        save_settings(settings)
        return "β Settings saved! Restart the chatbot space for changes to take effect."
    except Exception as err:
        return f"β Failed to save: {str(err)}"
# ==================== Gradio Interface (5.49) ====================
# Admin UI: six tabs covering document listing, uploads, web scraping,
# deletion, chatbot settings, and a usage guide.
with gr.Blocks(
    title="HR Intervals - Admin Panel",
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown("# π HR Intervals - Knowledge Base Management")
    gr.Markdown("Manage documents and web content for the AI assistant")
    with gr.Tabs():
        # Tab 1: View Documents
        with gr.Tab("π View Documents"):
            gr.Markdown("### Current documents in knowledge base")
            gr.Markdown("π‘ *Tip: You can select and copy any text from the table below*")
            refresh_btn = gr.Button("π Refresh List", variant="primary")
            docs_table = gr.HTML(
                label="Documents"
            )
            refresh_btn.click(list_all_documents, outputs=docs_table)
            # Populate the table automatically when the page first loads.
            demo.load(list_all_documents, outputs=docs_table)
        # Tab 2: Upload Documents
        with gr.Tab("β¬οΈ Upload Documents"):
            gr.Markdown("### Upload PDF or DOCX files")
            file_input = gr.File(
                label="Select File (PDF or DOCX)",
                file_types=[".pdf", ".docx"]
            )
            doc_type_input = gr.Radio(
                choices=["document", "policy", "guide", "article"],
                value="document",
                label="Document Type"
            )
            upload_btn = gr.Button("π€ Upload", variant="primary", size="lg")
            upload_output = gr.Textbox(label="Upload Result", lines=5)
            upload_btn.click(
                upload_document,
                inputs=[file_input, doc_type_input],
                outputs=upload_output
            )
        # Tab 3: Scrape URLs
        with gr.Tab("π Scrape Web Pages"):
            gr.Markdown("### Scrape content from URLs")
            with gr.Row():
                # Left column: single-URL scraping.
                with gr.Column():
                    gr.Markdown("#### Single URL")
                    url_input = gr.Textbox(
                        label="Enter URL",
                        placeholder="https://example.com/article"
                    )
                    scrape_btn = gr.Button("π Scrape", variant="primary")
                    scrape_output = gr.Textbox(label="Result", lines=4)
                    scrape_btn.click(
                        scrape_single_url,
                        inputs=url_input,
                        outputs=scrape_output
                    )
                # Right column: batch scraping, one URL per line.
                with gr.Column():
                    gr.Markdown("#### Batch URLs")
                    urls_input = gr.Textbox(
                        label="Enter multiple URLs (one per line)",
                        placeholder="https://example.com/page1\nhttps://example.com/page2",
                        lines=6
                    )
                    batch_btn = gr.Button("π Batch Scrape", variant="primary")
                    batch_output = gr.Textbox(label="Batch Results", lines=8)
                    batch_btn.click(
                        scrape_multiple_urls,
                        inputs=urls_input,
                        outputs=batch_output
                    )
        # Tab 4: Delete Documents
        with gr.Tab("ποΈ Delete Documents"):
            gr.Markdown("### Delete documents or web pages")
            gr.Markdown("β οΈ **Warning**: This operation cannot be undone!")
            delete_input = gr.Textbox(
                label="Document Name or URL",
                placeholder="e.g., hiring_policy.pdf or https://example.com/article"
            )
            delete_btn = gr.Button("ποΈ Delete", variant="stop", size="lg")
            delete_output = gr.Textbox(label="Delete Result", lines=3)
            delete_btn.click(
                delete_document,
                inputs=delete_input,
                outputs=delete_output
            )
        # Tab 5: Chatbot Settings
        with gr.Tab("βοΈ Chatbot Settings"):
            gr.Markdown("### Chatbot Appearance & Text Settings")
            gr.Markdown("Changes take effect after the chatbot space restarts.")
            with gr.Row():
                with gr.Column():
                    setting_primary_color = gr.ColorPicker(label="Primary Color (buttons, links)")
                    setting_secondary_color = gr.ColorPicker(label="Secondary Color (background)")
                    setting_font = gr.Textbox(label="Font Family", placeholder="Arial, sans-serif")
                    setting_avatar = gr.Textbox(label="Bot Avatar Image URL", placeholder="https://...")
                with gr.Column():
                    setting_disclaimer = gr.Textbox(label="Disclaimer Text (Markdown)", lines=6)
                    setting_welcome = gr.Textbox(label="Welcome Message (Markdown)", lines=8)
            save_settings_btn = gr.Button("πΎ Save Settings", variant="primary", size="lg")
            settings_output = gr.Textbox(label="Result", lines=2)
            save_settings_btn.click(
                save_chatbot_settings,
                inputs=[setting_disclaimer, setting_welcome, setting_avatar,
                        setting_primary_color, setting_secondary_color, setting_font],
                outputs=settings_output
            )
            # Pre-fill the settings fields from the saved config on page load.
            # NOTE: output order must match the tuple get_current_settings returns.
            demo.load(
                get_current_settings,
                outputs=[setting_disclaimer, setting_welcome, setting_avatar,
                         setting_primary_color, setting_secondary_color, setting_font]
            )
        # Tab 6: Help
        with gr.Tab("βΉοΈ Help"):
            gr.Markdown("""
### Usage Guide
#### π View Documents
- Shows all uploaded documents and web pages
- Displays document type, upload date, and number of chunks
- Click "Refresh" to see the latest status
#### β¬οΈ Upload Documents
- Supports PDF and DOCX formats
- Documents are automatically split into chunks (~1000 characters each)
- You can categorize documents by type
#### π Scrape Web Pages
- Enter full URLs (including https://)
- Supports single or batch scraping
- Content is automatically converted to Markdown format
#### ποΈ Delete Documents
- Enter exact filename or URL
- Deletes all chunks from that source
- **Warning**: Cannot be undone!
- **Tip**: To update a document, delete it first then upload the new version
---
### Advanced Management
For detailed vector database management, visit:
[Qdrant Cloud Dashboard](https://cloud.qdrant.io)
### Technical Support
If you encounter issues, please contact the development team.
""")
if __name__ == "__main__":
    # Basic-auth credentials for the admin panel, taken from the environment.
    admin_user = os.getenv("ADMIN_USERNAME", "admin")
    admin_pass = os.getenv("ADMIN_PASSWORD", "hr-intervals-2026")
    # SECURITY: the hard-coded fallbacks are readable by anyone with access
    # to this source. Keep them for backward compatibility, but warn loudly
    # so real deployments set ADMIN_USERNAME / ADMIN_PASSWORD.
    if "ADMIN_USERNAME" not in os.environ or "ADMIN_PASSWORD" not in os.environ:
        print("β οΈ WARNING: using default admin credentials - set ADMIN_USERNAME and ADMIN_PASSWORD env vars")
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container/Space deployment)
        server_port=7861,
        share=False,
        auth=(admin_user, admin_pass),  # gate the whole UI behind basic auth
    )