Spaces:
Sleeping
Sleeping
| """ | |
| Gradio admin interface for content management | |
| Allows uploading documents, scraping URLs, and managing content | |
| """ | |
| import gradio as gr | |
| import os | |
| import html as html_lib | |
| from dotenv import load_dotenv | |
| from qdrant_client import QdrantClient, models | |
| from src.ingestion import ingest_document | |
| from src.scraper import process_and_store_webpage | |
| from src.config import load_settings, save_settings | |
# Load environment variables (QDRANT_URL, QDRANT_API_KEY, ...) from a .env file.
load_dotenv()

# Initialize Qdrant client
client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY")
)
# Collection holding the document chunks; name overridable via env var.
collection_name = os.getenv("QDRANT_COLLECTION", "hr-intervals")

# Create index for metadata.source to enable filtering
# (required by the delete-by-source filter used in delete_document below).
try:
    client.create_payload_index(
        collection_name=collection_name,
        field_name="metadata.source",
        field_schema=models.PayloadSchemaType.KEYWORD
    )
    print("β Payload index for metadata.source created successfully")
except Exception as e:
    # Index might already exist or collection not found
    print(f"βΉοΈ Index status: {str(e)}")
| # ==================== Functions ==================== | |
def list_all_documents():
    """
    List every document currently stored in the knowledge base.

    Scrolls through all points in the collection, groups chunks by their
    nested ``metadata.source`` field, and renders an HTML table with
    selectable text (name, type, upload date, chunk count).

    Returns:
        HTML string: the documents table, an empty-state notice, or an
        error box if anything goes wrong.
    """
    try:
        # Paginate through ALL points (Qdrant has 5800+ points)
        all_points = []
        offset = None
        while True:
            points, next_offset = client.scroll(
                collection_name=collection_name,
                limit=1000,
                offset=offset,
                with_payload=True
            )
            all_points.extend(points)
            if next_offset is None:
                break
            offset = next_offset

        # Group by source
        docs_dict = {}
        for point in all_points:
            # Metadata is nested inside payload
            metadata = point.payload.get("metadata", {})
            source = metadata.get("source", "Unknown")
            if source not in docs_dict:
                docs_dict[source] = {
                    "name": source,
                    "type": metadata.get("type", "Unknown"),
                    "date": metadata.get("upload_date", "Unknown"),
                    "chunks": 0
                }
            docs_dict[source]["chunks"] += 1

        # Nothing meaningful to show: either empty, or only untagged chunks.
        if not docs_dict or (len(docs_dict) == 1 and "Unknown" in docs_dict):
            return """
            <div style="padding: 20px; text-align: center; color: #666;">
                <p>π No documents yet</p>
            </div>
            """

        # Build the page as a list of fragments joined once at the end
        # (fix: repeated `html += ...` is quadratic in the number of rows).
        parts = ["""
        <style>
        .docs-table {
            width: 100%;
            border-collapse: collapse;
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, sans-serif;
            user-select: text;
            -webkit-user-select: text;
            -moz-user-select: text;
            -ms-user-select: text;
        }
        .docs-table thead {
            background-color: #f8f9fa;
        }
        .docs-table th {
            padding: 12px;
            text-align: left;
            font-weight: 600;
            border-bottom: 2px solid #dee2e6;
            user-select: text;
        }
        .docs-table td {
            padding: 12px;
            border-bottom: 1px solid #dee2e6;
            user-select: text;
            cursor: text;
        }
        .docs-table tr:hover {
            background-color: #f8f9fa;
        }
        .doc-name {
            color: #0066cc;
            word-break: break-all;
        }
        </style>
        <table class="docs-table">
        <thead>
            <tr>
                <th>Document Name</th>
                <th>Type</th>
                <th>Upload Date</th>
                <th>Chunks</th>
            </tr>
        </thead>
        <tbody>
        """]
        for doc in docs_dict.values():
            # str() guards against non-string metadata values
            # (html.escape raises on anything that isn't a str).
            parts.append(f"""
            <tr>
                <td class="doc-name">{html_lib.escape(str(doc['name']))}</td>
                <td>{html_lib.escape(str(doc['type']))}</td>
                <td>{html_lib.escape(str(doc['date']))}</td>
                <td>{doc['chunks']}</td>
            </tr>
            """)
        parts.append("""
        </tbody>
        </table>
        """)
        return "".join(parts)
    except Exception as e:
        return f"""
        <div style="padding: 20px; color: #dc3545;">
            <p>β Error: {str(e)}</p>
        </div>
        """
def upload_document(file, doc_type="document"):
    """
    Ingest an uploaded PDF or DOCX file into the knowledge base.

    Args:
        file: Gradio file object (exposes its temp path as ``.name``),
            or None when nothing was selected.
        doc_type: Category label stored with the document.

    Returns:
        Human-readable status message describing the outcome.
    """
    if file is None:
        return "β Please select a file"
    try:
        path = file.name
        chunk_total = ingest_document(path, doc_type)
        return (
            f"β Success!\n\nFile: {os.path.basename(path)}"
            f"\nChunks created: {chunk_total}\nType: {doc_type}"
        )
    except Exception as err:
        return f"β Upload failed:\n{str(err)}"
def scrape_single_url(url):
    """
    Fetch and ingest a single web page.

    Args:
        url: Full URL of the page to scrape (including scheme).

    Returns:
        Status message with the number of chunks stored, or an error.
    """
    if not url:
        return "β Please enter a URL"
    try:
        chunk_total = process_and_store_webpage(url)
    except Exception as err:
        return f"β Scraping failed:\n{str(err)}"
    return f"β Success!\n\nURL: {url}\nChunks created: {chunk_total}"
def scrape_multiple_urls(urls_text):
    """
    Scrape several web pages, one URL per line.

    Each URL is processed independently; a failure on one does not stop
    the rest.

    Args:
        urls_text: Newline-separated URLs; blank lines are ignored.

    Returns:
        Summary line (success/failure counts) followed by a per-URL
        result list.
    """
    if not urls_text:
        return "β Please enter URLs (one per line)"
    urls = [url.strip() for url in urls_text.split('\n') if url.strip()]
    # Fix: whitespace-only input used to slip past the guard above and
    # report "0 succeeded, 0 failed"; treat it like empty input instead.
    if not urls:
        return "β Please enter URLs (one per line)"
    results = []
    success_count = 0
    fail_count = 0
    for url in urls:
        try:
            num_chunks = process_and_store_webpage(url)
            results.append(f"β {url}: {num_chunks} chunks")
            success_count += 1
        except Exception as e:
            results.append(f"β {url}: {str(e)}")
            fail_count += 1
    summary = f"π Summary: {success_count} succeeded, {fail_count} failed\n\n"
    return summary + "\n".join(results)
def delete_document(source_name):
    """
    Remove every stored chunk originating from one source.

    Args:
        source_name: Exact file name or URL the content was ingested
            under (must match ``metadata.source``).

    Returns:
        Status message describing success or failure.

    Note:
        Reports success even when no points matched the given source.
    """
    if not source_name:
        return "β Please enter document name or URL"
    try:
        # Select all points whose metadata.source equals the given name.
        selector = models.FilterSelector(
            filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="metadata.source",
                        match=models.MatchValue(value=source_name),
                    )
                ]
            )
        )
        client.delete(
            collection_name=collection_name,
            points_selector=selector,
        )
        return f"β Successfully deleted all content from:\n{source_name}"
    except Exception as err:
        return f"β Deletion failed:\n{str(err)}"
def get_current_settings():
    """Load saved chatbot settings as a tuple ordered for the settings UI
    (disclaimer, welcome message, avatar URL, primary/secondary color, font)."""
    current = load_settings()
    field_order = (
        "disclaimer",
        "welcome_message",
        "bot_avatar_url",
        "primary_color",
        "secondary_color",
        "font_family",
    )
    return tuple(current[key] for key in field_order)
def save_chatbot_settings(disclaimer, welcome_message, bot_avatar_url, primary_color, secondary_color, font_family):
    """Persist chatbot appearance/text settings to the config file.

    Returns a status message; changes apply after the chatbot restarts.
    """
    try:
        settings = load_settings()
        settings.update(
            disclaimer=disclaimer,
            welcome_message=welcome_message,
            bot_avatar_url=bot_avatar_url,
            primary_color=primary_color,
            secondary_color=secondary_color,
            font_family=font_family,
        )
        save_settings(settings)
        return "β Settings saved! Restart the chatbot space for changes to take effect."
    except Exception as err:
        return f"β Failed to save: {str(err)}"
# ==================== Gradio Interface (5.49) ====================
# Admin UI: six tabs covering document listing, uploads, web scraping,
# deletion, chatbot settings, and a usage guide.
with gr.Blocks(
    title="HR Intervals - Admin Panel",
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown("# π HR Intervals - Knowledge Base Management")
    gr.Markdown("Manage documents and web content for the AI assistant")
    with gr.Tabs():
        # Tab 1: View Documents
        with gr.Tab("π View Documents"):
            gr.Markdown("### Current documents in knowledge base")
            gr.Markdown("π‘ *Tip: You can select and copy any text from the table below*")
            refresh_btn = gr.Button("π Refresh List", variant="primary")
            docs_table = gr.HTML(
                label="Documents"
            )
            refresh_btn.click(list_all_documents, outputs=docs_table)
            # Populate the table automatically when the page first loads.
            demo.load(list_all_documents, outputs=docs_table)
        # Tab 2: Upload Documents
        with gr.Tab("β¬οΈ Upload Documents"):
            gr.Markdown("### Upload PDF or DOCX files")
            file_input = gr.File(
                label="Select File (PDF or DOCX)",
                file_types=[".pdf", ".docx"]
            )
            doc_type_input = gr.Radio(
                choices=["document", "policy", "guide", "article"],
                value="document",
                label="Document Type"
            )
            upload_btn = gr.Button("π€ Upload", variant="primary", size="lg")
            upload_output = gr.Textbox(label="Upload Result", lines=5)
            upload_btn.click(
                upload_document,
                inputs=[file_input, doc_type_input],
                outputs=upload_output
            )
        # Tab 3: Scrape URLs
        with gr.Tab("π Scrape Web Pages"):
            gr.Markdown("### Scrape content from URLs")
            with gr.Row():
                # Left column: single-URL scraping.
                with gr.Column():
                    gr.Markdown("#### Single URL")
                    url_input = gr.Textbox(
                        label="Enter URL",
                        placeholder="https://example.com/article"
                    )
                    scrape_btn = gr.Button("π Scrape", variant="primary")
                    scrape_output = gr.Textbox(label="Result", lines=4)
                    scrape_btn.click(
                        scrape_single_url,
                        inputs=url_input,
                        outputs=scrape_output
                    )
                # Right column: batch scraping, one URL per line.
                with gr.Column():
                    gr.Markdown("#### Batch URLs")
                    urls_input = gr.Textbox(
                        label="Enter multiple URLs (one per line)",
                        placeholder="https://example.com/page1\nhttps://example.com/page2",
                        lines=6
                    )
                    batch_btn = gr.Button("π Batch Scrape", variant="primary")
                    batch_output = gr.Textbox(label="Batch Results", lines=8)
                    batch_btn.click(
                        scrape_multiple_urls,
                        inputs=urls_input,
                        outputs=batch_output
                    )
        # Tab 4: Delete Documents
        with gr.Tab("ποΈ Delete Documents"):
            gr.Markdown("### Delete documents or web pages")
            gr.Markdown("β οΈ **Warning**: This operation cannot be undone!")
            delete_input = gr.Textbox(
                label="Document Name or URL",
                placeholder="e.g., hiring_policy.pdf or https://example.com/article"
            )
            delete_btn = gr.Button("ποΈ Delete", variant="stop", size="lg")
            delete_output = gr.Textbox(label="Delete Result", lines=3)
            delete_btn.click(
                delete_document,
                inputs=delete_input,
                outputs=delete_output
            )
        # Tab 5: Chatbot Settings
        with gr.Tab("βοΈ Chatbot Settings"):
            gr.Markdown("### Chatbot Appearance & Text Settings")
            gr.Markdown("Changes take effect after the chatbot space restarts.")
            with gr.Row():
                with gr.Column():
                    setting_primary_color = gr.ColorPicker(label="Primary Color (buttons, links)")
                    setting_secondary_color = gr.ColorPicker(label="Secondary Color (background)")
                    setting_font = gr.Textbox(label="Font Family", placeholder="Arial, sans-serif")
                    setting_avatar = gr.Textbox(label="Bot Avatar Image URL", placeholder="https://...")
                with gr.Column():
                    setting_disclaimer = gr.Textbox(label="Disclaimer Text (Markdown)", lines=6)
                    setting_welcome = gr.Textbox(label="Welcome Message (Markdown)", lines=8)
            save_settings_btn = gr.Button("πΎ Save Settings", variant="primary", size="lg")
            settings_output = gr.Textbox(label="Result", lines=2)
            save_settings_btn.click(
                save_chatbot_settings,
                inputs=[setting_disclaimer, setting_welcome, setting_avatar,
                        setting_primary_color, setting_secondary_color, setting_font],
                outputs=settings_output
            )
            # Pre-fill the settings fields from the saved config on page load.
            # NOTE: output order must match the tuple get_current_settings returns.
            demo.load(
                get_current_settings,
                outputs=[setting_disclaimer, setting_welcome, setting_avatar,
                         setting_primary_color, setting_secondary_color, setting_font]
            )
        # Tab 6: Help
        with gr.Tab("βΉοΈ Help"):
            gr.Markdown("""
### Usage Guide
#### π View Documents
- Shows all uploaded documents and web pages
- Displays document type, upload date, and number of chunks
- Click "Refresh" to see the latest status
#### β¬οΈ Upload Documents
- Supports PDF and DOCX formats
- Documents are automatically split into chunks (~1000 characters each)
- You can categorize documents by type
#### π Scrape Web Pages
- Enter full URLs (including https://)
- Supports single or batch scraping
- Content is automatically converted to Markdown format
#### ποΈ Delete Documents
- Enter exact filename or URL
- Deletes all chunks from that source
- **Warning**: Cannot be undone!
- **Tip**: To update a document, delete it first then upload the new version
---
### Advanced Management
For detailed vector database management, visit:
[Qdrant Cloud Dashboard](https://cloud.qdrant.io)
### Technical Support
If you encounter issues, please contact the development team.
""")
if __name__ == "__main__":
    # Basic-auth credentials for the admin panel, taken from the environment.
    admin_user = os.getenv("ADMIN_USERNAME", "admin")
    admin_pass = os.getenv("ADMIN_PASSWORD", "hr-intervals-2026")
    # SECURITY: the hard-coded fallbacks are readable by anyone with access
    # to this source. Keep them for backward compatibility, but warn loudly
    # so real deployments set ADMIN_USERNAME / ADMIN_PASSWORD.
    if "ADMIN_USERNAME" not in os.environ or "ADMIN_PASSWORD" not in os.environ:
        print("β οΈ WARNING: using default admin credentials - set ADMIN_USERNAME and ADMIN_PASSWORD env vars")
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container/Space deployment)
        server_port=7861,
        share=False,
        auth=(admin_user, admin_pass),  # gate the whole UI behind basic auth
    )