Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import PyPDF2 | |
| import io | |
| import base64 | |
| from datetime import datetime | |
| import json | |
| import tempfile | |
| import os | |
# --- Page setup ---------------------------------------------------------
# Browser-tab title/icon and overall layout for the app.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_icon="π",
    page_title="Dev LMS",
)

# --- Session defaults ---------------------------------------------------
# Seed the per-session keys the rest of the script reads, leaving any value
# that is already present in the session untouched.
if "uploaded_documents" not in st.session_state:
    st.session_state["uploaded_documents"] = {}
if "current_user" not in st.session_state:
    st.session_state["current_user"] = "User"
def save_document_info(filename, file_content, file_type, temp_path=None):
    """Append a record for one uploaded file to the session's document list.

    Args:
        filename: Name to show for the document.
        file_content: The file payload; ``bytes`` are decoded as latin-1,
            anything else is converted with ``str``.
        file_type: Short type label stored with the record.
        temp_path: Optional path of a temp file holding the payload.
    """
    # Create the list on first use, then reuse it on every later call.
    records = st.session_state.uploaded_documents.setdefault('documents', [])

    uploaded_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    payload_size = len(file_content)

    # NOTE(review): when the caller passes raw PDF bytes, this stores those
    # bytes mapped to text, not the extracted text — confirm that is what the
    # search and "stored content" views expect.
    if isinstance(file_content, bytes):
        content_text = file_content.decode('latin-1')
    else:
        content_text = str(file_content)

    records.append({
        'filename': filename,
        'upload_time': uploaded_at,
        'file_type': file_type,
        'size': payload_size,
        'content': content_text,
        'temp_path': temp_path,  # kept so later views can re-read the file
    })
def extract_pdf_text_from_temp(temp_path):
    """Extract the text of the PDF stored at ``temp_path``.

    The file is opened in binary mode and the open handle is passed to
    ``extract_pdf_text_from_memory`` so that both entry points share a single
    page-extraction routine instead of duplicating it.

    Args:
        temp_path: Filesystem path of the PDF to read.

    Returns:
        The extracted text, or ``""`` when the file cannot be opened or
        parsed (the failure is reported through ``st.error``).
    """
    try:
        with open(temp_path, "rb") as pdf_file:
            # Must run inside the ``with`` so the handle is still open while
            # the pages are being read.
            return extract_pdf_text_from_memory(pdf_file)
    except Exception as e:
        # Covers failures to open the path; parse errors are already
        # reported by the helper with the same message.
        st.error(f"Error reading PDF: {str(e)}")
        return ""
def extract_pdf_text_from_memory(uploaded_file):
    """Extract the text of a PDF from an in-memory or already-open file object.

    Args:
        uploaded_file: A binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The text of every page, each followed by a newline, or ``""`` when
        the PDF cannot be read (the failure is reported through ``st.error``).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # Build the result in one pass instead of repeated ``+=``.
        # ``or ""`` keeps a page that yields no text from aborting the whole
        # document with a TypeError.
        # NOTE(review): whether extract_text() can return None depends on the
        # installed PyPDF2 release — the guard is harmless either way.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf_reader.pages
        )
    except Exception as e:
        st.error(f"Error reading PDF: {str(e)}")
        return ""
def cleanup_temp_file(temp_path):
    """Delete the temporary file at ``temp_path`` on a best-effort basis.

    Args:
        temp_path: Path of the file to remove. Falsy values (``None``, ``""``)
            are ignored.

    Returns:
        None. A file that is already gone is not an error; any other failure
        is reported through ``st.warning`` instead of being raised.
    """
    if not temp_path:
        return
    try:
        # Remove directly rather than checking existence first: the file can
        # vanish between the check and the removal.
        os.remove(temp_path)
    except FileNotFoundError:
        # Nothing left to clean up.
        pass
    except Exception as e:
        st.warning(f"Could not clean up temporary file: {str(e)}")
def main():
    """Draw the sidebar navigation and render the page the user selected."""
    # Page label -> renderer. Insertion order is the order offered in the
    # navigation selectbox.
    page_renderers = {
        "Dashboard": show_dashboard,
        "Upload Documents": show_upload_documents,
        "My Documents": show_my_documents,
        "Document Library": show_document_library,
        "Settings": show_settings,
    }

    with st.sidebar:
        st.title("π Dev LMS")
        st.markdown("---")
        page = st.selectbox("Navigation", list(page_renderers))

    # The page body is rendered outside the sidebar context.
    render_page = page_renderers.get(page)
    if render_page is not None:
        render_page()
def show_dashboard():
    """Render the dashboard: three headline metrics, then the latest uploads."""
    st.title("π Dashboard")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])

    # One metric per column, left to right.
    headline_metrics = (
        ("Total Documents", len(documents)),
        ("System Status", "Active"),
        ("Storage Used", "Session"),
    )
    for column, (label, value) in zip(st.columns(3), headline_metrics):
        with column:
            st.metric(label=label, value=value, delta="0")

    st.markdown("---")
    st.subheader("π Recent Activity")

    if not documents:
        st.info("No documents uploaded yet. Start by uploading a PDF document!")
        return

    # Only the five most recently appended records are listed.
    for doc in documents[-5:]:
        with st.container():
            name_col, time_col, type_col = st.columns([3, 2, 1])
            with name_col:
                st.write(f"**{doc['filename']}**")
            with time_col:
                st.write(doc['upload_time'])
            with type_col:
                st.write(f"{doc['file_type']}")
            st.markdown("---")
def show_upload_documents():
    """Render the upload page: file picker, text preview, and save button.

    The selected PDF is written to a temporary file so its text can be
    previewed. That temporary file is staged once per selected upload and
    remembered in ``st.session_state["pending_upload"]``; reruns of the script
    reuse it instead of creating a new file each time, and it is deleted when
    the selection is cleared or replaced without having been saved.
    Known limitation: a staged file is only discarded the next time this page
    runs, so leaving the page with an unsaved selection keeps the file until
    then.
    """
    st.title("π€ Upload Documents")
    st.markdown("---")
    st.info("π‘ **Note:** File upload uses temporary storage for better compatibility with Hugging Face Spaces.")

    uploaded_file = st.file_uploader(
        "Choose a PDF file",
        type=['pdf'],
        help="Upload PDF documents to the LMS (max 200MB)",
        accept_multiple_files=False
    )

    if uploaded_file is None:
        # Nothing selected (any more): drop a staged copy nobody saved.
        _discard_pending_upload()
    else:
        try:
            file_details = {
                "Filename": uploaded_file.name,
                "File size": f"{uploaded_file.size / 1024:.2f} KB",
                "File type": uploaded_file.type
            }
            st.write("**File Details:**")
            for key, value in file_details.items():
                st.write(f"- {key}: {value}")

            try:
                bytes_data = uploaded_file.getvalue()
                temp_path = _stage_uploaded_pdf(uploaded_file, bytes_data)
                st.success(f"π File temporarily stored at: {temp_path}")

                # Preview the text extracted from the staged file.
                pdf_text = extract_pdf_text_from_temp(temp_path)
                if pdf_text.strip():
                    st.subheader("π Document Preview")
                    with st.expander("View extracted text"):
                        st.text_area("PDF Content", pdf_text, height=300)
                else:
                    st.warning("β οΈ Could not extract text from this PDF. The file may be image-based or encrypted.")

                if st.button("Upload Document", type="primary"):
                    try:
                        save_document_info(
                            uploaded_file.name,
                            bytes_data,
                            "PDF",
                            temp_path
                        )
                    except Exception as e:
                        st.error(f"β Error uploading document: {str(e)}")
                        st.info("π‘ Try uploading a smaller file or refresh the page.")
                        # The record was not stored, so the staged file is
                        # not referenced by anything.
                        _discard_pending_upload()
                    else:
                        # The saved record now owns the temp file; it must
                        # not be deleted as an abandoned staging copy.
                        _mark_pending_upload_saved()
                        st.success(f"β Document '{uploaded_file.name}' uploaded successfully!")
                        st.balloons()
                        # Kept outside the ``try`` so the rerun request can
                        # never be mistaken for an upload failure.
                        st.rerun()
            except Exception as e:
                st.error(f"β Error creating temporary file: {str(e)}")
                st.info("π‘ Please try uploading a different PDF file.")
                _discard_pending_upload()
        except Exception as e:
            st.error(f"β Error processing file: {str(e)}")
            st.info("π‘ Please try uploading a different PDF file.")

    with st.expander("π‘ Upload Tips"):
        # Blank lines separate each heading from the preceding list so that
        # Markdown does not fold the heading into the last bullet.
        st.markdown(
            "\n"
            "**For best results:**\n"
            "- Use PDF files under 200MB\n"
            "- Ensure PDFs contain text (not just images)\n"
            "- Avoid password-protected PDFs\n"
            "- If upload fails, try refreshing the page\n"
            "\n"
            "**Technical details:**\n"
            "- Files are temporarily stored on the server\n"
            "- Text extraction uses temporary file processing\n"
            "- Automatic cleanup of temporary files\n"
            "\n"
            "**Supported formats:** PDF only\n"
        )


def _stage_uploaded_pdf(uploaded_file, bytes_data):
    """Return the path of a temp file holding ``bytes_data``, reusing it across reruns."""
    # NOTE(review): ``file_id`` is assumed to exist only on some Streamlit
    # releases, hence ``getattr``; name and size are the fallback identity.
    upload_key = getattr(uploaded_file, "file_id", None) or (
        uploaded_file.name,
        uploaded_file.size,
    )

    pending = st.session_state.get("pending_upload")
    if (
        pending
        and pending["key"] == upload_key
        and os.path.exists(pending["temp_path"])
    ):
        return pending["temp_path"]

    # A different file was selected, or the staged copy has disappeared.
    _discard_pending_upload()

    with tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False) as temp:
        # Record the path before writing so that a failed write can still be
        # cleaned up by ``_discard_pending_upload``.
        st.session_state["pending_upload"] = {
            "key": upload_key,
            "temp_path": temp.name,
            "saved": False,
        }
        temp.write(bytes_data)
    return temp.name


def _mark_pending_upload_saved():
    """Flag the staged temp file as owned by a saved document record."""
    pending = st.session_state.get("pending_upload")
    if pending:
        st.session_state["pending_upload"] = {**pending, "saved": True}


def _discard_pending_upload():
    """Forget the staged upload, deleting its temp file unless a saved record owns it."""
    pending = st.session_state.get("pending_upload")
    if pending and not pending["saved"]:
        cleanup_temp_file(pending["temp_path"])
    st.session_state["pending_upload"] = None
def show_my_documents():
    """Render the "My Documents" page: a searchable list with per-document view."""
    st.title("π My Documents")
    st.markdown("---")
    _render_document_browser(
        search_label="π Search documents",
        empty_message="You haven't uploaded any documents yet.",
        key_prefix="",
    )


def show_document_library():
    """Render the "Document Library" page: a searchable list with per-document view."""
    st.title("π Document Library")
    st.markdown("---")
    _render_document_browser(
        search_label="π Search all documents",
        empty_message="No documents have been uploaded to the system yet.",
        key_prefix="lib_",
    )


def filter_documents(documents, search_term):
    """Return the documents whose filename or stored content contains ``search_term``.

    Matching is case-insensitive. A falsy ``search_term`` returns ``documents``
    itself, unfiltered.
    """
    if not search_term:
        return documents
    needle = search_term.lower()  # lowercased once, not once per document
    return [
        doc for doc in documents
        if needle in doc['filename'].lower()
        or needle in doc.get('content', '').lower()
    ]


def _render_document_browser(search_label, empty_message, key_prefix):
    """Render the search box and document rows shared by both listing pages.

    ``key_prefix`` is prepended to every widget key so the two pages keep
    distinct widget identities.
    """
    documents = st.session_state.uploaded_documents.get('documents', [])
    if not documents:
        st.info(empty_message)
        return

    search_term = st.text_input(search_label, placeholder="Enter filename or content...")
    filtered_docs = filter_documents(documents, search_term)
    if not filtered_docs:
        st.warning("No documents match your search criteria.")
        return

    for i, doc in enumerate(filtered_docs):
        with st.container():
            col1, col2, col3, col4 = st.columns([3, 2, 1, 1])
            with col1:
                st.write(f"**{doc['filename']}**")
            with col2:
                st.write(doc['upload_time'])
            with col3:
                st.write(f"{doc['file_type']}")
            with col4:
                view_clicked = st.button(f"View {i}", key=f"{key_prefix}view_{i}")
            # Rendered after the column row so the text area is not squeezed
            # into the narrow button column.
            if view_clicked:
                _render_document_details(doc, i, key_prefix)
            st.markdown("---")


def _render_document_details(doc, index, key_prefix):
    """Show one document's metadata and text, preferring a fresh extract from its temp file."""
    st.subheader(f"π {doc['filename']}")
    st.write(f"**Uploaded:** {doc['upload_time']}")
    st.write(f"**Size:** {doc['size']} bytes")

    temp_path = doc.get('temp_path')
    if temp_path and os.path.exists(temp_path):
        try:
            fresh_content = extract_pdf_text_from_temp(temp_path)
            if fresh_content.strip():
                st.text_area(
                    "Document Content (Fresh Extract)",
                    fresh_content,
                    height=400,
                    key=f"{key_prefix}fresh_content_{index}",
                )
                return
        except Exception as e:
            st.warning(f"Could not read from temporary file: {str(e)}")
        # The temp file exists but gave no usable text: fall back to the copy
        # stored in the record.
        stored_label = "Document Content (Stored)"
    else:
        stored_label = "Document Content"

    stored_content = doc.get('content')
    if stored_content:
        st.text_area(
            stored_label,
            stored_content,
            height=400,
            key=f"{key_prefix}content_{index}",
        )
def show_settings():
    """Render the settings page.

    Sections, top to bottom: version and feature list, JSON export, clearing
    all documents (with their temp files), pruning references to temp files
    that no longer exist, and two status counters.
    """
    st.title("βοΈ Settings")
    st.markdown("---")

    st.subheader("π§ System Information")
    st.write("**Version:** Dev LMS v1.0")
    st.write("**Features:**")
    for feature in (
        "PDF document upload with temporary storage",
        "Document search and preview",
        "Document library",
        "Session-based storage",
    ):
        st.write(f"- {feature}")
    st.markdown("---")

    # Export: offer every stored record as a JSON download.
    if st.button("π₯ Export All Data"):
        docs_to_export = st.session_state.uploaded_documents.get('documents', [])
        if not docs_to_export:
            st.info("No data to export.")
        else:
            payload = {
                'export_date': datetime.now().isoformat(),
                'documents': docs_to_export,
            }
            st.download_button(
                label="Download JSON Export",
                data=json.dumps(payload, indent=2),
                file_name="lms_data_export.json",
                mime="application/json"
            )
    st.markdown("---")

    # Clear: delete each record's temp file, then empty the list.
    if st.button("ποΈ Clear All Data"):
        existing_docs = st.session_state.uploaded_documents.get('documents')
        if not existing_docs:
            st.info("No documents to clear.")
        else:
            for doc in existing_docs:
                path = doc.get('temp_path')
                if path:
                    cleanup_temp_file(path)
            st.session_state.uploaded_documents['documents'] = []
            st.success("All documents and temporary files have been cleared!")
            st.rerun()
    st.markdown("---")

    # Cleanup: drop ``temp_path`` from records whose file is gone from disk.
    if st.button("π§Ή Cleanup Temporary Files"):
        current_docs = st.session_state.uploaded_documents.get('documents', [])
        stale_docs = [
            doc for doc in current_docs
            if doc.get('temp_path') and not os.path.exists(doc['temp_path'])
        ]
        for doc in stale_docs:
            doc.pop('temp_path', None)
        if stale_docs:
            st.success(f"Cleaned up {len(stale_docs)} missing temporary file references!")
        else:
            st.info("No cleanup needed - all temporary files are properly managed.")
    st.markdown("---")

    # Status: total records and how many still have a temp file on disk.
    st.subheader("π System Status")
    documents = st.session_state.uploaded_documents.get('documents', [])
    docs_with_live_temp = [
        doc for doc in documents
        if doc.get('temp_path') and os.path.exists(doc['temp_path'])
    ]
    total_col, temp_col = st.columns(2)
    with total_col:
        st.metric("Total Documents", len(documents))
    with temp_col:
        st.metric("Active Temp Files", len(docs_with_live_temp))
# Entry point: build the UI when this module is run as the main script.
if __name__ == "__main__":
    main()