# dev_LMS / src /streamlit_app.py
# raymondEDS
# Using pdf workaround
# aa1280b
import streamlit as st
import PyPDF2
import io
import base64
from datetime import datetime
import json
import tempfile
import os
# Page configuration: tab title and icon, wide layout, sidebar expanded.
st.set_page_config(
    page_title="Dev LMS",
    page_icon="πŸ“š",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Seed session state: each key is only set when it is not already present.
if "uploaded_documents" not in st.session_state:
    st.session_state["uploaded_documents"] = {}
if "current_user" not in st.session_state:
    st.session_state["current_user"] = "User"
def save_document_info(filename, file_content, file_type, temp_path=None):
    """Append a record describing an uploaded file to the session's document list.

    The record is stored in ``st.session_state.uploaded_documents['documents']``;
    that list is created the first time it is needed.

    Args:
        filename: Value stored under the ``'filename'`` key.
        file_content: Payload of the file. ``bytes`` are decoded as latin-1;
            anything else is converted with ``str()``. Its ``len()`` is
            stored under ``'size'``.
        file_type: Value stored under the ``'file_type'`` key.
        temp_path: Optional path of a temporary copy, stored unchanged.
    """
    store = st.session_state.uploaded_documents
    if 'documents' not in store:
        store['documents'] = []

    uploaded_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    byte_count = len(file_content)
    if isinstance(file_content, bytes):
        # latin-1 maps every byte to a code point, so this decode cannot fail.
        text = file_content.decode('latin-1')
    else:
        text = str(file_content)

    store['documents'].append({
        'filename': filename,
        'upload_time': uploaded_at,
        'file_type': file_type,
        'size': byte_count,
        'content': text,
        'temp_path': temp_path,  # kept so the file can be re-read later
    })
def extract_pdf_text_from_temp(temp_path):
    """Return the text of every page of the PDF stored at ``temp_path``.

    Each page's extracted text is followed by a newline. If anything raises
    while opening or reading the file, the error is shown with ``st.error``
    and an empty string is returned instead.
    """
    try:
        with open(temp_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            # Extract while the file is still open.
            page_texts = [page.extract_text() + "\n" for page in reader.pages]
        return "".join(page_texts)
    except Exception as e:
        st.error(f"Error reading PDF: {str(e)}")
        return ""
def extract_pdf_text_from_memory(uploaded_file):
    """Return the text of every page of a PDF passed as an in-memory file object.

    ``uploaded_file`` is handed directly to ``PyPDF2.PdfReader``. Each page's
    extracted text is followed by a newline. If anything raises, the error is
    shown with ``st.error`` and an empty string is returned instead.
    """
    try:
        reader = PyPDF2.PdfReader(uploaded_file)
        page_texts = [page.extract_text() + "\n" for page in reader.pages]
        return "".join(page_texts)
    except Exception as e:
        st.error(f"Error reading PDF: {str(e)}")
        return ""
def cleanup_temp_file(temp_path):
    """Remove the file at ``temp_path`` if there is one, on a best-effort basis.

    A falsy path, or a path that does not exist, is silently ignored. Any
    exception raised while checking or deleting is reported with
    ``st.warning`` instead of being propagated.
    """
    try:
        should_remove = bool(temp_path) and os.path.exists(temp_path)
        if should_remove:
            os.remove(temp_path)
    except Exception as e:
        st.warning(f"Could not clean up temporary file: {str(e)}")
def main():
    """Draw the sidebar navigation and render the page the user selected."""
    # Page label -> renderer; insertion order is the order shown in the selectbox.
    renderers = {
        "Dashboard": show_dashboard,
        "Upload Documents": show_upload_documents,
        "My Documents": show_my_documents,
        "Document Library": show_document_library,
        "Settings": show_settings,
    }

    # Sidebar for navigation
    with st.sidebar:
        st.title("πŸ“š Dev LMS")
        st.markdown("---")
        page = st.selectbox("Navigation", list(renderers))

    # Main content area: an unknown selection renders nothing.
    render_page = renderers.get(page)
    if render_page is not None:
        render_page()
def show_dashboard():
    """Render the dashboard: three summary metrics and the latest uploads.

    The "Recent Activity" section lists up to the last five entries of the
    session's document list (filename, upload time, file type), or an info
    message when the list is empty.
    """
    st.title("πŸ“Š Dashboard")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])

    # One metric per column; every metric shows a constant delta of "0".
    metrics = (
        ("Total Documents", len(documents)),
        ("System Status", "Active"),
        ("Storage Used", "Session"),
    )
    for column, (label, value) in zip(st.columns(3), metrics):
        with column:
            st.metric(label=label, value=value, delta="0")

    st.markdown("---")

    # Recent activity
    st.subheader("πŸ“ˆ Recent Activity")
    if not documents:
        st.info("No documents uploaded yet. Start by uploading a PDF document!")
        return

    for doc in documents[-5:]:
        with st.container():
            name_col, time_col, type_col = st.columns([3, 2, 1])
            with name_col:
                st.write(f"**{doc['filename']}**")
            with time_col:
                st.write(doc['upload_time'])
            with type_col:
                st.write(f"{doc['file_type']}")
            st.markdown("---")
def show_upload_documents():
    """Render the upload page: file picker, text preview, and save button.

    When a PDF is selected, its bytes are written to a temporary ``.pdf``
    file, text is extracted from that file for a preview, and an
    "Upload Document" button records the document (including the temporary
    file's path) via ``save_document_info``.

    The temporary file is created with ``delete=False``, so it is kept only
    when the document was actually saved. On every other pass through this
    function (a preview without a button click, or any failure) it is
    removed with ``cleanup_temp_file`` so repeated runs do not accumulate
    orphaned files.
    """
    st.title("πŸ“€ Upload Documents")
    st.markdown("---")

    # Add information about file upload
    st.info("πŸ’‘ **Note:** File upload uses temporary storage for better compatibility with Hugging Face Spaces.")

    uploaded_file = st.file_uploader(
        "Choose a PDF file",
        type=['pdf'],
        help="Upload PDF documents to the LMS (max 200MB)",
        accept_multiple_files=False
    )

    if uploaded_file is not None:
        try:
            # Display file info
            file_details = {
                "Filename": uploaded_file.name,
                "File size": f"{uploaded_file.size / 1024:.2f} KB",
                "File type": uploaded_file.type
            }
            st.write("**File Details:**")
            for key, value in file_details.items():
                st.write(f"- {key}: {value}")

            # Create temporary file for better PDF processing
            temp_path = None
            # Set to True only once the document record references temp_path.
            keep_temp_file = False
            try:
                with tempfile.NamedTemporaryFile(mode="wb", suffix=".pdf", delete=False) as temp:
                    # Record the path before writing so a failed write is
                    # still cleaned up by the finally clause below.
                    temp_path = temp.name
                    bytes_data = uploaded_file.getvalue()
                    temp.write(bytes_data)
                st.success(f"πŸ“ File temporarily stored at: {temp_path}")

                # Extract and display PDF content using temporary file
                pdf_text = extract_pdf_text_from_temp(temp_path)
                if pdf_text.strip():
                    st.subheader("πŸ“„ Document Preview")
                    with st.expander("View extracted text"):
                        st.text_area("PDF Content", pdf_text, height=300)
                else:
                    st.warning("⚠️ Could not extract text from this PDF. The file may be image-based or encrypted.")

                # Upload button
                if st.button("Upload Document", type="primary"):
                    try:
                        # Save document info with temporary file path
                        save_document_info(
                            uploaded_file.name,
                            bytes_data,
                            "PDF",
                            temp_path
                        )
                        # The saved record now points at temp_path: keep it.
                        keep_temp_file = True
                        st.success(f"βœ… Document '{uploaded_file.name}' uploaded successfully!")
                        st.balloons()
                        # Clear the file uploader
                        st.rerun()
                    except Exception as e:
                        st.error(f"❌ Error uploading document: {str(e)}")
                        st.info("πŸ’‘ Try uploading a smaller file or refresh the page.")
                        # Clean up temp file on error
                        keep_temp_file = False
            except Exception as e:
                st.error(f"❌ Error creating temporary file: {str(e)}")
                st.info("πŸ’‘ Please try uploading a different PDF file.")
            finally:
                # Runs on every exit path, including preview-only runs,
                # which previously left the temp file behind.
                if not keep_temp_file:
                    cleanup_temp_file(temp_path)
        except Exception as e:
            st.error(f"❌ Error processing file: {str(e)}")
            st.info("πŸ’‘ Please try uploading a different PDF file.")

    # Add helpful tips
    with st.expander("πŸ’‘ Upload Tips"):
        st.markdown("""
**For best results:**
- Use PDF files under 200MB
- Ensure PDFs contain text (not just images)
- Avoid password-protected PDFs
- If upload fails, try refreshing the page
**Technical details:**
- Files are temporarily stored on the server
- Text extraction uses temporary file processing
- Automatic cleanup of temporary files
**Supported formats:** PDF only
""")
def show_my_documents():
    """Render the "My Documents" page: a searchable list with a per-row viewer.

    Documents come from the session's document list. A non-empty search term
    keeps only documents whose filename or stored content contains it
    (case-insensitive). Clicking a row's "View" button shows its details and
    text: freshly extracted from the temporary file when that file still
    exists and yields text, otherwise the stored content.
    """
    st.title("πŸ“ My Documents")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])
    if not documents:
        st.info("You haven't uploaded any documents yet.")
        return

    # Search functionality
    search_term = st.text_input("πŸ” Search documents", placeholder="Enter filename or content...")

    # Filter documents based on search
    if search_term:
        needle = search_term.lower()
        filtered_docs = [
            candidate for candidate in documents
            if needle in candidate['filename'].lower()
            or needle in candidate.get('content', '').lower()
        ]
    else:
        filtered_docs = documents

    if not filtered_docs:
        st.warning("No documents match your search criteria.")
        return

    # Display documents
    for i, doc in enumerate(filtered_docs):
        with st.container():
            name_col, time_col, type_col, action_col = st.columns([3, 2, 1, 1])
            with name_col:
                st.write(f"**{doc['filename']}**")
            with time_col:
                st.write(doc['upload_time'])
            with type_col:
                st.write(f"{doc['file_type']}")
            with action_col:
                if st.button(f"View {i}", key=f"view_{i}"):
                    st.subheader(f"πŸ“„ {doc['filename']}")
                    st.write(f"**Uploaded:** {doc['upload_time']}")
                    st.write(f"**Size:** {doc['size']} bytes")

                    stored_text = doc.get('content')
                    temp_path = doc.get('temp_path')
                    # Prefer a fresh extract while the temporary file exists.
                    if temp_path and os.path.exists(temp_path):
                        try:
                            fresh_content = extract_pdf_text_from_temp(temp_path)
                            if fresh_content.strip():
                                st.text_area("Document Content (Fresh Extract)", fresh_content, height=400, key=f"fresh_content_{i}")
                            elif stored_text:
                                # Fall back to stored content
                                st.text_area("Document Content (Stored)", stored_text, height=400, key=f"content_{i}")
                        except Exception as e:
                            st.warning(f"Could not read from temporary file: {str(e)}")
                            # Fall back to stored content
                            if stored_text:
                                st.text_area("Document Content (Stored)", stored_text, height=400, key=f"content_{i}")
                    elif stored_text:
                        # Display stored content
                        st.text_area("Document Content", stored_text, height=400, key=f"content_{i}")
            st.markdown("---")
def show_document_library():
    """Render the "Document Library" page: a searchable list with a per-row viewer.

    Documents come from the session's document list. A non-empty search term
    keeps only documents whose filename or stored content contains it
    (case-insensitive). Clicking a row's "View" button shows its details and
    text: freshly extracted from the temporary file when that file still
    exists and yields text, otherwise the stored content. Widget keys carry
    a ``lib_`` prefix.
    """
    st.title("πŸ“š Document Library")
    st.markdown("---")

    documents = st.session_state.uploaded_documents.get('documents', [])
    if not documents:
        st.info("No documents have been uploaded to the system yet.")
        return

    # Search functionality
    search_term = st.text_input("πŸ” Search all documents", placeholder="Enter filename or content...")

    # Filter documents based on search
    if search_term:
        needle = search_term.lower()
        filtered_docs = [
            candidate for candidate in documents
            if needle in candidate['filename'].lower()
            or needle in candidate.get('content', '').lower()
        ]
    else:
        filtered_docs = documents

    if not filtered_docs:
        st.warning("No documents match your search criteria.")
        return

    # Display documents
    for i, doc in enumerate(filtered_docs):
        with st.container():
            name_col, time_col, type_col, action_col = st.columns([3, 2, 1, 1])
            with name_col:
                st.write(f"**{doc['filename']}**")
            with time_col:
                st.write(doc['upload_time'])
            with type_col:
                st.write(f"{doc['file_type']}")
            with action_col:
                if st.button(f"View {i}", key=f"lib_view_{i}"):
                    st.subheader(f"πŸ“„ {doc['filename']}")
                    st.write(f"**Uploaded:** {doc['upload_time']}")
                    st.write(f"**Size:** {doc['size']} bytes")

                    stored_text = doc.get('content')
                    temp_path = doc.get('temp_path')
                    # Prefer a fresh extract while the temporary file exists.
                    if temp_path and os.path.exists(temp_path):
                        try:
                            fresh_content = extract_pdf_text_from_temp(temp_path)
                            if fresh_content.strip():
                                st.text_area("Document Content (Fresh Extract)", fresh_content, height=400, key=f"lib_fresh_content_{i}")
                            elif stored_text:
                                # Fall back to stored content
                                st.text_area("Document Content (Stored)", stored_text, height=400, key=f"lib_content_{i}")
                        except Exception as e:
                            st.warning(f"Could not read from temporary file: {str(e)}")
                            # Fall back to stored content
                            if stored_text:
                                st.text_area("Document Content (Stored)", stored_text, height=400, key=f"lib_content_{i}")
                    elif stored_text:
                        # Display stored content
                        st.text_area("Document Content", stored_text, height=400, key=f"lib_content_{i}")
            st.markdown("---")
def show_settings():
    """Render the settings page: system info, data actions, and status metrics.

    Actions offered:
      * Export All Data: offers the session's documents as a JSON download.
      * Clear All Data: removes each document's temporary file, empties the
        document list, and reruns the script.
      * Cleanup Temporary Files: drops ``temp_path`` from documents whose
        temporary file no longer exists.
    """
    st.title("βš™οΈ Settings")
    st.markdown("---")

    st.subheader("πŸ”§ System Information")
    st.write("**Version:** Dev LMS v1.0")
    st.write("**Features:**")
    for feature_line in (
        "- PDF document upload with temporary storage",
        "- Document search and preview",
        "- Document library",
        "- Session-based storage",
    ):
        st.write(feature_line)
    st.markdown("---")

    # Export data option
    if st.button("πŸ“₯ Export All Data"):
        exportable = st.session_state.uploaded_documents.get('documents', [])
        if not exportable:
            st.info("No data to export.")
        else:
            # Create JSON export
            payload = {
                'export_date': datetime.now().isoformat(),
                'documents': exportable,
            }
            st.download_button(
                label="Download JSON Export",
                data=json.dumps(payload, indent=2),
                file_name="lms_data_export.json",
                mime="application/json",
            )
    st.markdown("---")

    # Clear data option
    if st.button("πŸ—‘οΈ Clear All Data"):
        if not st.session_state.uploaded_documents.get('documents'):
            st.info("No documents to clear.")
        else:
            # Clean up temporary files before clearing data
            for stored_doc in st.session_state.uploaded_documents['documents']:
                if stored_doc.get('temp_path'):
                    cleanup_temp_file(stored_doc['temp_path'])
            st.session_state.uploaded_documents['documents'] = []
            st.success("All documents and temporary files have been cleared!")
            st.rerun()
    st.markdown("---")

    # Cleanup temporary files option
    if st.button("🧹 Cleanup Temporary Files"):
        cleaned_count = 0
        for stored_doc in st.session_state.uploaded_documents.get('documents', []):
            stale_path = stored_doc.get('temp_path')
            if stale_path and not os.path.exists(stale_path):
                # Remove temp_path reference if file doesn't exist
                stored_doc.pop('temp_path', None)
                cleaned_count += 1
        if cleaned_count > 0:
            st.success(f"Cleaned up {cleaned_count} missing temporary file references!")
        else:
            st.info("No cleanup needed - all temporary files are properly managed.")
    st.markdown("---")

    # System status
    st.subheader("πŸ“Š System Status")
    documents = st.session_state.uploaded_documents.get('documents', [])
    live_temp_docs = [
        entry for entry in documents
        if entry.get('temp_path') and os.path.exists(entry['temp_path'])
    ]
    temp_files_count = len(live_temp_docs)

    docs_col, temp_col = st.columns(2)
    with docs_col:
        st.metric("Total Documents", len(documents))
    with temp_col:
        st.metric("Active Temp Files", temp_files_count)
# Entry point: build the UI only when this module runs as the main script.
if __name__ == "__main__":
    main()