Spaces:

Prateek-044
/

NoteSnap

Sleeping

App Files Files Community

Prateek-044 committed on 26 days ago

Commit

e9cd410

verified ·

1 Parent(s): 20d5588

Upload 24 files

Browse files

Files changed (24) hide show

.dockerignore +94 -0
.env.example +34 -0
.gitignore +156 -0
CHANGELOG.md +145 -0
CONTRIBUTING.md +267 -0
DOCKER.md +306 -0
Dockerfile +42 -7
Dockerfile.prod +80 -0
LICENSE +225 -0
README.md +481 -20
app.py +196 -0
docker-build.sh +55 -0
docker-compose.dev.yml +42 -0
docker-compose.yml +46 -0
docker-dev.sh +35 -0
docker-run.sh +72 -0
docker-test.sh +117 -0
formatting.patch +65 -0
requirements.docker.txt +27 -0
requirements.txt +27 -3
run.py +42 -0
setup.py +44 -0
start.sh +42 -0
test_basic.py +143 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,94 @@

+# Git
+.git
+.gitignore
+.gitattributes
+# Docker
+Dockerfile*
+docker-compose*
+.dockerignore
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# Virtual environments
+venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+# Logs
+*.log
+logs/
+app.log
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+# Documentation
+docs/
+*.md
+README.md
+# Temporary files
+*.tmp
+*.temp
+temp/
+tmp/
+# Model cache (models are downloaded at runtime)
+.cache/
+models/
+# Uploads (runtime directory)
+uploads/
+# Environment files
+.env
+.env.local
+.env.production

.env.example ADDED Viewed

	@@ -0,0 +1,34 @@

+# AI Notes Summarizer Environment Configuration
+# Application Settings
+APP_NAME=AI Notes Summarizer
+APP_VERSION=1.0.0
+DEBUG=false
+# Streamlit Configuration
+STREAMLIT_SERVER_PORT=8501
+STREAMLIT_SERVER_ADDRESS=0.0.0.0
+STREAMLIT_SERVER_HEADLESS=true
+STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
+# AI Model Configuration
+DEFAULT_MODEL=facebook/bart-large-cnn
+MODEL_CACHE_DIR=/app/.cache/huggingface
+TRANSFORMERS_CACHE=/app/.cache/huggingface
+# Processing Limits
+MAX_FILE_SIZE_MB=10
+MAX_TEXT_LENGTH=50000
+CHUNK_SIZE=1024
+# Security
+ALLOWED_EXTENSIONS=pdf
+MAX_UPLOAD_SIZE=10485760
+# Logging
+LOG_LEVEL=INFO
+LOG_FILE=/app/logs/app.log
+# Performance
+TORCH_HOME=/app/.cache/torch
+HF_HOME=/app/.cache/huggingface

.gitignore ADDED Viewed

	@@ -0,0 +1,156 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+Pipfile.lock
+# PEP 582
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# Streamlit
+.streamlit/secrets.toml
+# AI Models Cache
+.cache/
+models/
+*.bin
+*.safetensors
+# Uploads
+uploads/
+temp/
+# Logs
+logs/
+*.log
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# Docker
+.dockerignore
+docker-compose.override.yml

CHANGELOG.md ADDED Viewed

	@@ -0,0 +1,145 @@

+# Changelog
+All notable changes to this project will be documented in this file.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+### Added
+- Initial project setup and documentation
+## [1.0.0] - 2025-01-17
+### Added
+- 📝 **Core Features**
+  - PDF file upload and text extraction using PyPDF2
+  - Direct text input for summarization
+  - AI-powered summarization using Hugging Face Transformers (BART, T5, DistilBART)
+  - Bullet-point formatted summary output
+  - Real-time progress indicators during processing
+- 🎨 **User Interface**
+  - Clean Streamlit web interface
+  - Tabbed layout for PDF upload and text input
+  - Model selection dropdown (BART, T5, DistilBART)
+  - Summary length customization (Short, Medium, Long)
+  - Statistics display (word count, compression ratio)
+  - Download functionality for generated summaries
+- 🐳 **Docker Support**
+  - Multi-stage Dockerfile for optimized builds
+  - Docker Compose configuration for easy deployment
+  - Development Docker setup with live reload
+  - Production-optimized Docker configuration
+  - Comprehensive Docker documentation
+- 🛠️ **Development Tools**
+  - Modular code architecture with separate modules
+  - Comprehensive error handling and user feedback
+  - Basic testing framework
+  - Docker build and run scripts
+  - Development environment setup
+- 📚 **Documentation**
+  - Detailed README with installation and usage instructions
+  - Docker deployment guide
+  - Troubleshooting section
+  - API documentation for modules
+- 🔒 **Security & Performance**
+  - Non-root Docker container execution
+  - Input validation and file size limits
+  - Model caching for improved performance
+  - Resource limits and health checks
+### Technical Details
+- **Backend**: Python 3.8+, Streamlit, Hugging Face Transformers, PyTorch
+- **AI Models**: BART (facebook/bart-large-cnn), T5, DistilBART
+- **PDF Processing**: PyPDF2 with comprehensive error handling
+- **Containerization**: Docker with multi-stage builds
+- **Architecture**: Modular design with separate PDF processing and summarization modules
+### Dependencies
+- streamlit>=1.28.0
+- transformers>=4.35.0
+- torch>=2.0.0
+- PyPDF2>=3.0.1
+- Additional utilities for text processing and acceleration
+---
+## Release Notes
+### Version 1.0.0 Highlights
+🎉 **Initial Release** - NoteSnap is now available!
+This first release provides a complete solution for document summarization with:
+- **Easy-to-use web interface** built with Streamlit
+- **Multiple AI models** for different use cases and performance needs
+- **Docker support** for consistent deployment across environments
+- **Comprehensive documentation** for users and developers
+### Supported Platforms
+- **Local Installation**: Windows, macOS, Linux with Python 3.8+
+- **Docker**: Any platform supporting Docker containers
+- **Cloud Deployment**: Compatible with cloud platforms supporting Docker
+### Known Limitations
+- PDF processing limited to text-based documents (no OCR for scanned images)
+- Maximum file size limit of 10MB for PDF uploads
+- Internet connection required for initial model downloads
+- GPU acceleration optional but recommended for better performance
+### Upcoming Features (Roadmap)
+- 📱 Mobile-responsive interface improvements
+- 🔍 OCR support for scanned PDF documents
+- 🌐 Multi-language summarization support
+- 📊 Advanced analytics and summary quality metrics
+- 🔗 API endpoints for programmatic access
+- 📱 Progressive Web App (PWA) capabilities
+---
+## Migration Guide
+### From Development to Production
+When deploying to production:
+1. **Use Docker Compose**:
+   ```bash
+   docker-compose up -d
+   ```
+2. **Configure Environment Variables**:
+   - Copy `.env.example` to `.env`
+   - Adjust settings for your environment
+3. **Set Resource Limits**:
+   - Ensure adequate memory (4GB+ recommended)
+   - Configure CPU limits based on expected load
+### Updating Dependencies
+To update to newer versions:
+```bash
+# Update Python packages
+pip install -r requirements.txt --upgrade
+# Rebuild Docker image
+docker-compose build --no-cache
+```
+---
+## Support
+For questions, issues, or contributions:
+- 🐛 [Report Issues](https://github.com/PRATEEK-260/NoteSnap/issues)
+- 💬 [Discussions](https://github.com/PRATEEK-260/NoteSnap/discussions)
+---
+**Thank you for using NoteSnap!** 🎉

CONTRIBUTING.md ADDED Viewed

	@@ -0,0 +1,267 @@

+# 🤝 Contributing to NoteSnap
+Thank you for your interest in contributing to NoteSnap! This document provides guidelines and information for contributors.
+## 📋 Table of Contents
+- [Code of Conduct](#code-of-conduct)
+- [Getting Started](#getting-started)
+- [Development Setup](#development-setup)
+- [Making Changes](#making-changes)
+- [Submitting Changes](#submitting-changes)
+- [Style Guidelines](#style-guidelines)
+- [Testing](#testing)
+- [Documentation](#documentation)
+## 📜 Code of Conduct
+This project and everyone participating in it is governed by our commitment to creating a welcoming and inclusive environment. Please be respectful and constructive in all interactions.
+## 🚀 Getting Started
+### Prerequisites
+- Python 3.8 or higher
+- Git
+- Docker (optional but recommended)
+- Basic knowledge of Python, Streamlit, and AI/ML concepts
+### Fork and Clone
+1. Fork the repository on GitHub
+2. Clone your fork locally:
+   ```bash
+   git clone https://github.com/YOUR-USERNAME/NoteSnap.git
+   cd NoteSnap
+   ```
+## 🛠️ Development Setup
+### Local Development
+1. **Create a virtual environment:**
+   ```bash
+   python -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   ```
+2. **Install dependencies:**
+   ```bash
+   pip install -r requirements.txt
+   ```
+3. **Run the application:**
+   ```bash
+   streamlit run app.py
+   ```
+### Docker Development
+1. **Build and run with Docker:**
+   ```bash
+   ./docker-dev.sh
+   ```
+2. **Or use Docker Compose:**
+   ```bash
+   docker-compose -f docker-compose.dev.yml up
+   ```
+## 🔄 Making Changes
+### Branch Naming
+Use descriptive branch names:
+- `feature/add-new-model-support`
+- `bugfix/fix-pdf-processing-error`
+- `docs/update-installation-guide`
+- `refactor/improve-error-handling`
+### Commit Messages
+Follow conventional commit format:
+```
+type(scope): description
+[optional body]
+[optional footer]
+```
+Examples:
+- `feat(summarizer): add support for T5 model`
+- `fix(pdf): resolve text extraction encoding issue`
+- `docs(readme): update installation instructions`
+## 📤 Submitting Changes
+### Pull Request Process
+1. **Create a feature branch:**
+   ```bash
+   git checkout -b feature/your-feature-name
+   ```
+2. **Make your changes and commit:**
+   ```bash
+   git add .
+   git commit -m "feat: add your feature description"
+   ```
+3. **Push to your fork:**
+   ```bash
+   git push origin feature/your-feature-name
+   ```
+4. **Create a Pull Request** on GitHub with:
+   - Clear title and description
+   - Reference to related issues
+   - Screenshots if applicable
+   - Test results
+### Pull Request Requirements
+- [ ] Code follows project style guidelines
+- [ ] All tests pass
+- [ ] Documentation updated if needed
+- [ ] No breaking changes (or clearly documented)
+- [ ] Self-review completed
+## 🎨 Style Guidelines
+### Python Code Style
+- Follow PEP 8
+- Use meaningful variable and function names
+- Add docstrings for functions and classes
+- Keep functions focused and small
+- Use type hints where appropriate
+### Example:
+```python
+def process_pdf_file(uploaded_file: UploadedFile) -> Optional[str]:
+    """
+    Extract text content from uploaded PDF file.
+    Args:
+        uploaded_file: Streamlit uploaded file object
+    Returns:
+        str: Extracted text content or None if extraction fails
+    """
+    # Implementation here
+    pass
+```
+### File Organization
+- Keep modules focused on single responsibilities
+- Use clear directory structure
+- Add `__init__.py` files for packages
+- Group related functionality together
+## 🧪 Testing
+### Running Tests
+```bash
+# Basic functionality tests
+python test_basic.py
+# Docker tests
+./docker-test.sh
+# Manual testing checklist
+# - PDF upload and processing
+# - Text input and summarization
+# - Different AI models
+# - Error handling scenarios
+```
+### Writing Tests
+- Add tests for new features
+- Test edge cases and error conditions
+- Use descriptive test names
+- Keep tests independent and isolated
+## 📚 Documentation
+### Code Documentation
+- Add docstrings to all functions and classes
+- Include type hints
+- Comment complex logic
+- Update README.md for new features
+### User Documentation
+- Update usage instructions
+- Add examples for new features
+- Include troubleshooting information
+- Keep Docker documentation current
+## 🐛 Reporting Issues
+When reporting bugs:
+1. Use the bug report template
+2. Include environment details
+3. Provide steps to reproduce
+4. Add relevant logs and screenshots
+5. Check for existing similar issues
+## 💡 Suggesting Features
+When suggesting features:
+1. Use the feature request template
+2. Explain the use case and motivation
+3. Consider implementation complexity
+4. Provide mockups or examples if helpful
+## 🏷️ Issue Labels
+- `bug` - Something isn't working
+- `enhancement` - New feature or request
+- `documentation` - Improvements or additions to docs
+- `good first issue` - Good for newcomers
+- `help wanted` - Extra attention is needed
+- `question` - Further information is requested
+## 🎯 Areas for Contribution
+### High Priority
+- Bug fixes and stability improvements
+- Performance optimizations
+- Better error handling
+- Documentation improvements
+### Medium Priority
+- New AI model integrations
+- UI/UX enhancements
+- Additional file format support
+- Internationalization
+### Low Priority
+- Code refactoring
+- Additional testing
+- Development tooling
+- CI/CD improvements
+## 📞 Getting Help
+- 💬 [GitHub Discussions](https://github.com/PRATEEK-260/NoteSnap/discussions)
+- 🐛 [Issues](https://github.com/PRATEEK-260/NoteSnap/issues)
+## 🙏 Recognition
+Contributors will be:
+- Listed in the README.md
+- Mentioned in release notes
+- Given credit in commit messages
+- Invited to be maintainers (for significant contributions)
+---
+Thank you for contributing to NoteSnap! 🎉

DOCKER.md ADDED Viewed

	@@ -0,0 +1,306 @@

+# 🐳 Docker Deployment Guide
+This guide covers Docker deployment options for the AI Notes Summarizer application.
+## 📋 Prerequisites
+- Docker Engine 20.10+
+- Docker Compose 2.0+
+- At least 4GB RAM available for Docker
+- Internet connection for downloading AI models
+## 🚀 Quick Start
+### Using Docker Compose (Recommended)
+```bash
+# Clone the repository
+git clone <repository-url>
+cd ai-notes-summarizer
+# Start the application
+docker-compose up -d
+# Access at http://localhost:8501
+```
+### Using Docker Scripts
+```bash
+# Build the image
+./docker-build.sh
+# Run the container
+./docker-run.sh
+# Test the deployment
+./docker-test.sh
+```
+## 📁 Docker Files Overview
+| File | Purpose |
+|------|---------|
+| `Dockerfile` | Standard multi-stage build |
+| `Dockerfile.prod` | Production-optimized build |
+| `docker-compose.yml` | Production deployment |
+| `docker-compose.dev.yml` | Development environment |
+| `docker-build.sh` | Build script |
+| `docker-run.sh` | Run script |
+| `docker-dev.sh` | Development script |
+| `docker-test.sh` | Testing script |
+## 🔧 Configuration
+### Environment Variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `STREAMLIT_SERVER_PORT` | 8501 | Application port |
+| `STREAMLIT_SERVER_ADDRESS` | 0.0.0.0 | Bind address |
+| `TRANSFORMERS_CACHE` | /app/.cache/huggingface | Model cache directory |
+| `MAX_FILE_SIZE_MB` | 10 | Maximum PDF file size |
+| `TORCH_HOME` | /app/.cache/torch | PyTorch cache |
+### Volume Mounts
+| Volume | Purpose |
+|--------|---------|
+| `model_cache` | Persistent AI model storage |
+| `logs` | Application logs |
+| `uploads` | Temporary file storage |
+## 🏗️ Build Options
+### Standard Build
+```bash
+docker build -t ai-notes-summarizer .
+```
+### Production Build
+```bash
+docker build -f Dockerfile.prod -t ai-notes-summarizer:prod .
+```
+### Development Build
+```bash
+docker build --target dependencies -t ai-notes-summarizer:dev .
+```
+## 🚀 Deployment Options
+### 1. Docker Compose (Production)
+```yaml
+# docker-compose.yml
+version: '3.8'
+services:
+  ai-notes-summarizer:
+    image: ai-notes-summarizer:latest
+    ports:
+      - "8501:8501"
+    volumes:
+      - model_cache:/app/.cache
+      - logs:/app/logs
+    restart: unless-stopped
+```
+### 2. Docker Swarm
+```bash
+# Initialize swarm
+docker swarm init
+# Deploy stack
+docker stack deploy -c docker-compose.yml ai-notes-stack
+```
+### 3. Kubernetes
+```yaml
+# k8s-deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ai-notes-summarizer
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: ai-notes-summarizer
+  template:
+    metadata:
+      labels:
+        app: ai-notes-summarizer
+    spec:
+      containers:
+      - name: ai-notes-summarizer
+        image: ai-notes-summarizer:latest
+        ports:
+        - containerPort: 8501
+        resources:
+          limits:
+            memory: "4Gi"
+            cpu: "2"
+          requests:
+            memory: "2Gi"
+            cpu: "1"
+```
+## 🔍 Monitoring and Logging
+### Health Checks
+```bash
+# Check container health
+docker ps --filter "name=ai-notes-summarizer"
+# View health check logs
+docker inspect ai-notes-summarizer | grep -A 10 Health
+```
+### Logs
+```bash
+# View application logs
+docker-compose logs -f
+# View specific service logs
+docker logs -f ai-notes-summarizer
+```
+### Metrics
+```bash
+# Container stats
+docker stats ai-notes-summarizer
+# Resource usage
+docker exec ai-notes-summarizer df -h
+docker exec ai-notes-summarizer free -h
+```
+## 🛠️ Development
+### Development Environment
+```bash
+# Start development environment with live reload
+docker-compose -f docker-compose.dev.yml up
+# Or use the script
+./docker-dev.sh
+```
+### Debugging
+```bash
+# Access container shell
+docker exec -it ai-notes-summarizer bash
+# View application files
+docker exec ai-notes-summarizer ls -la /app
+# Check Python environment
+docker exec ai-notes-summarizer pip list
+```
+## 🔒 Security
+### Security Features
+- Non-root user execution
+- Minimal base image
+- No unnecessary packages
+- Health checks enabled
+- Resource limits configured
+### Security Scanning
+```bash
+# Scan for vulnerabilities (requires the deprecated `docker scan` plugin; newer Docker releases provide `docker scout cves` instead)
+docker scan ai-notes-summarizer:latest
+# Check running processes
+docker exec ai-notes-summarizer ps aux
+```
+## 🚨 Troubleshooting
+### Common Issues
+1. **Container won't start**
+   ```bash
+   docker logs ai-notes-summarizer
+   ```
+2. **Out of memory**
+   ```bash
+   # Increase Docker memory limit
+   docker update --memory=4g ai-notes-summarizer
+   ```
+3. **Model download fails**
+   ```bash
+   # Check internet connectivity
+   docker exec ai-notes-summarizer curl -I https://huggingface.co
+   ```
+4. **Permission issues**
+   ```bash
+   # Fix ownership
+   docker exec -u root ai-notes-summarizer chown -R app:app /app
+   ```
+### Performance Optimization
+1. **Use multi-stage builds** (already implemented)
+2. **Enable BuildKit**:
+   ```bash
+   export DOCKER_BUILDKIT=1
+   docker build .
+   ```
+3. **Use .dockerignore** (already included)
+4. **Pin dependency versions** (see requirements.docker.txt)
+## 📊 Resource Requirements
+### Minimum Requirements
+- CPU: 1 core
+- RAM: 2GB
+- Storage: 5GB
+### Recommended Requirements
+- CPU: 2 cores
+- RAM: 4GB
+- Storage: 10GB
+### Production Requirements
+- CPU: 4 cores
+- RAM: 8GB
+- Storage: 20GB
+- Load balancer for multiple instances
+## 🔄 Updates and Maintenance
+### Updating the Application
+```bash
+# Pull latest changes
+git pull
+# Rebuild and restart
+docker-compose up --build -d
+# Or force all containers to be recreated (note: this restarts them; it is not a zero-downtime rolling update)
+docker-compose up -d --force-recreate
+```
+### Backup and Restore
+```bash
+# Backup volumes
+docker run --rm -v ai-notes-model-cache:/data -v $(pwd):/backup alpine tar czf /backup/model-cache-backup.tar.gz -C /data .
+# Restore volumes
+docker run --rm -v ai-notes-model-cache:/data -v $(pwd):/backup alpine tar xzf /backup/model-cache-backup.tar.gz -C /data
+```
+## 📞 Support
+For Docker-specific issues:
+1. Check container logs: `docker logs ai-notes-summarizer`
+2. Verify resource limits: `docker stats`
+3. Test connectivity: `docker exec ai-notes-summarizer curl localhost:8501`
+4. Review Docker documentation: https://docs.docker.com

Dockerfile CHANGED Viewed

@@ -1,20 +1,55 @@
-FROM python:3.13.5-slim
-WORKDIR /app
 RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
     && rm -rf /var/lib/apt/lists/*
-COPY requirements.txt ./
-COPY src/ ./src/
-RUN pip3 install -r requirements.txt
 EXPOSE 8501
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+# Multi-stage Dockerfile for AI Notes Summarizer
+# Stage 1: Base image with system dependencies
+FROM python:3.10-slim as base
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
+    software-properties-common \
     git \
     && rm -rf /var/lib/apt/lists/*
+# Create non-root user for security
+RUN useradd --create-home --shell /bin/bash app
+# Stage 2: Dependencies installation
+FROM base as dependencies
+# Set working directory
+WORKDIR /app
+# Copy requirements first for better caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Stage 3: Application
+FROM dependencies as application
+# Copy application code
+COPY --chown=app:app . .
+# Create necessary directories
+RUN mkdir -p /app/uploads /app/logs && \
+    chown -R app:app /app
+# Switch to non-root user
+USER app
+# Expose port
 EXPOSE 8501
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8501/_stcore/health || exit 1
+# Default command
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.fileWatcherType=none", "--browser.gatherUsageStats=false"]

Dockerfile.prod ADDED Viewed

	@@ -0,0 +1,80 @@

+# Production-optimized Dockerfile for AI Notes Summarizer
+FROM python:3.10-slim as base
+# Build arguments
+ARG BUILD_DATE
+ARG VCS_REF
+ARG VERSION=1.0.0
+# Labels for metadata
+LABEL maintainer="AI Notes Summarizer Team" \
+      org.label-schema.build-date=$BUILD_DATE \
+      org.label-schema.name="ai-notes-summarizer" \
+      org.label-schema.description="AI-powered document summarization application" \
+      org.label-schema.url="https://github.com/your-repo/ai-notes-summarizer" \
+      org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.version=$VERSION \
+      org.label-schema.schema-version="1.0"
+# Set environment variables for production
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    STREAMLIT_SERVER_HEADLESS=true \
+    STREAMLIT_BROWSER_GATHER_USAGE_STATS=false \
+    TRANSFORMERS_CACHE=/app/.cache/huggingface \
+    TORCH_HOME=/app/.cache/torch \
+    HF_HOME=/app/.cache/huggingface
+# Install system dependencies and clean up in one layer
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+# Create non-root user
+RUN useradd --create-home --shell /bin/bash --uid 1000 app
+# Stage 2: Dependencies
+FROM base as dependencies
+WORKDIR /app
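+# Copy requirements and install Python dependencies
+# NOTE(review): PIP_NO_CACHE_DIR=1 is set above, so `pip cache purge` below is expected to fail with "cache is disabled" and break this RUN step - confirm and drop it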
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt && \
+    pip cache purge
+# Stage 3: Application
+FROM dependencies as application
+# Copy application code with proper ownership
+COPY --chown=app:app . .
+# Create necessary directories and set permissions
+RUN mkdir -p /app/.cache /app/logs /app/uploads && \
+    chown -R app:app /app && \
+    chmod +x /app/*.sh 2>/dev/null || true
+# Switch to non-root user
+USER app
+# Expose port
+EXPOSE 8501
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:8501/_stcore/health || exit 1
+# Run the application
+CMD ["streamlit", "run", "app.py", \
+     "--server.port=8501", \
+     "--server.address=0.0.0.0", \
+     "--server.headless=true", \
+     "--server.fileWatcherType=none", \
+     "--browser.gatherUsageStats=false", \
+     "--server.maxUploadSize=10"]

LICENSE ADDED Viewed

	@@ -0,0 +1,225 @@

+<<<<<<< HEAD
+MIT License
+Copyright (c) 2025 Muhammed Midlaj
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+=======
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+>>>>>>> 9b4f2dab9437daaefabf059cd647a5761c93c197

README.md CHANGED Viewed

@@ -1,20 +1,481 @@
----
-title: NoteSnap
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
-pinned: false
-short_description: Streamlit template space
-license: apache-2.0
----
-# Welcome to Streamlit!
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).

+# 📝 NoteSnap
+<div align="center">
+![NoteSnap Logo](https://img.shields.io/badge/📝-NoteSnap-blue?style=for-the-badge)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
+[![Streamlit](https://img.shields.io/badge/Streamlit-FF4B4B?style=flat&logo=streamlit&logoColor=white)](https://streamlit.io/)
+[![Docker](https://img.shields.io/badge/Docker-2496ED?style=flat&logo=docker&logoColor=white)](https://www.docker.com/)
+[![Transformers](https://img.shields.io/badge/🤗%20Transformers-FFD21E?style=flat)](https://huggingface.co/transformers/)
+[![GitHub stars](https://img.shields.io/github/stars/PRATEEK-260/NoteSnap?style=social)](https://github.com/PRATEEK-260/NoteSnap/stargazers)
+[![GitHub forks](https://img.shields.io/github/forks/PRATEEK-260/NoteSnap?style=social)](https://github.com/PRATEEK-260/NoteSnap/network/members)
+[![GitHub issues](https://img.shields.io/github/issues/PRATEEK-260/NoteSnap)](https://github.com/PRATEEK-260/NoteSnap/issues)
+</div>
+A powerful web application that transforms lengthy documents and notes into concise, bullet-point summaries using state-of-the-art AI models.
+---
+## 📋 Table of Contents
+- [✨ Features](#-features)
+- [🚀 Quick Start](#-quick-start)
+  - [Option 1: Docker (Recommended)](#option-1-docker-recommended)
+  - [Option 2: Local Installation](#option-2-local-installation)
+- [📖 Usage Guide](#-usage-guide)
+- [🖼️ Screenshots](#️-screenshots)
+- [🛠️ Technical Details](#️-technical-details)
+- [🐳 Docker Deployment](#-docker-deployment)
+- [🔧 Configuration](#-configuration)
+- [🚨 Troubleshooting](#-troubleshooting)
+- [🤝 Contributing](#-contributing)
+- [📄 License](#-license)
+- [🙏 Acknowledgments](#-acknowledgments)
+- [📞 Support](#-support)
+---
+## ✨ Features
+- **PDF Processing**: Upload PDF files and extract text content automatically
+- **Direct Text Input**: Paste text content directly for immediate summarization
+- **AI-Powered Summarization**: Uses Hugging Face Transformers (BART, T5) for high-quality summaries
+- **Bullet-Point Format**: Clean, readable bullet-point summaries
+- **Multiple AI Models**: Choose from different pre-trained models
+- **Customizable Length**: Adjust summary length (Short, Medium, Long)
+- **Progress Tracking**: Real-time progress indicators during processing
+- **Download Summaries**: Save generated summaries as text files
+- **Statistics**: View compression ratios and word counts
+- **Error Handling**: Comprehensive error handling and user feedback
+## 🚀 Quick Start
+### 🌐 Try Online (Fastest)
+**[🚀 Live Demo on Hugging Face Spaces](https://huggingface.co/spaces/Prateek-044/NoteSnap)**
+- No installation required
+- Instant access in your browser
+- Full functionality available
+### Option 1: Docker (Recommended)
+#### Prerequisites
+- Docker and Docker Compose installed
+- Internet connection (for downloading AI models)
+#### Using Docker Compose (Easiest)
+```bash
+# Clone the repository
+git clone https://github.com/PRATEEK-260/NoteSnap.git
+cd NoteSnap
+# Start the application
+docker-compose up -d
+# Access the application at http://localhost:8501
+```
+#### Using Docker Scripts
+```bash
+# Build the Docker image
+./docker-build.sh
+# Run the container
+./docker-run.sh
+# For development with live code reloading
+./docker-dev.sh
+```
+#### Manual Docker Commands
+```bash
+# Build the image
+docker build -t notesnap .
+# Run the container
+docker run -p 8501:8501 notesnap
+```
+### Option 2: Local Installation
+#### Prerequisites
+- Python 3.8 or higher
+- pip (Python package installer)
+- Internet connection (for downloading AI models)
+#### Installation Steps
+1. **Clone the repository**
+   ```bash
+   git clone https://github.com/PRATEEK-260/NoteSnap.git
+   cd NoteSnap
+   ```
+2. **Install dependencies**
+   ```bash
+   pip install -r requirements.txt
+   ```
+3. **Run the application**
+   ```bash
+   streamlit run app.py
+   ```
+4. **Open your browser**
+   - The application will automatically open at `http://localhost:8501`
+   - If it doesn't open automatically, navigate to the URL manually
+## 📖 Usage Guide
+### PDF Summarization
+1. **Upload PDF**: Click on the "📄 PDF Upload" tab
+2. **Select File**: Choose a PDF file (max 10MB)
+3. **Process**: Click "📖 Extract & Summarize PDF"
+4. **Review**: View the extracted text preview
+5. **Get Summary**: The AI will generate a bullet-point summary
+6. **Download**: Save the summary using the download button
+### Text Summarization
+1. **Input Text**: Click on the "📝 Text Input" tab
+2. **Paste Content**: Enter or paste your text (minimum 100 characters)
+3. **Summarize**: Click "🚀 Summarize Text"
+4. **Review**: View the generated summary
+5. **Download**: Save the summary as needed
+### Settings
+- **AI Model**: Choose from BART (recommended), T5, or DistilBART
+- **Summary Length**: Select Short, Medium, or Long summaries
+- **Statistics**: View word counts and compression ratios
+## 🛠️ Technical Details
+### Architecture
+```
+NoteSnap/
+├── app.py                 # Main Streamlit application
+├── modules/
+│   ├── __init__.py
+│   ├── pdf_processor.py   # PDF text extraction
+│   ├── text_summarizer.py # AI summarization
+│   └── utils.py          # Utility functions
+├── requirements.txt       # Python dependencies
+└── README.md             # This file
+```
+### AI Models
+- **BART (facebook/bart-large-cnn)**: Best quality, recommended for most use cases
+- **T5 Small**: Faster processing, good for shorter texts
+- **DistilBART**: Balanced performance and speed
+### Dependencies
+- **Streamlit**: Web application framework
+- **Transformers**: Hugging Face AI models
+- **PyTorch**: Deep learning framework
+- **PyPDF2**: PDF text extraction
+- **Additional utilities**: See `requirements.txt`
+## 🔧 Configuration
+### Model Selection
+You can change the default model by modifying the `TextSummarizer` initialization in `app.py`:
+```python
+text_summarizer = TextSummarizer(model_name="your-preferred-model")
+```
+### Summary Length
+Adjust default summary lengths in `modules/text_summarizer.py`:
+```python
+self.min_summary_length = 50  # Minimum words
+self.max_summary_length = 300  # Maximum words
+```
+### File Size Limits
+Modify PDF file size limits in `modules/pdf_processor.py`:
+```python
+self.max_file_size = 10 * 1024 * 1024  # 10MB
+```
+## 🚨 Troubleshooting
+### Common Issues
+1. **Model Loading Errors**
+   - Ensure stable internet connection
+   - Check available disk space (models can be 1-2GB)
+   - Try switching to a smaller model (T5 Small or DistilBART)
+2. **PDF Processing Issues**
+   - Ensure PDF is not encrypted
+   - Check if PDF contains readable text (not just images)
+   - Try with a smaller PDF file
+3. **Memory Errors**
+   - Reduce text length
+   - Close other applications
+   - Try using CPU instead of GPU
+4. **Slow Performance**
+   - Use GPU if available
+   - Choose smaller models for faster processing
+   - Process shorter text chunks
+### Error Messages
+- **"Text is too short"**: Minimum 100 characters required
+- **"No readable text found"**: PDF may contain only images
+- **"Model loading error"**: Check internet connection
+- **"Out of memory"**: Reduce text length or restart application
+## 🎯 Best Practices
+### For Best Results
+1. **Text Quality**: Use well-formatted, coherent text
+2. **Length**: Optimal text length is 500-5000 words
+3. **Content**: Works best with structured content (articles, reports, notes)
+4. **Model Choice**: Use BART for academic/formal content, T5 for general text
+### Performance Tips
+1. **GPU Usage**: Enable CUDA for faster processing
+2. **Batch Processing**: Process multiple documents separately
+3. **Model Caching**: Models are cached after first load
+4. **Text Preprocessing**: Clean text improves summary quality
+## 🖼️ Screenshots
+<div align="center">
+### Main Interface
+![Main Interface](Screenshots/Main%20interface.png)
+*Clean and intuitive interface with PDF upload and text input options*
+### PDF Processing
+![PDF Processing](Screenshots/pdf%20processing.png)
+*Real-time PDF processing with progress indicators*
+### Summary Results
+![Summary Results](Screenshots/Summery%20Result.png)
+*Bullet-point summaries with statistics and download options*
+### Settings Panel
+![Settings Panel](Screenshots/settings%20panel.png)
+*Customizable AI model selection and summary length options*
+</div>
+## 🎥 Demo
+🚀 **[Live Demo](https://huggingface.co/spaces/Prateek-044/NoteSnap)** - Try it now on Hugging Face Spaces!
+## 📄 License
+This project is open source and available under the MIT License.
+## 🤝 Contributing
+Contributions are welcome! Please feel free to submit issues, feature requests, or pull requests.
+## 🐳 Docker Deployment
+### Production Deployment
+For production deployment, use the standard Docker Compose configuration:
+```bash
+# Start in production mode
+docker-compose up -d
+# View logs
+docker-compose logs -f
+# Stop the application
+docker-compose down
+# Update the application (the image is built locally, so pull the code and rebuild)
+git pull
+docker-compose up -d --build
+```
+### Development Mode
+For development with live code reloading:
+```bash
+# Start development environment
+docker-compose -f docker-compose.dev.yml up
+# Or use the convenience script
+./docker-dev.sh
+```
+### Docker Configuration
+#### Environment Variables
+- `STREAMLIT_SERVER_PORT`: Port for the application (default: 8501)
+- `TRANSFORMERS_CACHE`: Cache directory for AI models
+- `MAX_FILE_SIZE_MB`: Maximum PDF file size (default: 10MB)
+#### Volumes
+- `model_cache`: Persistent storage for downloaded AI models
+- `logs`: Application logs
+- `uploads`: Temporary file storage (optional)
+#### Resource Limits
+- Memory: 4GB limit, 2GB reserved
+- CPU: 2 cores limit, 1 core reserved
+### Docker Troubleshooting
+1. **Container won't start**: Check logs with `docker-compose logs`
+2. **Out of memory**: Increase Docker memory limits
+3. **Model download fails**: Ensure internet connectivity
+4. **Permission issues**: Check file ownership and Docker user settings
+## 🤝 Contributing
+We welcome contributions from the community! Here's how you can help:
+### 🌟 Ways to Contribute
+- ⭐ **Star this repository** if you find it useful
+- 🐛 **Report bugs** by opening an [issue](https://github.com/PRATEEK-260/NoteSnap/issues)
+- 💡 **Suggest features** or improvements
+- 📖 **Improve documentation**
+- 🔧 **Submit pull requests** with bug fixes or new features
+### 🚀 Getting Started
+1. **Fork the repository**
+   ```bash
+   # Click the "Fork" button on GitHub, then:
+   git clone https://github.com/YOUR-USERNAME/NoteSnap.git
+   cd NoteSnap
+   ```
+2. **Create a feature branch**
+   ```bash
+   git checkout -b feature/amazing-feature
+   ```
+3. **Make your changes**
+   - Follow the existing code style
+   - Add tests for new features
+   - Update documentation as needed
+4. **Test your changes**
+   ```bash
+   # Run basic tests
+   python test_basic.py
+   # Test Docker build
+   ./docker-test.sh
+   ```
+5. **Submit a pull request**
+   ```bash
+   git add .
+   git commit -m "Add amazing feature"
+   git push origin feature/amazing-feature
+   ```
+### 📋 Development Guidelines
+- **Code Style**: Follow PEP 8 for Python code
+- **Documentation**: Update README.md for new features
+- **Testing**: Add tests for new functionality
+- **Docker**: Ensure Docker compatibility
+- **Dependencies**: Keep requirements.txt updated
+### 🐛 Reporting Issues
+When reporting issues, please include:
+- **Environment details** (OS, Python version, Docker version)
+- **Steps to reproduce** the issue
+- **Expected vs actual behavior**
+- **Error messages** or logs
+- **Screenshots** if applicable
+[**Report an Issue →**](https://github.com/PRATEEK-260/NoteSnap/issues/new)
+### 💬 Discussions
+Join our community discussions:
+- [**GitHub Discussions**](https://github.com/PRATEEK-260/NoteSnap/discussions) - General questions and ideas
+- [**Issues**](https://github.com/PRATEEK-260/NoteSnap/issues) - Bug reports and feature requests
+## 📄 License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+## 🙏 Acknowledgments
+### 🛠️ Built With
+- [**Streamlit**](https://streamlit.io/) - Web application framework
+- [**Hugging Face Transformers**](https://huggingface.co/transformers/) - AI/ML models
+- [**PyTorch**](https://pytorch.org/) - Deep learning framework
+- [**PyPDF2**](https://pypdf2.readthedocs.io/) - PDF processing
+- [**Docker**](https://www.docker.com/) - Containerization
+### 🎯 Inspiration
+- Inspired by the need for efficient document summarization
+- Built to help students, researchers, and professionals save time
+- Leverages state-of-the-art AI models for high-quality summaries
+### 🤖 AI Models
+Special thanks to the teams behind these amazing models:
+- [**BART**](https://huggingface.co/facebook/bart-large-cnn) by Facebook AI
+- [**T5**](https://huggingface.co/t5-small) by Google Research
+- [**DistilBART**](https://huggingface.co/sshleifer/distilbart-cnn-12-6) by Sam Shleifer
+## 📞 Support
+If you encounter any issues or have questions:
+### 🔍 Self-Help Resources
+1. 📖 Check the [troubleshooting section](#-troubleshooting) above
+2. 🐛 Review error messages for specific guidance
+3. 📦 Ensure all dependencies are properly installed
+4. 🔄 Try with different models or settings
+5. 🐳 For Docker issues, check container logs: `docker-compose logs`
+### 💬 Get Help
+- 🐛 **Bug Reports**: [Open an Issue](https://github.com/PRATEEK-260/NoteSnap/issues/new)
+- 💡 **Feature Requests**: [Start a Discussion](https://github.com/PRATEEK-260/NoteSnap/discussions)
+---
+<div align="center">
+**Made with ❤️ by [PRATEEK-260](https://github.com/PRATEEK-260)**
+**Happy Summarizing! 📝✨**
+[![GitHub](https://img.shields.io/badge/GitHub-PRATEEK--260-181717?style=flat&logo=github)](https://github.com/PRATEEK-260)
+</div>

app.py ADDED Viewed

	@@ -0,0 +1,196 @@

+"""
+NoteSnap - Main Application
+A Streamlit web application for summarizing PDF files and text content using AI.
+"""
+import streamlit as st
+import os
+from pathlib import Path
+# Import custom modules
+from modules.pdf_processor import PDFProcessor
+from modules.text_summarizer import TextSummarizer
+from modules.utils import (
+    setup_logging,
+    validate_input,
+    display_summary_stats,
+    format_file_size,
+)
+# Initialize components
+@st.cache_resource
+def initialize_components():
+    """Initialize PDF processor and text summarizer"""
+    pdf_processor = PDFProcessor()
+    text_summarizer = TextSummarizer()
+    return pdf_processor, text_summarizer
+def main():
+    """Main application function"""
+    st.set_page_config(
+        page_title="NoteSnap",
+        page_icon="📝",
+        layout="wide",
+        initial_sidebar_state="expanded",
+    )
+    # Initialize components
+    pdf_processor, text_summarizer = initialize_components()
+    # App header
+    st.title("📝 NoteSnap")
+    st.markdown(
+        "Transform your lengthy documents and notes into concise, bullet-point summaries using AI."
+    )
+    # Sidebar for options
+    st.sidebar.header("⚙️ Settings")
+    # Model selection
+    model_options = {
+        "BART (Recommended)": "facebook/bart-large-cnn",
+        "T5 Small": "t5-small",
+        "DistilBART": "sshleifer/distilbart-cnn-12-6",
+    }
+    selected_model = st.sidebar.selectbox(
+        "Choose AI Model:",
+        options=list(model_options.keys()),
+        index=0,
+        help="BART is recommended for best quality summaries",
+    )
+    # Update text summarizer model if changed
+    if text_summarizer.model_name != model_options[selected_model]:
+        text_summarizer.model_name = model_options[selected_model]
+        text_summarizer.summarizer = None  # Reset to reload model
+    # Summary length options
+    summary_length = st.sidebar.select_slider(
+        "Summary Length:",
+        options=["Short", "Medium", "Long"],
+        value="Medium",
+        help="Choose the desired length of the summary",
+    )
+    # Update summary length settings
+    length_settings = {"Short": (30, 150), "Medium": (50, 300), "Long": (100, 500)}
+    text_summarizer.min_summary_length, text_summarizer.max_summary_length = (
+        length_settings[summary_length]
+    )
+    # Main content area
+    tab1, tab2 = st.tabs(["📄 PDF Upload", "📝 Text Input"])
+    with tab1:
+        st.header("Upload PDF File")
+        st.markdown("Upload a PDF file to extract and summarize its content.")
+        uploaded_file = st.file_uploader(
+            "Choose a PDF file", type=["pdf"], help="Upload a PDF file (max 10MB)"
+        )
+        if uploaded_file is not None:
+            # Display file info
+            file_size = format_file_size(uploaded_file.size)
+            st.info(f"📄 **File:** {uploaded_file.name} ({file_size})")
+            # Process PDF button
+            if st.button("📖 Extract & Summarize PDF", type="primary"):
+                with st.spinner("Processing PDF file..."):
+                    # Extract text from PDF
+                    extracted_text = pdf_processor.process_pdf(uploaded_file)
+                    if extracted_text:
+                        st.success("✅ Text extracted successfully!")
+                        # Show extracted text preview
+                        with st.expander("📝 View Extracted Text (Preview)"):
+                            st.text_area(
+                                "Extracted Content:",
+                                value=(
+                                    extracted_text[:1000] + "..."
+                                    if len(extracted_text) > 1000
+                                    else extracted_text
+                                ),
+                                height=200,
+                                disabled=True,
+                            )
+                        # Generate summary
+                        summary = text_summarizer.summarize_text(extracted_text)
+                        if summary:
+                            st.success("✅ Summary generated successfully!")
+                            # Display summary
+                            st.subheader("📋 Summary")
+                            st.markdown(summary)
+                            # Display statistics
+                            st.subheader("📊 Statistics")
+                            display_summary_stats(extracted_text, summary)
+                            # Download option
+                            st.download_button(
+                                label="💾 Download Summary",
+                                data=summary,
+                                file_name=f"{uploaded_file.name}_summary.txt",
+                                mime="text/plain",
+                            )
+    with tab2:
+        st.header("Direct Text Input")
+        st.markdown("Paste your text content directly for summarization.")
+        text_input = st.text_area(
+            "Enter your text here:",
+            height=300,
+            placeholder="Paste your text content here...",
+            help="Minimum 100 characters required for effective summarization",
+        )
+        # Character count
+        char_count = len(text_input)
+        st.caption(f"Characters: {char_count:,}")
+        if st.button("🚀 Summarize Text", type="primary"):
+            if validate_input(text_input, min_length=100):
+                # Generate summary
+                summary = text_summarizer.summarize_text(text_input)
+                if summary:
+                    st.success("✅ Summary generated successfully!")
+                    # Display summary
+                    st.subheader("📋 Summary")
+                    st.markdown(summary)
+                    # Display statistics
+                    st.subheader("📊 Statistics")
+                    display_summary_stats(text_input, summary)
+                    # Download option
+                    st.download_button(
+                        label="💾 Download Summary",
+                        data=summary,
+                        file_name="text_summary.txt",
+                        mime="text/plain",
+                    )
+    # Footer
+    st.markdown("---")
+    st.markdown(
+        """
+        <div style='text-align: center; color: #666;'>
+            <p>NoteSnap | Powered by Hugging Face Transformers</p>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+if __name__ == "__main__":
+    main()

docker-build.sh ADDED Viewed

	@@ -0,0 +1,55 @@

+#!/bin/bash
+# AI Notes Summarizer - Docker Build Script
+set -e
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+# Configuration
+IMAGE_NAME="ai-notes-summarizer"
+TAG="${1:-latest}"
+DOCKERFILE="${2:-Dockerfile}"
+echo -e "${BLUE}🐳 Building AI Notes Summarizer Docker Image${NC}"
+echo -e "${YELLOW}Image: ${IMAGE_NAME}:${TAG}${NC}"
+echo -e "${YELLOW}Dockerfile: ${DOCKERFILE}${NC}"
+echo ""
+# Check if Docker is running
+if ! docker info > /dev/null 2>&1; then
+    echo -e "${RED}❌ Docker is not running. Please start Docker and try again.${NC}"
+    exit 1
+fi
+# Build the image
+echo -e "${BLUE}📦 Building Docker image...${NC}"
+docker build \
+    -t "${IMAGE_NAME}:${TAG}" \
+    -f "${DOCKERFILE}" \
+    --build-arg BUILD_DATE="$(date -u +'%Y-%m-%dT%H:%M:%SZ')" \
+    --build-arg VCS_REF="$(git rev-parse --short HEAD 2>/dev/null || echo 'unknown')" \
+    .
+if [ $? -eq 0 ]; then
+    echo -e "${GREEN}✅ Docker image built successfully!${NC}"
+    echo ""
+    # Show image info
+    echo -e "${BLUE}📊 Image Information:${NC}"
+    docker images "${IMAGE_NAME}:${TAG}" --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedAt}}"
+    echo ""
+    echo -e "${GREEN}🚀 To run the container:${NC}"
+    echo -e "${YELLOW}docker run -p 8501:8501 ${IMAGE_NAME}:${TAG}${NC}"
+    echo ""
+    echo -e "${GREEN}🐙 Or use Docker Compose:${NC}"
+    echo -e "${YELLOW}docker-compose up${NC}"
+else
+    echo -e "${RED}❌ Docker build failed!${NC}"
+    exit 1
+fi

docker-compose.dev.yml ADDED Viewed

	@@ -0,0 +1,42 @@

+version: '3.8'
+services:
+  ai-notes-summarizer-dev:  # Development service: runs app.py from the mounted source tree
+    build:
+      context: .
+      dockerfile: Dockerfile
+      target: dependencies  # Stop at dependencies stage for faster rebuilds
+    container_name: ai-notes-summarizer-dev
+    ports:
+      - "8501:8501"  # host:container
+    environment:
+      - STREAMLIT_SERVER_PORT=8501
+      - STREAMLIT_SERVER_ADDRESS=0.0.0.0
+      - STREAMLIT_SERVER_HEADLESS=true
+      - STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
+      - TRANSFORMERS_CACHE=/app/.cache/huggingface  # Under /app/.cache, which is backed by the model_cache_dev volume
+      - TORCH_HOME=/app/.cache/torch
+      - HF_HOME=/app/.cache/huggingface
+      - DEBUG=true  # Set only in this dev file
+    volumes:
+      # Mount source code for live development
+      - .:/app
+      # Model cache persistence
+      - model_cache_dev:/app/.cache
+      # Logs
+      - ./logs:/app/logs
+      # Uploads
+      - ./uploads:/app/uploads
+    working_dir: /app
+    command: ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.fileWatcherType=poll"]  # Explicit start command with a polling file watcher
+    restart: unless-stopped
+    user: "1000:1000"  # Use host user ID to avoid permission issues
+    deploy:
+      resources:
+        limits:  # Upper bounds for the container
+          memory: 4G
+          cpus: '2.0'
+volumes:
+  model_cache_dev:  # Named volume mounted at /app/.cache
+    driver: local

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,46 @@

+version: '3.8'
+services:
+  ai-notes-summarizer:  # Application service built from the local Dockerfile
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: ai-notes-summarizer
+    ports:
+      - "8501:8501"  # host:container
+    environment:
+      - STREAMLIT_SERVER_PORT=8501
+      - STREAMLIT_SERVER_ADDRESS=0.0.0.0
+      - STREAMLIT_SERVER_HEADLESS=true
+      - STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
+      - TRANSFORMERS_CACHE=/app/.cache/huggingface  # Under /app/.cache, which is backed by the model_cache volume
+      - TORCH_HOME=/app/.cache/torch
+      - HF_HOME=/app/.cache/huggingface
+    volumes:
+      # Model cache persistence
+      - model_cache:/app/.cache
+      # Logs persistence
+      - logs:/app/logs
+      # Optional: Mount local uploads directory for development
+      # - ./uploads:/app/uploads
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]  # Assumes curl is available inside the image - TODO confirm against the Dockerfile
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    deploy:
+      resources:
+        limits:  # Upper bounds for the container
+          memory: 4G
+          cpus: '2.0'
+        reservations:  # Resources reserved for the container
+          memory: 2G
+          cpus: '1.0'
+volumes:
+  model_cache:  # Named volume mounted at /app/.cache
+    driver: local
+  logs:  # Named volume mounted at /app/logs
+    driver: local

docker-dev.sh ADDED Viewed

	@@ -0,0 +1,35 @@

+#!/bin/bash
+# AI Notes Summarizer - Docker Development Script
+#
+# Starts the development stack defined in docker-compose.dev.yml in the
+# foreground, rebuilding the image first. Takes no arguments.
+set -e
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+echo -e "${BLUE}🛠️  Starting AI Notes Summarizer in Development Mode${NC}"
+echo ""
+# Check if Docker is running
+if ! docker info > /dev/null 2>&1; then
+    echo -e "${RED}❌ Docker is not running. Please start Docker and try again.${NC}"
+    exit 1
+fi
+# Pick a Compose command: prefer the standalone docker-compose binary (the
+# original behavior) and fall back to the "docker compose" CLI plugin, so the
+# script also works where only the plugin is installed.
+if command -v docker-compose &> /dev/null; then
+    COMPOSE_CMD=(docker-compose)
+elif docker compose version &> /dev/null; then
+    COMPOSE_CMD=(docker compose)
+else
+    echo -e "${RED}❌ Neither docker-compose nor the 'docker compose' plugin is installed. Please install one and try again.${NC}"
+    exit 1
+fi
+# Create necessary directories (bind-mounted by docker-compose.dev.yml)
+mkdir -p logs uploads
+# Start development environment
+echo -e "${BLUE}🐳 Starting development environment...${NC}"
+"${COMPOSE_CMD[@]}" -f docker-compose.dev.yml up --build
+echo -e "${GREEN}✅ Development environment stopped.${NC}"

docker-run.sh ADDED Viewed

	@@ -0,0 +1,72 @@

+#!/bin/bash
+# AI Notes Summarizer - Docker Run Script
+#
+# Usage: ./docker-run.sh [TAG] [PORT]
+#   TAG   image tag to run (default: latest)
+#   PORT  host port published to container port 8501 (default: 8501)
+#
+# Replaces any existing container with the same name, then starts a detached
+# container with ./logs and ./uploads bind-mounted and a named cache volume.
+set -e
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+# Configuration
+IMAGE_NAME="ai-notes-summarizer"
+TAG="${1:-latest}"
+CONTAINER_NAME="ai-notes-summarizer-app"
+PORT="${2:-8501}"
+echo -e "${BLUE}🚀 Running AI Notes Summarizer Docker Container${NC}"
+echo -e "${YELLOW}Image: ${IMAGE_NAME}:${TAG}${NC}"
+echo -e "${YELLOW}Port: ${PORT}${NC}"
+echo ""
+# Check if Docker is running
+if ! docker info > /dev/null 2>&1; then
+    echo -e "${RED}❌ Docker is not running. Please start Docker and try again.${NC}"
+    exit 1
+fi
+# Check if image exists
+if ! docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1; then
+    echo -e "${RED}❌ Image ${IMAGE_NAME}:${TAG} not found. Please build it first:${NC}"
+    echo -e "${YELLOW}./docker-build.sh${NC}"
+    exit 1
+fi
+# Stop and remove existing container if it exists
+if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
+    echo -e "${YELLOW}🛑 Stopping existing container...${NC}"
+    docker stop "${CONTAINER_NAME}" > /dev/null 2>&1 || true
+    docker rm "${CONTAINER_NAME}" > /dev/null 2>&1 || true
+fi
+# Create directories for volumes
+mkdir -p logs uploads
+# Run the container
+echo -e "${BLUE}🐳 Starting container...${NC}"
+# The command is tested directly by `if`: with `set -e` active, a separate
+# `$?` check afterwards would never see a failure because the script would
+# already have exited, leaving the error branch unreachable.
+if docker run -d \
+    --name "${CONTAINER_NAME}" \
+    -p "${PORT}:8501" \
+    -v "$(pwd)/logs:/app/logs" \
+    -v "$(pwd)/uploads:/app/uploads" \
+    -v ai-notes-model-cache:/app/.cache \
+    --restart unless-stopped \
+    "${IMAGE_NAME}:${TAG}"; then
+    echo -e "${GREEN}✅ Container started successfully!${NC}"
+    echo ""
+    echo -e "${GREEN}🌐 Application URL: ${YELLOW}http://localhost:${PORT}${NC}"
+    echo -e "${GREEN}📊 Container Status:${NC}"
+    docker ps --filter "name=${CONTAINER_NAME}" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
+    echo ""
+    echo -e "${BLUE}📝 Useful commands:${NC}"
+    echo -e "${YELLOW}View logs: docker logs -f ${CONTAINER_NAME}${NC}"
+    echo -e "${YELLOW}Stop container: docker stop ${CONTAINER_NAME}${NC}"
+    echo -e "${YELLOW}Remove container: docker rm ${CONTAINER_NAME}${NC}"
+else
+    echo -e "${RED}❌ Failed to start container!${NC}"
+    exit 1
+fi

docker-test.sh ADDED Viewed

	@@ -0,0 +1,117 @@

+#!/bin/bash
+# AI Notes Summarizer - Docker Test Script
+#
+# Builds a throwaway image, runs it on a test port, probes the application,
+# validates the Compose file and reports the container user. The test
+# container and image are removed on exit, whatever the outcome.
+set -e
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+# Configuration
+IMAGE_NAME="ai-notes-summarizer"
+TAG="test"
+CONTAINER_NAME="ai-notes-summarizer-test"
+TEST_PORT="8502"
+echo -e "${BLUE}🧪 Testing AI Notes Summarizer Docker Setup${NC}"
+echo ""
+# Function to cleanup (best effort: every step tolerates failure)
+cleanup() {
+    echo -e "${YELLOW}🧹 Cleaning up test resources...${NC}"
+    docker stop "${CONTAINER_NAME}" > /dev/null 2>&1 || true
+    docker rm "${CONTAINER_NAME}" > /dev/null 2>&1 || true
+    docker rmi "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 || true
+}
+# Trap cleanup on exit
+trap cleanup EXIT
+# Test 1: Build the Docker image
+echo -e "${BLUE}📦 Test 1: Building Docker image...${NC}"
+if docker build -t "${IMAGE_NAME}:${TAG}" .; then
+    echo -e "${GREEN}✅ Docker build successful${NC}"
+else
+    echo -e "${RED}❌ Docker build failed${NC}"
+    exit 1
+fi
+# Test 2: Check image size (informational only)
+echo -e "${BLUE}📊 Test 2: Checking image size...${NC}"
+IMAGE_SIZE=$(docker images "${IMAGE_NAME}:${TAG}" --format "{{.Size}}")
+echo -e "${YELLOW}Image size: ${IMAGE_SIZE}${NC}"
+# Test 3: Run container
+echo -e "${BLUE}🚀 Test 3: Starting container...${NC}"
+if docker run -d --name "${CONTAINER_NAME}" -p "${TEST_PORT}:8501" "${IMAGE_NAME}:${TAG}"; then
+    echo -e "${GREEN}✅ Container started successfully${NC}"
+else
+    echo -e "${RED}❌ Container failed to start${NC}"
+    exit 1
+fi
+# Test 4: Wait for application to be ready (poll once per second, 30 attempts)
+echo -e "${BLUE}⏳ Test 4: Waiting for application to be ready...${NC}"
+for i in {1..30}; do
+    if curl -f "http://localhost:${TEST_PORT}/_stcore/health" > /dev/null 2>&1; then
+        echo -e "${GREEN}✅ Application is ready${NC}"
+        break
+    fi
+    if [ "$i" -eq 30 ]; then
+        echo -e "${RED}❌ Application failed to start within 30 seconds${NC}"
+        docker logs "${CONTAINER_NAME}"
+        exit 1
+    fi
+    sleep 1
+done
+# Test 5: Check application response
+# NOTE(review): this greps the raw HTML for the app title. If the title is only
+# set client-side by JavaScript, the check fails even for a healthy app - confirm.
+echo -e "${BLUE}🌐 Test 5: Testing application response...${NC}"
+if curl -s "http://localhost:${TEST_PORT}" | grep -q "AI Notes Summarizer"; then
+    echo -e "${GREEN}✅ Application responding correctly${NC}"
+else
+    echo -e "${RED}❌ Application not responding correctly${NC}"
+    exit 1
+fi
+# Test 6: Check container logs for errors (warning only, never fails the run)
+echo -e "${BLUE}📝 Test 6: Checking container logs...${NC}"
+if docker logs "${CONTAINER_NAME}" 2>&1 | grep -i error; then
+    echo -e "${YELLOW}⚠️  Found errors in logs (see above)${NC}"
+else
+    echo -e "${GREEN}✅ No errors found in logs${NC}"
+fi
+# Test 7: Test Docker Compose
+echo -e "${BLUE}🐙 Test 7: Testing Docker Compose...${NC}"
+# Accept either the standalone docker-compose binary or the "docker compose"
+# plugin, and report a missing tool separately from an invalid configuration.
+if command -v docker-compose &> /dev/null; then
+    COMPOSE=(docker-compose)
+elif docker compose version > /dev/null 2>&1; then
+    COMPOSE=(docker compose)
+else
+    echo -e "${RED}❌ Docker Compose is not installed${NC}"
+    exit 1
+fi
+if "${COMPOSE[@]}" config > /dev/null 2>&1; then
+    echo -e "${GREEN}✅ Docker Compose configuration is valid${NC}"
+else
+    echo -e "${RED}❌ Docker Compose configuration is invalid${NC}"
+    exit 1
+fi
+# Test 8: Basic security check (warning only)
+echo -e "${BLUE}🔒 Test 8: Basic security check...${NC}"
+# A failed `docker exec` is reported on its own rather than being treated as
+# "running as root", which would hide the real problem.
+if USER_CHECK=$(docker exec "${CONTAINER_NAME}" whoami 2>/dev/null); then
+    if [ "$USER_CHECK" != "root" ]; then
+        echo -e "${GREEN}✅ Container running as non-root user: ${USER_CHECK}${NC}"
+    else
+        echo -e "${YELLOW}⚠️  Container running as root user${NC}"
+    fi
+else
+    echo -e "${YELLOW}⚠️  Could not determine container user (docker exec failed)${NC}"
+fi
+echo ""
+echo -e "${GREEN}🎉 All tests passed successfully!${NC}"
+echo ""
+echo -e "${BLUE}📊 Test Summary:${NC}"
+echo -e "${YELLOW}Image: ${IMAGE_NAME}:${TAG}${NC}"
+echo -e "${YELLOW}Size: ${IMAGE_SIZE}${NC}"
+echo -e "${YELLOW}Test URL: http://localhost:${TEST_PORT}${NC}"
+echo ""
+echo -e "${BLUE}🚀 Ready for deployment!${NC}"

formatting.patch ADDED Viewed

	@@ -0,0 +1,65 @@

+--- a/app.py
++++ b/app.py
+@@ -193,4 +193,4 @@
+ if __name__ == "__main__":
+-    main()
++    main()
+--- a/modules/pdf_processor.py
++++ b/modules/pdf_processor.py
+@@ -174,4 +174,4 @@
+                 "The extracted text is very short. Please check if the PDF contains readable text."
+             )
+-        return processed_text
++        return processed_text
+--- a/modules/text_summarizer.py
++++ b/modules/text_summarizer.py
+@@ -282,4 +282,4 @@
+             )
+             return None
+         except Exception as e:
+-            st.error(f"❌ Unexpected error during summarization: {str(e)}")
+-            return None
++            st.error(f"❌ Unexpected error during summarization: {str(e)}")
++            return None
+--- a/modules/utils.py
++++ b/modules/utils.py
+@@ -124,4 +124,4 @@
+     b64 = base64.b64encode(content.encode()).decode()
+     href = f'<a href="data:text/plain;base64,{b64}" download="{filename}">Download Summary</a>'
+-    return href
++    return href
+--- a/run.py
++++ b/run.py
+@@ -39,4 +39,4 @@
+ if __name__ == "__main__":
+-    main()
++    main()
+--- a/setup.py
++++ b/setup.py
+@@ -41,4 +41,4 @@
+             "ai-notes-summarizer=app:main",
+         ],
+     },
+-)
++)
+--- a/test_basic.py
++++ b/test_basic.py
+@@ -140,4 +140,4 @@
+ if __name__ == "__main__":
+     success = main()
+-    sys.exit(0 if success else 1)
++    sys.exit(0 if success else 1)

requirements.docker.txt ADDED Viewed

	@@ -0,0 +1,27 @@

+# Docker-optimized requirements for AI Notes Summarizer
+# Core web framework
+streamlit>=1.28.0
+# AI/ML libraries (NOTE: plain package specifiers; this file configures no CPU-only wheel index)
+transformers>=4.35.0
+torch>=2.0.0,<2.1.0  # Pin to avoid large updates
+tokenizers>=0.14.0
+# PDF processing
+PyPDF2>=3.0.1
+# Text processing and utilities
+regex>=2023.10.3
+numpy>=1.24.0,<1.25.0  # Pin for stability
+pandas>=2.0.0,<2.1.0   # Pin for stability
+# Additional dependencies for transformers
+accelerate>=0.24.0
+safetensors>=0.4.0
+# Optional: For better performance (smaller package)
+sentencepiece>=0.1.99
+# Production dependencies
+gunicorn>=21.2.0  # For potential WSGI deployment
+uvicorn>=0.23.0   # Alternative ASGI server

requirements.txt CHANGED Viewed

@@ -1,3 +1,27 @@
-altair
-pandas
-streamlit

+# Core web framework
+streamlit>=1.28.0
+# AI/ML libraries
+transformers>=4.35.0
+torch>=2.0.0
+tokenizers>=0.14.0
+# PDF processing
+PyPDF2>=3.0.1
+# Text processing and utilities
+regex>=2023.10.3
+numpy>=1.24.0
+pandas>=2.0.0
+# Additional dependencies for transformers
+accelerate>=0.24.0
+safetensors>=0.4.0
+# Optional: For better performance
+sentencepiece>=0.1.99
+# Development and testing (optional)
+pytest>=7.4.0
+black>=23.0.0
+flake8>=6.0.0

run.py ADDED Viewed

	@@ -0,0 +1,42 @@

+#!/usr/bin/env python3
+"""
+Run script for AI Notes Summarizer
+"""
+import subprocess
+import sys
+import os
+def main():
+    """Run the Streamlit application and propagate its exit status.
+    Changes the working directory to this file's directory, then runs
+    ``streamlit run app.py`` headless on 0.0.0.0:8501 using the current
+    interpreter. Returns None on a clean exit or Ctrl+C. Calls ``sys.exit``
+    with the child's return code when Streamlit exits non-zero, and with 1
+    when the process cannot be launched.
+    """
+    command = [
+        sys.executable,
+        "-m",
+        "streamlit",
+        "run",
+        "app.py",
+        "--server.headless",
+        "true",
+        "--server.port",
+        "8501",
+        "--server.address",
+        "0.0.0.0",
+    ]
+    try:
+        # Change to the application directory so "app.py" resolves
+        app_dir = os.path.dirname(os.path.abspath(__file__))
+        os.chdir(app_dir)
+        # Run streamlit
+        completed = subprocess.run(command)
+    except KeyboardInterrupt:
+        print("\nApplication stopped by user")
+        return
+    except Exception as e:
+        print(f"Error running application: {e}")
+        sys.exit(1)
+    # Without this the launcher would report success even when Streamlit
+    # failed to start (for example, when the module is not installed).
+    if completed.returncode != 0:
+        sys.exit(completed.returncode)
+if __name__ == "__main__":
+    main()

setup.py ADDED Viewed

	@@ -0,0 +1,44 @@

+"""
+Setup script for AI Notes Summarizer
+"""
+from setuptools import setup, find_packages
+# The README is used verbatim as the long description.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+# Every non-blank, non-comment line of requirements.txt becomes an install
+# requirement. Lines are stripped before the comment check so that indented
+# comment lines are skipped as well.
+with open("requirements.txt", "r", encoding="utf-8") as fh:
+    requirements = [
+        stripped
+        for stripped in (line.strip() for line in fh)
+        if stripped and not stripped.startswith("#")
+    ]
+setup(
+    name="ai-notes-summarizer",
+    version="1.0.0",
+    author="AI Notes Summarizer",
+    description="A web application for AI-powered document summarization",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    packages=find_packages(),
+    # app.py is a top-level module rather than part of a package, so it has to
+    # be listed explicitly; otherwise the "app:main" console script below
+    # cannot import it after installation.
+    py_modules=["app"],
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Education",
+        "Intended Audience :: End Users/Desktop",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Text Processing :: Linguistic",
+    ],
+    python_requires=">=3.8",
+    install_requires=requirements,
+    entry_points={
+        "console_scripts": [
+            "ai-notes-summarizer=app:main",
+        ],
+    },
+)

start.sh ADDED Viewed

	@@ -0,0 +1,42 @@

+#!/bin/bash
+# AI Notes Summarizer - Startup Script
+#
+# Creates ./venv on first use, installs requirements.txt into it once
+# (tracked by the venv/installed marker file), then launches the Streamlit app.
+echo "🚀 Starting AI Notes Summarizer..."
+# Check if Python is available
+if ! command -v python3 &> /dev/null; then
+    echo "❌ Python 3 is not installed. Please install Python 3.8 or higher."
+    exit 1
+fi
+# Check if pip is available
+if ! command -v pip3 &> /dev/null; then
+    echo "❌ pip3 is not installed. Please install pip3."
+    exit 1
+fi
+# Check if virtual environment exists
+if [ ! -d "venv" ]; then
+    echo "📦 Creating virtual environment..."
+    if ! python3 -m venv venv; then
+        echo "❌ Failed to create virtual environment."
+        exit 1
+    fi
+fi
+# Activate virtual environment
+echo "🔧 Activating virtual environment..."
+if ! source venv/bin/activate; then
+    echo "❌ Failed to activate virtual environment."
+    exit 1
+fi
+# Install requirements if not already installed
+if [ ! -f "venv/installed" ]; then
+    echo "📥 Installing dependencies..."
+    # Only write the marker after a successful install; otherwise a failed
+    # install would be recorded as done and never retried.
+    if pip install -r requirements.txt; then
+        touch venv/installed
+        echo "✅ Dependencies installed successfully!"
+    else
+        echo "❌ Failed to install dependencies."
+        exit 1
+    fi
+fi
+# Start the application
+echo "🌟 Launching AI Notes Summarizer..."
+echo "📱 The application will open in your browser at http://localhost:8501"
+echo "⏹️  Press Ctrl+C to stop the application"
+echo ""
+streamlit run app.py

test_basic.py ADDED Viewed

	@@ -0,0 +1,143 @@

+#!/usr/bin/env python3
+"""
+Basic tests for AI Notes Summarizer modules
+"""
+import sys
+import os
+# Add the current directory to Python path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+def test_imports():
+    """Verify that the three project modules import cleanly.
+    Prints one status line per module and returns False at the first
+    ImportError; returns True when every import succeeds.
+    """
+    print("Testing module imports...")
+    # PDF processing module
+    try:
+        from modules.pdf_processor import PDFProcessor
+    except ImportError as e:
+        print(f"❌ Failed to import PDF Processor: {e}")
+        return False
+    else:
+        print("✅ PDF Processor imported successfully")
+    # Summarization module
+    try:
+        from modules.text_summarizer import TextSummarizer
+    except ImportError as e:
+        print(f"❌ Failed to import Text Summarizer: {e}")
+        return False
+    else:
+        print("✅ Text Summarizer imported successfully")
+    # Shared helpers
+    try:
+        from modules.utils import setup_logging, validate_input
+    except ImportError as e:
+        print(f"❌ Failed to import Utils: {e}")
+        return False
+    else:
+        print("✅ Utils imported successfully")
+    return True
+def test_pdf_processor():
+    """Smoke-test PDFProcessor.preprocess_text on irregularly spaced text.
+    Returns True when the call completes, False if anything raises.
+    """
+    print("\nTesting PDF Processor...")
+    # Sample with repeated spaces and newlines; built before the guarded calls.
+    test_text = "This is a   test\n\nwith multiple   spaces\nand newlines."
+    try:
+        from modules.pdf_processor import PDFProcessor
+        cleaned = PDFProcessor().preprocess_text(test_text)
+        print(f"✅ Text preprocessing works: '{cleaned}'")
+    except Exception as e:
+        print(f"❌ PDF Processor test failed: {e}")
+        return False
+    return True
+def test_text_summarizer():
+    """Smoke-test TextSummarizer.chunk_text and format_as_bullets.
+    Returns True when both calls complete, False if anything raises.
+    """
+    print("\nTesting Text Summarizer...")
+    # Inputs are prepared up front; only the calls in the try block can fail.
+    test_text = "This is a test sentence. " * 100
+    test_summary = "This is the first point. This is the second point. This is the third point."
+    try:
+        from modules.text_summarizer import TextSummarizer
+        summarizer = TextSummarizer()
+        # Chunking
+        chunks = summarizer.chunk_text(test_text)
+        print(f"✅ Text chunking works: {len(chunks)} chunks created")
+        # Bullet formatting
+        bullets = summarizer.format_as_bullets(test_summary)
+        print(f"✅ Bullet formatting works:\n{bullets}")
+    except Exception as e:
+        print(f"❌ Text Summarizer test failed: {e}")
+        return False
+    return True
+def test_utils():
+    """Smoke-test validate_input, clean_text and format_file_size from modules.utils.
+    Returns True when all three calls complete, False if anything raises.
+    """
+    print("\nTesting Utils...")
+    # Fixtures for the three helper calls below.
+    sample_input = "This is a test text that is long enough to pass validation."
+    dirty_text = "This   has    multiple   spaces  and  special@#$%characters!"
+    one_mebibyte = 1024 * 1024
+    try:
+        from modules.utils import validate_input, clean_text, format_file_size
+        # Input validation
+        valid = validate_input(sample_input)
+        print(f"✅ Input validation works: {valid}")
+        # Text cleaning
+        clean = clean_text(dirty_text)
+        print(f"✅ Text cleaning works: '{clean}'")
+        # File size formatting
+        size_str = format_file_size(one_mebibyte)
+        print(f"✅ File size formatting works: {size_str}")
+    except Exception as e:
+        print(f"❌ Utils test failed: {e}")
+        return False
+    return True
+def main():
+    """Run each test function in order and print a pass/fail summary.
+    Returns True only when every test function returned a truthy value.
+    """
+    print("🧪 Running Basic Tests for AI Notes Summarizer\n")
+    tests = (test_imports, test_pdf_processor, test_text_summarizer, test_utils)
+    total = len(tests)
+    passed = 0
+    for run_test in tests:
+        passed += 1 if run_test() else 0
+        # Blank line between the outputs of consecutive tests
+        print()
+    print(f"📊 Test Results: {passed}/{total} tests passed")
+    all_passed = passed == total
+    if all_passed:
+        print("🎉 All tests passed! The application is ready to run.")
+    else:
+        print("⚠️ Some tests failed. Please check the errors above.")
+    return all_passed
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)