Prateek-044 committed on
Commit
e9cd410
·
verified ·
1 Parent(s): 20d5588

Upload 24 files

Browse files
Files changed (24) hide show
  1. .dockerignore +94 -0
  2. .env.example +34 -0
  3. .gitignore +156 -0
  4. CHANGELOG.md +145 -0
  5. CONTRIBUTING.md +267 -0
  6. DOCKER.md +306 -0
  7. Dockerfile +42 -7
  8. Dockerfile.prod +80 -0
  9. LICENSE +225 -0
  10. README.md +481 -20
  11. app.py +196 -0
  12. docker-build.sh +55 -0
  13. docker-compose.dev.yml +42 -0
  14. docker-compose.yml +46 -0
  15. docker-dev.sh +35 -0
  16. docker-run.sh +72 -0
  17. docker-test.sh +117 -0
  18. formatting.patch +65 -0
  19. requirements.docker.txt +27 -0
  20. requirements.txt +27 -3
  21. run.py +42 -0
  22. setup.py +44 -0
  23. start.sh +42 -0
  24. test_basic.py +143 -0
.dockerignore ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+ .gitattributes
5
+
6
+ # Docker
7
+ Dockerfile*
8
+ docker-compose*
9
+ .dockerignore
10
+
11
+ # Python
12
+ __pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+ *.so
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # Virtual environments
35
+ venv/
36
+ env/
37
+ ENV/
38
+ env.bak/
39
+ venv.bak/
40
+
41
+ # IDE
42
+ .vscode/
43
+ .idea/
44
+ *.swp
45
+ *.swo
46
+ *~
47
+
48
+ # OS
49
+ .DS_Store
50
+ .DS_Store?
51
+ ._*
52
+ .Spotlight-V100
53
+ .Trashes
54
+ ehthumbs.db
55
+ Thumbs.db
56
+
57
+ # Logs
58
+ *.log
59
+ logs/
60
+ app.log
61
+
62
+ # Testing
63
+ .pytest_cache/
64
+ .coverage
65
+ htmlcov/
66
+ .tox/
67
+ .cache
68
+ nosetests.xml
69
+ coverage.xml
70
+ *.cover
71
+ .hypothesis/
72
+
73
+ # Documentation
74
+ docs/
75
+ *.md
76
+ README.md
77
+
78
+ # Temporary files
79
+ *.tmp
80
+ *.temp
81
+ temp/
82
+ tmp/
83
+
84
+ # Model cache (will be downloaded at runtime)
85
+ .cache/
86
+ models/
87
+
88
+ # Uploads (runtime directory)
89
+ uploads/
90
+
91
+ # Environment files
92
+ .env
93
+ .env.local
94
+ .env.production
.env.example ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI Notes Summarizer Environment Configuration
2
+
3
+ # Application Settings
4
+ APP_NAME=AI Notes Summarizer
5
+ APP_VERSION=1.0.0
6
+ DEBUG=false
7
+
8
+ # Streamlit Configuration
9
+ STREAMLIT_SERVER_PORT=8501
10
+ STREAMLIT_SERVER_ADDRESS=0.0.0.0
11
+ STREAMLIT_SERVER_HEADLESS=true
12
+ STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
13
+
14
+ # AI Model Configuration
15
+ DEFAULT_MODEL=facebook/bart-large-cnn
16
+ MODEL_CACHE_DIR=/app/.cache/huggingface
17
+ TRANSFORMERS_CACHE=/app/.cache/huggingface
18
+
19
+ # Processing Limits
20
+ MAX_FILE_SIZE_MB=10
21
+ MAX_TEXT_LENGTH=50000
22
+ CHUNK_SIZE=1024
23
+
24
+ # Security
25
+ ALLOWED_EXTENSIONS=pdf
26
+ MAX_UPLOAD_SIZE=10485760
27
+
28
+ # Logging
29
+ LOG_LEVEL=INFO
30
+ LOG_FILE=/app/logs/app.log
31
+
32
+ # Performance
33
+ TORCH_HOME=/app/.cache/torch
34
+ HF_HOME=/app/.cache/huggingface
.gitignore ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ pip-wheel-metadata/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ # PyInstaller
27
+ *.manifest
28
+ *.spec
29
+
30
+ # Installer logs
31
+ pip-log.txt
32
+ pip-delete-this-directory.txt
33
+
34
+ # Unit test / coverage reports
35
+ htmlcov/
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ .cache
41
+ nosetests.xml
42
+ coverage.xml
43
+ *.cover
44
+ *.py,cover
45
+ .hypothesis/
46
+ .pytest_cache/
47
+
48
+ # Translations
49
+ *.mo
50
+ *.pot
51
+
52
+ # Django stuff:
53
+ *.log
54
+ local_settings.py
55
+ db.sqlite3
56
+ db.sqlite3-journal
57
+
58
+ # Flask stuff:
59
+ instance/
60
+ .webassets-cache
61
+
62
+ # Scrapy stuff:
63
+ .scrapy
64
+
65
+ # Sphinx documentation
66
+ docs/_build/
67
+
68
+ # PyBuilder
69
+ target/
70
+
71
+ # Jupyter Notebook
72
+ .ipynb_checkpoints
73
+
74
+ # IPython
75
+ profile_default/
76
+ ipython_config.py
77
+
78
+ # pyenv
79
+ .python-version
80
+
81
+ # pipenv
82
+ Pipfile.lock
83
+
84
+ # PEP 582
85
+ __pypackages__/
86
+
87
+ # Celery stuff
88
+ celerybeat-schedule
89
+ celerybeat.pid
90
+
91
+ # SageMath parsed files
92
+ *.sage.py
93
+
94
+ # Environments
95
+ .env
96
+ .venv
97
+ env/
98
+ venv/
99
+ ENV/
100
+ env.bak/
101
+ venv.bak/
102
+
103
+ # Spyder project settings
104
+ .spyderproject
105
+ .spyproject
106
+
107
+ # Rope project settings
108
+ .ropeproject
109
+
110
+ # mkdocs documentation
111
+ /site
112
+
113
+ # mypy
114
+ .mypy_cache/
115
+ .dmypy.json
116
+ dmypy.json
117
+
118
+ # Pyre type checker
119
+ .pyre/
120
+
121
+ # Streamlit
122
+ .streamlit/secrets.toml
123
+
124
+ # AI Models Cache
125
+ .cache/
126
+ models/
127
+ *.bin
128
+ *.safetensors
129
+
130
+ # Uploads
131
+ uploads/
132
+ temp/
133
+
134
+ # Logs
135
+ logs/
136
+ *.log
137
+
138
+ # OS
139
+ .DS_Store
140
+ .DS_Store?
141
+ ._*
142
+ .Spotlight-V100
143
+ .Trashes
144
+ ehthumbs.db
145
+ Thumbs.db
146
+
147
+ # IDE
148
+ .vscode/
149
+ .idea/
150
+ *.swp
151
+ *.swo
152
+ *~
153
+
154
+ # Docker
155
+ .dockerignore
156
+ docker-compose.override.yml
CHANGELOG.md ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+ - Initial project setup and documentation
12
+
13
+ ## [1.0.0] - 2025-01-17
14
+
15
+ ### Added
16
+ - 📝 **Core Features**
17
+ - PDF file upload and text extraction using PyPDF2
18
+ - Direct text input for summarization
19
+ - AI-powered summarization using Hugging Face Transformers (BART, T5, DistilBART)
20
+ - Bullet-point formatted summary output
21
+ - Real-time progress indicators during processing
22
+
23
+ - 🎨 **User Interface**
24
+ - Clean Streamlit web interface
25
+ - Tabbed layout for PDF upload and text input
26
+ - Model selection dropdown (BART, T5, DistilBART)
27
+ - Summary length customization (Short, Medium, Long)
28
+ - Statistics display (word count, compression ratio)
29
+ - Download functionality for generated summaries
30
+
31
+ - 🐳 **Docker Support**
32
+ - Multi-stage Dockerfile for optimized builds
33
+ - Docker Compose configuration for easy deployment
34
+ - Development Docker setup with live reload
35
+ - Production-optimized Docker configuration
36
+ - Comprehensive Docker documentation
37
+
38
+ - 🛠️ **Development Tools**
39
+ - Modular code architecture with separate modules
40
+ - Comprehensive error handling and user feedback
41
+ - Basic testing framework
42
+ - Docker build and run scripts
43
+ - Development environment setup
44
+
45
+ - 📚 **Documentation**
46
+ - Detailed README with installation and usage instructions
47
+ - Docker deployment guide
48
+ - Troubleshooting section
49
+ - API documentation for modules
50
+
51
+ - 🔒 **Security & Performance**
52
+ - Non-root Docker container execution
53
+ - Input validation and file size limits
54
+ - Model caching for improved performance
55
+ - Resource limits and health checks
56
+
57
+ ### Technical Details
58
+ - **Backend**: Python 3.8+, Streamlit, Hugging Face Transformers, PyTorch
59
+ - **AI Models**: BART (facebook/bart-large-cnn), T5, DistilBART
60
+ - **PDF Processing**: PyPDF2 with comprehensive error handling
61
+ - **Containerization**: Docker with multi-stage builds
62
+ - **Architecture**: Modular design with separate PDF processing and summarization modules
63
+
64
+ ### Dependencies
65
+ - streamlit>=1.28.0
66
+ - transformers>=4.35.0
67
+ - torch>=2.0.0
68
+ - PyPDF2>=3.0.1
69
+ - Additional utilities for text processing and acceleration
70
+
71
+ ---
72
+
73
+ ## Release Notes
74
+
75
+ ### Version 1.0.0 Highlights
76
+
77
+ 🎉 **Initial Release** - NoteSnap is now available!
78
+
79
+ This first release provides a complete solution for document summarization with:
80
+ - **Easy-to-use web interface** built with Streamlit
81
+ - **Multiple AI models** for different use cases and performance needs
82
+ - **Docker support** for consistent deployment across environments
83
+ - **Comprehensive documentation** for users and developers
84
+
85
+ ### Supported Platforms
86
+ - **Local Installation**: Windows, macOS, Linux with Python 3.8+
87
+ - **Docker**: Any platform supporting Docker containers
88
+ - **Cloud Deployment**: Compatible with cloud platforms supporting Docker
89
+
90
+ ### Known Limitations
91
+ - PDF processing limited to text-based documents (no OCR for scanned images)
92
+ - Maximum file size limit of 10MB for PDF uploads
93
+ - Internet connection required for initial model downloads
94
+ - GPU acceleration optional but recommended for better performance
95
+
96
+ ### Upcoming Features (Roadmap)
97
+ - 📱 Mobile-responsive interface improvements
98
+ - 🔍 OCR support for scanned PDF documents
99
+ - 🌐 Multi-language summarization support
100
+ - 📊 Advanced analytics and summary quality metrics
101
+ - 🔗 API endpoints for programmatic access
102
+ - 📱 Progressive Web App (PWA) capabilities
103
+
104
+ ---
105
+
106
+ ## Migration Guide
107
+
108
+ ### From Development to Production
109
+ When deploying to production:
110
+
111
+ 1. **Use Docker Compose**:
112
+ ```bash
113
+ docker-compose up -d
114
+ ```
115
+
116
+ 2. **Configure Environment Variables**:
117
+ - Copy `.env.example` to `.env`
118
+ - Adjust settings for your environment
119
+
120
+ 3. **Set Resource Limits**:
121
+ - Ensure adequate memory (4GB+ recommended)
122
+ - Configure CPU limits based on expected load
123
+
124
+ ### Updating Dependencies
125
+ To update to newer versions:
126
+
127
+ ```bash
128
+ # Update Python packages
129
+ pip install -r requirements.txt --upgrade
130
+
131
+ # Rebuild Docker image
132
+ docker-compose build --no-cache
133
+ ```
134
+
135
+ ---
136
+
137
+ ## Support
138
+
139
+ For questions, issues, or contributions:
140
+ - 🐛 [Report Issues](https://github.com/PRATEEK-260/NoteSnap/issues)
141
+ - 💬 [Discussions](https://github.com/PRATEEK-260/NoteSnap/discussions)
142
+
143
+ ---
144
+
145
+ **Thank you for using NoteSnap!** 🎉
CONTRIBUTING.md ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🀝 Contributing to NoteSnap
2
+
3
+ Thank you for your interest in contributing to NoteSnap! This document provides guidelines and information for contributors.
4
+
5
+ ## 📋 Table of Contents
6
+
7
+ - [Code of Conduct](#code-of-conduct)
8
+ - [Getting Started](#getting-started)
9
+ - [Development Setup](#development-setup)
10
+ - [Making Changes](#making-changes)
11
+ - [Submitting Changes](#submitting-changes)
12
+ - [Style Guidelines](#style-guidelines)
13
+ - [Testing](#testing)
14
+ - [Documentation](#documentation)
15
+
16
+ ## 📜 Code of Conduct
17
+
18
+ This project and everyone participating in it is governed by our commitment to creating a welcoming and inclusive environment. Please be respectful and constructive in all interactions.
19
+
20
+ ## 🚀 Getting Started
21
+
22
+ ### Prerequisites
23
+
24
+ - Python 3.8 or higher
25
+ - Git
26
+ - Docker (optional but recommended)
27
+ - Basic knowledge of Python, Streamlit, and AI/ML concepts
28
+
29
+ ### Fork and Clone
30
+
31
+ 1. Fork the repository on GitHub
32
+ 2. Clone your fork locally:
33
+ ```bash
34
+ git clone https://github.com/YOUR-USERNAME/NoteSnap.git
35
+ cd NoteSnap
36
+ ```
37
+
38
+ ## 🛠️ Development Setup
39
+
40
+ ### Local Development
41
+
42
+ 1. **Create a virtual environment:**
43
+ ```bash
44
+ python -m venv venv
45
+ source venv/bin/activate # On Windows: venv\Scripts\activate
46
+ ```
47
+
48
+ 2. **Install dependencies:**
49
+ ```bash
50
+ pip install -r requirements.txt
51
+ ```
52
+
53
+ 3. **Run the application:**
54
+ ```bash
55
+ streamlit run app.py
56
+ ```
57
+
58
+ ### Docker Development
59
+
60
+ 1. **Build and run with Docker:**
61
+ ```bash
62
+ ./docker-dev.sh
63
+ ```
64
+
65
+ 2. **Or use Docker Compose:**
66
+ ```bash
67
+ docker-compose -f docker-compose.dev.yml up
68
+ ```
69
+
70
+ ## 🔄 Making Changes
71
+
72
+ ### Branch Naming
73
+
74
+ Use descriptive branch names:
75
+ - `feature/add-new-model-support`
76
+ - `bugfix/fix-pdf-processing-error`
77
+ - `docs/update-installation-guide`
78
+ - `refactor/improve-error-handling`
79
+
80
+ ### Commit Messages
81
+
82
+ Follow conventional commit format:
83
+ ```
84
+ type(scope): description
85
+
86
+ [optional body]
87
+
88
+ [optional footer]
89
+ ```
90
+
91
+ Examples:
92
+ - `feat(summarizer): add support for T5 model`
93
+ - `fix(pdf): resolve text extraction encoding issue`
94
+ - `docs(readme): update installation instructions`
95
+
96
+ ## 📤 Submitting Changes
97
+
98
+ ### Pull Request Process
99
+
100
+ 1. **Create a feature branch:**
101
+ ```bash
102
+ git checkout -b feature/your-feature-name
103
+ ```
104
+
105
+ 2. **Make your changes and commit:**
106
+ ```bash
107
+ git add .
108
+ git commit -m "feat: add your feature description"
109
+ ```
110
+
111
+ 3. **Push to your fork:**
112
+ ```bash
113
+ git push origin feature/your-feature-name
114
+ ```
115
+
116
+ 4. **Create a Pull Request** on GitHub with:
117
+ - Clear title and description
118
+ - Reference to related issues
119
+ - Screenshots if applicable
120
+ - Test results
121
+
122
+ ### Pull Request Requirements
123
+
124
+ - [ ] Code follows project style guidelines
125
+ - [ ] All tests pass
126
+ - [ ] Documentation updated if needed
127
+ - [ ] No breaking changes (or clearly documented)
128
+ - [ ] Self-review completed
129
+
130
+ ## 🎨 Style Guidelines
131
+
132
+ ### Python Code Style
133
+
134
+ - Follow PEP 8
135
+ - Use meaningful variable and function names
136
+ - Add docstrings for functions and classes
137
+ - Keep functions focused and small
138
+ - Use type hints where appropriate
139
+
140
+ ### Example:
141
+ ```python
142
+ def process_pdf_file(uploaded_file: UploadedFile) -> Optional[str]:
143
+ """
144
+ Extract text content from uploaded PDF file.
145
+
146
+ Args:
147
+ uploaded_file: Streamlit uploaded file object
148
+
149
+ Returns:
150
+ str: Extracted text content or None if extraction fails
151
+ """
152
+ # Implementation here
153
+ pass
154
+ ```
155
+
156
+ ### File Organization
157
+
158
+ - Keep modules focused on single responsibilities
159
+ - Use clear directory structure
160
+ - Add `__init__.py` files for packages
161
+ - Group related functionality together
162
+
163
+ ## 🧪 Testing
164
+
165
+ ### Running Tests
166
+
167
+ ```bash
168
+ # Basic functionality tests
169
+ python test_basic.py
170
+
171
+ # Docker tests
172
+ ./docker-test.sh
173
+
174
+ # Manual testing checklist
175
+ # - PDF upload and processing
176
+ # - Text input and summarization
177
+ # - Different AI models
178
+ # - Error handling scenarios
179
+ ```
180
+
181
+ ### Writing Tests
182
+
183
+ - Add tests for new features
184
+ - Test edge cases and error conditions
185
+ - Use descriptive test names
186
+ - Keep tests independent and isolated
187
+
188
+ ## 📚 Documentation
189
+
190
+ ### Code Documentation
191
+
192
+ - Add docstrings to all functions and classes
193
+ - Include type hints
194
+ - Comment complex logic
195
+ - Update README.md for new features
196
+
197
+ ### User Documentation
198
+
199
+ - Update usage instructions
200
+ - Add examples for new features
201
+ - Include troubleshooting information
202
+ - Keep Docker documentation current
203
+
204
+ ## 🐛 Reporting Issues
205
+
206
+ When reporting bugs:
207
+
208
+ 1. Use the bug report template
209
+ 2. Include environment details
210
+ 3. Provide steps to reproduce
211
+ 4. Add relevant logs and screenshots
212
+ 5. Check for existing similar issues
213
+
214
+ ## 💡 Suggesting Features
215
+
216
+ When suggesting features:
217
+
218
+ 1. Use the feature request template
219
+ 2. Explain the use case and motivation
220
+ 3. Consider implementation complexity
221
+ 4. Provide mockups or examples if helpful
222
+
223
+ ## 🏷️ Issue Labels
224
+
225
+ - `bug` - Something isn't working
226
+ - `enhancement` - New feature or request
227
+ - `documentation` - Improvements or additions to docs
228
+ - `good first issue` - Good for newcomers
229
+ - `help wanted` - Extra attention is needed
230
+ - `question` - Further information is requested
231
+
232
+ ## 🎯 Areas for Contribution
233
+
234
+ ### High Priority
235
+ - Bug fixes and stability improvements
236
+ - Performance optimizations
237
+ - Better error handling
238
+ - Documentation improvements
239
+
240
+ ### Medium Priority
241
+ - New AI model integrations
242
+ - UI/UX enhancements
243
+ - Additional file format support
244
+ - Internationalization
245
+
246
+ ### Low Priority
247
+ - Code refactoring
248
+ - Additional testing
249
+ - Development tooling
250
+ - CI/CD improvements
251
+
252
+ ## 📞 Getting Help
253
+
254
+ - 💬 [GitHub Discussions](https://github.com/PRATEEK-260/NoteSnap/discussions)
255
+ - 🐛 [Issues](https://github.com/PRATEEK-260/NoteSnap/issues)
256
+
257
+ ## 🙏 Recognition
258
+
259
+ Contributors will be:
260
+ - Listed in the README.md
261
+ - Mentioned in release notes
262
+ - Given credit in commit messages
263
+ - Invited to be maintainers (for significant contributions)
264
+
265
+ ---
266
+
267
+ Thank you for contributing to NoteSnap! 🎉
DOCKER.md ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🐳 Docker Deployment Guide
2
+
3
+ This guide covers Docker deployment options for the AI Notes Summarizer application.
4
+
5
+ ## 📋 Prerequisites
6
+
7
+ - Docker Engine 20.10+
8
+ - Docker Compose 2.0+
9
+ - At least 4GB RAM available for Docker
10
+ - Internet connection for downloading AI models
11
+
12
+ ## 🚀 Quick Start
13
+
14
+ ### Using Docker Compose (Recommended)
15
+
16
+ ```bash
17
+ # Clone the repository
18
+ git clone <repository-url>
19
+ cd ai-notes-summarizer
20
+
21
+ # Start the application
22
+ docker-compose up -d
23
+
24
+ # Access at http://localhost:8501
25
+ ```
26
+
27
+ ### Using Docker Scripts
28
+
29
+ ```bash
30
+ # Build the image
31
+ ./docker-build.sh
32
+
33
+ # Run the container
34
+ ./docker-run.sh
35
+
36
+ # Test the deployment
37
+ ./docker-test.sh
38
+ ```
39
+
40
+ ## 📁 Docker Files Overview
41
+
42
+ | File | Purpose |
43
+ |------|---------|
44
+ | `Dockerfile` | Standard multi-stage build |
45
+ | `Dockerfile.prod` | Production-optimized build |
46
+ | `docker-compose.yml` | Production deployment |
47
+ | `docker-compose.dev.yml` | Development environment |
48
+ | `docker-build.sh` | Build script |
49
+ | `docker-run.sh` | Run script |
50
+ | `docker-dev.sh` | Development script |
51
+ | `docker-test.sh` | Testing script |
52
+
53
+ ## 🔧 Configuration
54
+
55
+ ### Environment Variables
56
+
57
+ | Variable | Default | Description |
58
+ |----------|---------|-------------|
59
+ | `STREAMLIT_SERVER_PORT` | 8501 | Application port |
60
+ | `STREAMLIT_SERVER_ADDRESS` | 0.0.0.0 | Bind address |
61
+ | `TRANSFORMERS_CACHE` | /app/.cache/huggingface | Model cache directory |
62
+ | `MAX_FILE_SIZE_MB` | 10 | Maximum PDF file size |
63
+ | `TORCH_HOME` | /app/.cache/torch | PyTorch cache |
64
+
65
+ ### Volume Mounts
66
+
67
+ | Volume | Purpose |
68
+ |--------|---------|
69
+ | `model_cache` | Persistent AI model storage |
70
+ | `logs` | Application logs |
71
+ | `uploads` | Temporary file storage |
72
+
73
+ ## 🏗️ Build Options
74
+
75
+ ### Standard Build
76
+ ```bash
77
+ docker build -t ai-notes-summarizer .
78
+ ```
79
+
80
+ ### Production Build
81
+ ```bash
82
+ docker build -f Dockerfile.prod -t ai-notes-summarizer:prod .
83
+ ```
84
+
85
+ ### Development Build
86
+ ```bash
87
+ docker build --target dependencies -t ai-notes-summarizer:dev .
88
+ ```
89
+
90
+ ## 🚀 Deployment Options
91
+
92
+ ### 1. Docker Compose (Production)
93
+
94
+ ```yaml
95
+ # docker-compose.yml
96
+ version: '3.8'
97
+ services:
98
+ ai-notes-summarizer:
99
+ image: ai-notes-summarizer:latest
100
+ ports:
101
+ - "8501:8501"
102
+ volumes:
103
+ - model_cache:/app/.cache
104
+ - logs:/app/logs
105
+ restart: unless-stopped
106
+ ```
107
+
108
+ ### 2. Docker Swarm
109
+
110
+ ```bash
111
+ # Initialize swarm
112
+ docker swarm init
113
+
114
+ # Deploy stack
115
+ docker stack deploy -c docker-compose.yml ai-notes-stack
116
+ ```
117
+
118
+ ### 3. Kubernetes
119
+
120
+ ```yaml
121
+ # k8s-deployment.yaml
122
+ apiVersion: apps/v1
123
+ kind: Deployment
124
+ metadata:
125
+ name: ai-notes-summarizer
126
+ spec:
127
+ replicas: 2
128
+ selector:
129
+ matchLabels:
130
+ app: ai-notes-summarizer
131
+ template:
132
+ metadata:
133
+ labels:
134
+ app: ai-notes-summarizer
135
+ spec:
136
+ containers:
137
+ - name: ai-notes-summarizer
138
+ image: ai-notes-summarizer:latest
139
+ ports:
140
+ - containerPort: 8501
141
+ resources:
142
+ limits:
143
+ memory: "4Gi"
144
+ cpu: "2"
145
+ requests:
146
+ memory: "2Gi"
147
+ cpu: "1"
148
+ ```
149
+
150
+ ## 🔍 Monitoring and Logging
151
+
152
+ ### Health Checks
153
+ ```bash
154
+ # Check container health
155
+ docker ps --filter "name=ai-notes-summarizer"
156
+
157
+ # View health check logs
158
+ docker inspect ai-notes-summarizer | grep -A 10 Health
159
+ ```
160
+
161
+ ### Logs
162
+ ```bash
163
+ # View application logs
164
+ docker-compose logs -f
165
+
166
+ # View specific service logs
167
+ docker logs -f ai-notes-summarizer
168
+ ```
169
+
170
+ ### Metrics
171
+ ```bash
172
+ # Container stats
173
+ docker stats ai-notes-summarizer
174
+
175
+ # Resource usage
176
+ docker exec ai-notes-summarizer df -h
177
+ docker exec ai-notes-summarizer free -h
178
+ ```
179
+
180
+ ## 🛠️ Development
181
+
182
+ ### Development Environment
183
+ ```bash
184
+ # Start development environment with live reload
185
+ docker-compose -f docker-compose.dev.yml up
186
+
187
+ # Or use the script
188
+ ./docker-dev.sh
189
+ ```
190
+
191
+ ### Debugging
192
+ ```bash
193
+ # Access container shell
194
+ docker exec -it ai-notes-summarizer bash
195
+
196
+ # View application files
197
+ docker exec ai-notes-summarizer ls -la /app
198
+
199
+ # Check Python environment
200
+ docker exec ai-notes-summarizer pip list
201
+ ```
202
+
203
+ ## 🔒 Security
204
+
205
+ ### Security Features
206
+ - Non-root user execution
207
+ - Minimal base image
208
+ - No unnecessary packages
209
+ - Health checks enabled
210
+ - Resource limits configured
211
+
212
+ ### Security Scanning
213
+ ```bash
214
+ # Scan for vulnerabilities (`docker scan` has been retired; on current Docker releases use `docker scout cves ai-notes-summarizer:latest` instead)
215
+ docker scan ai-notes-summarizer:latest
216
+
217
+ # Check running processes
218
+ docker exec ai-notes-summarizer ps aux
219
+ ```
220
+
221
+ ## 🚨 Troubleshooting
222
+
223
+ ### Common Issues
224
+
225
+ 1. **Container won't start**
226
+ ```bash
227
+ docker logs ai-notes-summarizer
228
+ ```
229
+
230
+ 2. **Out of memory**
231
+ ```bash
232
+ # Increase Docker memory limit
233
+ docker update --memory=4g ai-notes-summarizer
234
+ ```
235
+
236
+ 3. **Model download fails**
237
+ ```bash
238
+ # Check internet connectivity
239
+ docker exec ai-notes-summarizer curl -I https://huggingface.co
240
+ ```
241
+
242
+ 4. **Permission issues**
243
+ ```bash
244
+ # Fix ownership
245
+ docker exec -u root ai-notes-summarizer chown -R app:app /app
246
+ ```
247
+
248
+ ### Performance Optimization
249
+
250
+ 1. **Use multi-stage builds** (already implemented)
251
+ 2. **Enable BuildKit**:
252
+ ```bash
253
+ export DOCKER_BUILDKIT=1
254
+ docker build .
255
+ ```
256
+ 3. **Use .dockerignore** (already included)
257
+ 4. **Pin dependency versions** (see requirements.docker.txt)
258
+
259
+ ## 📊 Resource Requirements
260
+
261
+ ### Minimum Requirements
262
+ - CPU: 1 core
263
+ - RAM: 2GB
264
+ - Storage: 5GB
265
+
266
+ ### Recommended Requirements
267
+ - CPU: 2 cores
268
+ - RAM: 4GB
269
+ - Storage: 10GB
270
+
271
+ ### Production Requirements
272
+ - CPU: 4 cores
273
+ - RAM: 8GB
274
+ - Storage: 20GB
275
+ - Load balancer for multiple instances
276
+
277
+ ## 🔄 Updates and Maintenance
278
+
279
+ ### Updating the Application
280
+ ```bash
281
+ # Pull latest changes
282
+ git pull
283
+
284
+ # Rebuild and restart
285
+ docker-compose up --build -d
286
+
287
+ # Or force all containers to be recreated (this restarts them; it is not a zero-downtime rolling update)
288
+ docker-compose up -d --force-recreate
289
+ ```
290
+
291
+ ### Backup and Restore
292
+ ```bash
293
+ # Backup volumes
294
+ docker run --rm -v ai-notes-model-cache:/data -v $(pwd):/backup alpine tar czf /backup/model-cache-backup.tar.gz -C /data .
295
+
296
+ # Restore volumes
297
+ docker run --rm -v ai-notes-model-cache:/data -v $(pwd):/backup alpine tar xzf /backup/model-cache-backup.tar.gz -C /data
298
+ ```
299
+
300
+ ## 📞 Support
301
+
302
+ For Docker-specific issues:
303
+ 1. Check container logs: `docker logs ai-notes-summarizer`
304
+ 2. Verify resource limits: `docker stats`
305
+ 3. Test connectivity: `docker exec ai-notes-summarizer curl localhost:8501`
306
+ 4. Review Docker documentation: https://docs.docker.com
Dockerfile CHANGED
@@ -1,20 +1,55 @@
1
- FROM python:3.13.5-slim
 
 
2
 
3
- WORKDIR /app
 
 
 
 
4
 
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
 
8
  git \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- RUN pip3 install -r requirements.txt
 
15
 
 
16
  EXPOSE 8501
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ # Multi-stage Dockerfile for AI Notes Summarizer
2
+ # Stage 1: Base image with system dependencies
3
+ FROM python:3.10-slim as base
4
 
5
+ # Set environment variables
6
+ ENV PYTHONUNBUFFERED=1 \
7
+ PYTHONDONTWRITEBYTECODE=1 \
8
+ PIP_NO_CACHE_DIR=1 \
9
+ PIP_DISABLE_PIP_VERSION_CHECK=1
10
 
11
+ # Install system dependencies
12
  RUN apt-get update && apt-get install -y \
13
  build-essential \
14
  curl \
15
+ software-properties-common \
16
  git \
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
+ # Create non-root user for security
20
+ RUN useradd --create-home --shell /bin/bash app
21
+
22
+ # Stage 2: Dependencies installation
23
+ FROM base as dependencies
24
+
25
+ # Set working directory
26
+ WORKDIR /app
27
+
28
+ # Copy requirements first for better caching
29
+ COPY requirements.txt .
30
+
31
+ # Install Python dependencies
32
+ RUN pip install --no-cache-dir -r requirements.txt
33
+
34
+ # Stage 3: Application
35
+ FROM dependencies as application
36
+
37
+ # Copy application code
38
+ COPY --chown=app:app . .
39
+
40
+ # Create necessary directories
41
+ RUN mkdir -p /app/uploads /app/logs && \
42
+ chown -R app:app /app
43
 
44
+ # Switch to non-root user
45
+ USER app
46
 
47
+ # Expose port
48
  EXPOSE 8501
49
 
50
+ # Health check
51
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
52
+ CMD curl -f http://localhost:8501/_stcore/health || exit 1
53
 
54
+ # Default command
55
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.fileWatcherType=none", "--browser.gatherUsageStats=false"]
Dockerfile.prod ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Production-optimized Dockerfile for AI Notes Summarizer
2
+ FROM python:3.10-slim as base
3
+
4
+ # Build arguments
5
+ ARG BUILD_DATE
6
+ ARG VCS_REF
7
+ ARG VERSION=1.0.0
8
+
9
+ # Labels for metadata
10
+ LABEL maintainer="AI Notes Summarizer Team" \
11
+ org.label-schema.build-date=$BUILD_DATE \
12
+ org.label-schema.name="ai-notes-summarizer" \
13
+ org.label-schema.description="AI-powered document summarization application" \
14
+ org.label-schema.url="https://github.com/your-repo/ai-notes-summarizer" \
15
+ org.label-schema.vcs-ref=$VCS_REF \
16
+ org.label-schema.version=$VERSION \
17
+ org.label-schema.schema-version="1.0"
18
+
19
+ # Set environment variables for production
20
+ ENV PYTHONUNBUFFERED=1 \
21
+ PYTHONDONTWRITEBYTECODE=1 \
22
+ PIP_NO_CACHE_DIR=1 \
23
+ PIP_DISABLE_PIP_VERSION_CHECK=1 \
24
+ STREAMLIT_SERVER_HEADLESS=true \
25
+ STREAMLIT_BROWSER_GATHER_USAGE_STATS=false \
26
+ TRANSFORMERS_CACHE=/app/.cache/huggingface \
27
+ TORCH_HOME=/app/.cache/torch \
28
+ HF_HOME=/app/.cache/huggingface
29
+
30
+ # Install system dependencies and clean up in one layer
31
+ RUN apt-get update && apt-get install -y --no-install-recommends \
32
+ build-essential \
33
+ curl \
34
+ git \
35
+ && rm -rf /var/lib/apt/lists/* \
36
+ && apt-get clean
37
+
38
+ # Create non-root user
39
+ RUN useradd --create-home --shell /bin/bash --uid 1000 app
40
+
41
+ # Stage 2: Dependencies
42
+ FROM base as dependencies
43
+
44
+ WORKDIR /app
45
+
46
+ # Copy requirements and install Python dependencies
47
+ COPY requirements.txt .
48
+ RUN pip install --no-cache-dir --upgrade pip && \
49
+ pip install --no-cache-dir -r requirements.txt && \
50
+ pip cache purge
51
+
52
+ # Stage 3: Application
53
+ FROM dependencies as application
54
+
55
+ # Copy application code with proper ownership
56
+ COPY --chown=app:app . .
57
+
58
+ # Create necessary directories and set permissions
59
+ RUN mkdir -p /app/.cache /app/logs /app/uploads && \
60
+ chown -R app:app /app && \
61
+ chmod +x /app/*.sh 2>/dev/null || true
62
+
63
+ # Switch to non-root user
64
+ USER app
65
+
66
+ # Expose port
67
+ EXPOSE 8501
68
+
69
+ # Health check
70
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
71
+ CMD curl -f http://localhost:8501/_stcore/health || exit 1
72
+
73
+ # Run the application
74
+ CMD ["streamlit", "run", "app.py", \
75
+ "--server.port=8501", \
76
+ "--server.address=0.0.0.0", \
77
+ "--server.headless=true", \
78
+ "--server.fileWatcherType=none", \
79
+ "--browser.gatherUsageStats=false", \
80
+ "--server.maxUploadSize=10"]
LICENSE ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
+ MIT License
3
+
4
+ Copyright (c) 2025 Muhammed Midlaj
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
23
+ =======
24
+ Apache License
25
+ Version 2.0, January 2004
26
+ http://www.apache.org/licenses/
27
+
28
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
29
+
30
+ 1. Definitions.
31
+
32
+ "License" shall mean the terms and conditions for use, reproduction,
33
+ and distribution as defined by Sections 1 through 9 of this document.
34
+
35
+ "Licensor" shall mean the copyright owner or entity authorized by
36
+ the copyright owner that is granting the License.
37
+
38
+ "Legal Entity" shall mean the union of the acting entity and all
39
+ other entities that control, are controlled by, or are under common
40
+ control with that entity. For the purposes of this definition,
41
+ "control" means (i) the power, direct or indirect, to cause the
42
+ direction or management of such entity, whether by contract or
43
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
44
+ outstanding shares, or (iii) beneficial ownership of such entity.
45
+
46
+ "You" (or "Your") shall mean an individual or Legal Entity
47
+ exercising permissions granted by this License.
48
+
49
+ "Source" form shall mean the preferred form for making modifications,
50
+ including but not limited to software source code, documentation
51
+ source, and configuration files.
52
+
53
+ "Object" form shall mean any form resulting from mechanical
54
+ transformation or translation of a Source form, including but
55
+ not limited to compiled object code, generated documentation,
56
+ and conversions to other media types.
57
+
58
+ "Work" shall mean the work of authorship, whether in Source or
59
+ Object form, made available under the License, as indicated by a
60
+ copyright notice that is included in or attached to the work
61
+ (an example is provided in the Appendix below).
62
+
63
+ "Derivative Works" shall mean any work, whether in Source or Object
64
+ form, that is based on (or derived from) the Work and for which the
65
+ editorial revisions, annotations, elaborations, or other modifications
66
+ represent, as a whole, an original work of authorship. For the purposes
67
+ of this License, Derivative Works shall not include works that remain
68
+ separable from, or merely link (or bind by name) to the interfaces of,
69
+ the Work and Derivative Works thereof.
70
+
71
+ "Contribution" shall mean any work of authorship, including
72
+ the original version of the Work and any modifications or additions
73
+ to that Work or Derivative Works thereof, that is intentionally
74
+ submitted to Licensor for inclusion in the Work by the copyright owner
75
+ or by an individual or Legal Entity authorized to submit on behalf of
76
+ the copyright owner. For the purposes of this definition, "submitted"
77
+ means any form of electronic, verbal, or written communication sent
78
+ to the Licensor or its representatives, including but not limited to
79
+ communication on electronic mailing lists, source code control systems,
80
+ and issue tracking systems that are managed by, or on behalf of, the
81
+ Licensor for the purpose of discussing and improving the Work, but
82
+ excluding communication that is conspicuously marked or otherwise
83
+ designated in writing by the copyright owner as "Not a Contribution."
84
+
85
+ "Contributor" shall mean Licensor and any individual or Legal Entity
86
+ on behalf of whom a Contribution has been received by Licensor and
87
+ subsequently incorporated within the Work.
88
+
89
+ 2. Grant of Copyright License. Subject to the terms and conditions of
90
+ this License, each Contributor hereby grants to You a perpetual,
91
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
92
+ copyright license to reproduce, prepare Derivative Works of,
93
+ publicly display, publicly perform, sublicense, and distribute the
94
+ Work and such Derivative Works in Source or Object form.
95
+
96
+ 3. Grant of Patent License. Subject to the terms and conditions of
97
+ this License, each Contributor hereby grants to You a perpetual,
98
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
99
+ (except as stated in this section) patent license to make, have made,
100
+ use, offer to sell, sell, import, and otherwise transfer the Work,
101
+ where such license applies only to those patent claims licensable
102
+ by such Contributor that are necessarily infringed by their
103
+ Contribution(s) alone or by combination of their Contribution(s)
104
+ with the Work to which such Contribution(s) was submitted. If You
105
+ institute patent litigation against any entity (including a
106
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
107
+ or a Contribution incorporated within the Work constitutes direct
108
+ or contributory patent infringement, then any patent licenses
109
+ granted to You under this License for that Work shall terminate
110
+ as of the date such litigation is filed.
111
+
112
+ 4. Redistribution. You may reproduce and distribute copies of the
113
+ Work or Derivative Works thereof in any medium, with or without
114
+ modifications, and in Source or Object form, provided that You
115
+ meet the following conditions:
116
+
117
+ (a) You must give any other recipients of the Work or
118
+ Derivative Works a copy of this License; and
119
+
120
+ (b) You must cause any modified files to carry prominent notices
121
+ stating that You changed the files; and
122
+
123
+ (c) You must retain, in the Source form of any Derivative Works
124
+ that You distribute, all copyright, patent, trademark, and
125
+ attribution notices from the Source form of the Work,
126
+ excluding those notices that do not pertain to any part of
127
+ the Derivative Works; and
128
+
129
+ (d) If the Work includes a "NOTICE" text file as part of its
130
+ distribution, then any Derivative Works that You distribute must
131
+ include a readable copy of the attribution notices contained
132
+ within such NOTICE file, excluding those notices that do not
133
+ pertain to any part of the Derivative Works, in at least one
134
+ of the following places: within a NOTICE text file distributed
135
+ as part of the Derivative Works; within the Source form or
136
+ documentation, if provided along with the Derivative Works; or,
137
+ within a display generated by the Derivative Works, if and
138
+ wherever such third-party notices normally appear. The contents
139
+ of the NOTICE file are for informational purposes only and
140
+ do not modify the License. You may add Your own attribution
141
+ notices within Derivative Works that You distribute, alongside
142
+ or as an addendum to the NOTICE text from the Work, provided
143
+ that such additional attribution notices cannot be construed
144
+ as modifying the License.
145
+
146
+ You may add Your own copyright statement to Your modifications and
147
+ may provide additional or different license terms and conditions
148
+ for use, reproduction, or distribution of Your modifications, or
149
+ for any such Derivative Works as a whole, provided Your use,
150
+ reproduction, and distribution of the Work otherwise complies with
151
+ the conditions stated in this License.
152
+
153
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
154
+ any Contribution intentionally submitted for inclusion in the Work
155
+ by You to the Licensor shall be under the terms and conditions of
156
+ this License, without any additional terms or conditions.
157
+ Notwithstanding the above, nothing herein shall supersede or modify
158
+ the terms of any separate license agreement you may have executed
159
+ with Licensor regarding such Contributions.
160
+
161
+ 6. Trademarks. This License does not grant permission to use the trade
162
+ names, trademarks, service marks, or product names of the Licensor,
163
+ except as required for reasonable and customary use in describing the
164
+ origin of the Work and reproducing the content of the NOTICE file.
165
+
166
+ 7. Disclaimer of Warranty. Unless required by applicable law or
167
+ agreed to in writing, Licensor provides the Work (and each
168
+ Contributor provides its Contributions) on an "AS IS" BASIS,
169
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
170
+ implied, including, without limitation, any warranties or conditions
171
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
172
+ PARTICULAR PURPOSE. You are solely responsible for determining the
173
+ appropriateness of using or redistributing the Work and assume any
174
+ risks associated with Your exercise of permissions under this License.
175
+
176
+ 8. Limitation of Liability. In no event and under no legal theory,
177
+ whether in tort (including negligence), contract, or otherwise,
178
+ unless required by applicable law (such as deliberate and grossly
179
+ negligent acts) or agreed to in writing, shall any Contributor be
180
+ liable to You for damages, including any direct, indirect, special,
181
+ incidental, or consequential damages of any character arising as a
182
+ result of this License or out of the use or inability to use the
183
+ Work (including but not limited to damages for loss of goodwill,
184
+ work stoppage, computer failure or malfunction, or any and all
185
+ other commercial damages or losses), even if such Contributor
186
+ has been advised of the possibility of such damages.
187
+
188
+ 9. Accepting Warranty or Additional Liability. While redistributing
189
+ the Work or Derivative Works thereof, You may choose to offer,
190
+ and charge a fee for, acceptance of support, warranty, indemnity,
191
+ or other liability obligations and/or rights consistent with this
192
+ License. However, in accepting such obligations, You may act only
193
+ on Your own behalf and on Your sole responsibility, not on behalf
194
+ of any other Contributor, and only if You agree to indemnify,
195
+ defend, and hold each Contributor harmless for any liability
196
+ incurred by, or claims asserted against, such Contributor by reason
197
+ of your accepting any such warranty or additional liability.
198
+
199
+ END OF TERMS AND CONDITIONS
200
+
201
+ APPENDIX: How to apply the Apache License to your work.
202
+
203
+ To apply the Apache License to your work, attach the following
204
+ boilerplate notice, with the fields enclosed by brackets "[]"
205
+ replaced with your own identifying information. (Don't include
206
+ the brackets!) The text should be enclosed in the appropriate
207
+ comment syntax for the file format. We also recommend that a
208
+ file or class name and description of purpose be included on the
209
+ same "printed page" as the copyright notice for easier
210
+ identification within third-party archives.
211
+
212
+ Copyright [yyyy] [name of copyright owner]
213
+
214
+ Licensed under the Apache License, Version 2.0 (the "License");
215
+ you may not use this file except in compliance with the License.
216
+ You may obtain a copy of the License at
217
+
218
+ http://www.apache.org/licenses/LICENSE-2.0
219
+
220
+ Unless required by applicable law or agreed to in writing, software
221
+ distributed under the License is distributed on an "AS IS" BASIS,
222
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
223
+ See the License for the specific language governing permissions and
224
+ limitations under the License.
225
+ >>>>>>> 9b4f2dab9437daaefabf059cd647a5761c93c197
README.md CHANGED
@@ -1,20 +1,481 @@
1
- ---
2
- title: NoteSnap
3
- emoji: πŸš€
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- license: apache-2.0
13
- ---
14
-
15
- # Welcome to Streamlit!
16
-
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
-
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
+ # 📝 NoteSnap
3
+
4
+ <div align="center">
5
+
6
+ ![NoteSnap Logo](https://img.shields.io/badge/📝-NoteSnap-blue?style=for-the-badge)
7
+
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
9
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
10
+ [![Streamlit](https://img.shields.io/badge/Streamlit-FF4B4B?style=flat&logo=streamlit&logoColor=white)](https://streamlit.io/)
11
+ [![Docker](https://img.shields.io/badge/Docker-2496ED?style=flat&logo=docker&logoColor=white)](https://www.docker.com/)
12
+ [![Transformers](https://img.shields.io/badge/🤗%20Transformers-FFD21E?style=flat)](https://huggingface.co/transformers/)
13
+
14
+ [![GitHub stars](https://img.shields.io/github/stars/PRATEEK-260/NoteSnap?style=social)](https://github.com/PRATEEK-260/NoteSnap/stargazers)
15
+ [![GitHub forks](https://img.shields.io/github/forks/PRATEEK-260/NoteSnap?style=social)](https://github.com/PRATEEK-260/NoteSnap/network/members)
16
+ [![GitHub issues](https://img.shields.io/github/issues/PRATEEK-260/NoteSnap)](https://github.com/PRATEEK-260/NoteSnap/issues)
17
+
18
+ </div>
19
+
20
+ A powerful web application that transforms lengthy documents and notes into concise, bullet-point summaries using state-of-the-art AI models.
21
+
22
+ ---
23
+
24
+ ## 📋 Table of Contents
25
+
26
+ - [✨ Features](#-features)
27
+ - [🚀 Quick Start](#-quick-start)
28
+ - [Option 1: Docker (Recommended)](#option-1-docker-recommended)
29
+ - [Option 2: Local Installation](#option-2-local-installation)
30
+ - [📖 Usage Guide](#-usage-guide)
31
+ - [🖼️ Screenshots](#️-screenshots)
32
+ - [🛠️ Technical Details](#️-technical-details)
33
+ - [🐳 Docker Deployment](#-docker-deployment)
34
+ - [🔧 Configuration](#-configuration)
35
+ - [🚨 Troubleshooting](#-troubleshooting)
36
+ - [🤝 Contributing](#-contributing)
37
+ - [📄 License](#-license)
38
+ - [🙏 Acknowledgments](#-acknowledgments)
39
+ - [📞 Support](#-support)
40
+
41
+ ---
42
+
43
+ ## ✨ Features
44
+
45
+ - **PDF Processing**: Upload PDF files and extract text content automatically
46
+ - **Direct Text Input**: Paste text content directly for immediate summarization
47
+ - **AI-Powered Summarization**: Uses Hugging Face Transformers (BART, T5) for high-quality summaries
48
+ - **Bullet-Point Format**: Clean, readable bullet-point summaries
49
+ - **Multiple AI Models**: Choose from different pre-trained models
50
+ - **Customizable Length**: Adjust summary length (Short, Medium, Long)
51
+ - **Progress Tracking**: Real-time progress indicators during processing
52
+ - **Download Summaries**: Save generated summaries as text files
53
+ - **Statistics**: View compression ratios and word counts
54
+ - **Error Handling**: Comprehensive error handling and user feedback
55
+
56
+ ## 🚀 Quick Start
57
+
58
+ ### 🌐 Try Online (Fastest)
59
+ **[🚀 Live Demo on Hugging Face Spaces](https://huggingface.co/spaces/PRATEEK-260/NoteSnap)**
60
+ - No installation required
61
+ - Instant access in your browser
62
+ - Full functionality available
63
+
64
+ ### Option 1: Docker (Recommended)
65
+
66
+ #### Prerequisites
67
+ - Docker and Docker Compose installed
68
+ - Internet connection (for downloading AI models)
69
+
70
+ #### Using Docker Compose (Easiest)
71
+ ```bash
72
+ # Clone the repository
73
+ git clone https://github.com/PRATEEK-260/NoteSnap.git
74
+ cd NoteSnap
75
+
76
+ # Start the application
77
+ docker-compose up -d
78
+
79
+ # Access the application at http://localhost:8501
80
+ ```
81
+
82
+ #### Using Docker Scripts
83
+ ```bash
84
+ # Build the Docker image
85
+ ./docker-build.sh
86
+
87
+ # Run the container
88
+ ./docker-run.sh
89
+
90
+ # For development with live code reloading
91
+ ./docker-dev.sh
92
+ ```
93
+
94
+ #### Manual Docker Commands
95
+ ```bash
96
+ # Build the image
97
+ docker build -t notesnap .
98
+
99
+ # Run the container
100
+ docker run -p 8501:8501 notesnap
101
+ ```
102
+
103
+ ### Option 2: Local Installation
104
+
105
+ #### Prerequisites
106
+ - Python 3.8 or higher
107
+ - pip (Python package installer)
108
+ - Internet connection (for downloading AI models)
109
+
110
+ #### Installation Steps
111
+ 1. **Clone the repository**
112
+ ```bash
113
+ git clone https://github.com/PRATEEK-260/NoteSnap.git
114
+ cd NoteSnap
115
+ ```
116
+
117
+ 2. **Install dependencies**
118
+ ```bash
119
+ pip install -r requirements.txt
120
+ ```
121
+
122
+ 3. **Run the application**
123
+ ```bash
124
+ streamlit run app.py
125
+ ```
126
+
127
+ 4. **Open your browser**
128
+ - The application will automatically open at `http://localhost:8501`
129
+ - If it doesn't open automatically, navigate to the URL manually
130
+
131
+ ## 📖 Usage Guide
132
+
133
+ ### PDF Summarization
134
+
135
+ 1. **Upload PDF**: Click on the "📄 PDF Upload" tab
136
+ 2. **Select File**: Choose a PDF file (max 10MB)
137
+ 3. **Process**: Click "📖 Extract & Summarize PDF"
138
+ 4. **Review**: View the extracted text preview
139
+ 5. **Get Summary**: The AI will generate a bullet-point summary
140
+ 6. **Download**: Save the summary using the download button
141
+
142
+ ### Text Summarization
143
+
144
+ 1. **Input Text**: Click on the "📝 Text Input" tab
145
+ 2. **Paste Content**: Enter or paste your text (minimum 100 characters)
146
+ 3. **Summarize**: Click "🚀 Summarize Text"
147
+ 4. **Review**: View the generated summary
148
+ 5. **Download**: Save the summary as needed
149
+
150
+ ### Settings
151
+
152
+ - **AI Model**: Choose from BART (recommended), T5, or DistilBART
153
+ - **Summary Length**: Select Short, Medium, or Long summaries
154
+ - **Statistics**: View word counts and compression ratios
155
+
156
+ ## 🛠️ Technical Details
157
+
158
+ ### Architecture
159
+
160
+ ```
161
+ NoteSnap/
162
+ ├── app.py # Main Streamlit application
162
+ ├── modules/
163
+ │ ├── __init__.py
164
+ │ ├── pdf_processor.py # PDF text extraction
165
+ │ ├── text_summarizer.py # AI summarization
166
+ │ └── utils.py # Utility functions
167
+ ├── requirements.txt # Python dependencies
168
+ └── README.md # This file
170
+ ```
171
+
172
+ ### AI Models
173
+
174
+ - **BART (facebook/bart-large-cnn)**: Best quality, recommended for most use cases
175
+ - **T5 Small**: Faster processing, good for shorter texts
176
+ - **DistilBART**: Balanced performance and speed
177
+
178
+ ### Dependencies
179
+
180
+ - **Streamlit**: Web application framework
181
+ - **Transformers**: Hugging Face AI models
182
+ - **PyTorch**: Deep learning framework
183
+ - **PyPDF2**: PDF text extraction
184
+ - **Additional utilities**: See `requirements.txt`
185
+
186
+ ## 🔧 Configuration
187
+
188
+ ### Model Selection
189
+
190
+ You can change the default model by modifying the `TextSummarizer` initialization in `app.py`:
191
+
192
+ ```python
193
+ text_summarizer = TextSummarizer(model_name="your-preferred-model")
194
+ ```
195
+
196
+ ### Summary Length
197
+
198
+ Adjust default summary lengths in `modules/text_summarizer.py`:
199
+
200
+ ```python
201
+ self.min_summary_length = 50 # Minimum words
202
+ self.max_summary_length = 300 # Maximum words
203
+ ```
204
+
205
+ ### File Size Limits
206
+
207
+ Modify PDF file size limits in `modules/pdf_processor.py`:
208
+
209
+ ```python
210
+ self.max_file_size = 10 * 1024 * 1024 # 10MB
211
+ ```
212
+
213
+ ## 🚨 Troubleshooting
214
+
215
+ ### Common Issues
216
+
217
+ 1. **Model Loading Errors**
218
+ - Ensure stable internet connection
219
+ - Check available disk space (models can be 1-2GB)
220
+ - Try switching to a smaller model (T5 Small or DistilBART)
221
+
222
+ 2. **PDF Processing Issues**
223
+ - Ensure PDF is not encrypted
224
+ - Check if PDF contains readable text (not just images)
225
+ - Try with a smaller PDF file
226
+
227
+ 3. **Memory Errors**
228
+ - Reduce text length
229
+ - Close other applications
230
+ - Try using CPU instead of GPU
231
+
232
+ 4. **Slow Performance**
233
+ - Use GPU if available
234
+ - Choose smaller models for faster processing
235
+ - Process shorter text chunks
236
+
237
+ ### Error Messages
238
+
239
+ - **"Text is too short"**: Minimum 100 characters required
240
+ - **"No readable text found"**: PDF may contain only images
241
+ - **"Model loading error"**: Check internet connection
242
+ - **"Out of memory"**: Reduce text length or restart application
243
+
244
+ ## 🎯 Best Practices
245
+
246
+ ### For Best Results
247
+
248
+ 1. **Text Quality**: Use well-formatted, coherent text
249
+ 2. **Length**: Optimal text length is 500-5000 words
250
+ 3. **Content**: Works best with structured content (articles, reports, notes)
251
+ 4. **Model Choice**: Use BART for academic/formal content, T5 for general text
252
+
253
+ ### Performance Tips
254
+
255
+ 1. **GPU Usage**: Enable CUDA for faster processing
256
+ 2. **Batch Processing**: Process multiple documents separately
257
+ 3. **Model Caching**: Models are cached after first load
258
+ 4. **Text Preprocessing**: Clean text improves summary quality
259
+
260
+ ## 🖼️ Screenshots
261
+
262
+ <div align="center">
263
+
264
+ ### Main Interface
265
+ ![Main Interface](Screenshots/Main%20interface.png)
266
+ *Clean and intuitive interface with PDF upload and text input options*
267
+
268
+ ### PDF Processing
269
+ ![PDF Processing](Screenshots/pdf%20processing.png)
270
+ *Real-time PDF processing with progress indicators*
271
+
272
+ ### Summary Results
273
+ ![Summary Results](Screenshots/Summery%20Result.png)
274
+ *Bullet-point summaries with statistics and download options*
275
+
276
+ ### Settings Panel
277
+ ![Settings Panel](Screenshots/settings%20panel.png)
278
+ *Customizable AI model selection and summary length options*
279
+
280
+ </div>
281
+
282
+ ## 🎥 Demo
283
+
284
+ 🚀 **[Live Demo](https://huggingface.co/spaces/PRATEEK-260/NoteSnap)** - Try it now on Hugging Face Spaces!
285
+
286
+ ## πŸ“„ License
287
+
288
+ This project is open source and available under the MIT License.
289
+
290
+ ## 🤝 Contributing
291
+
292
+ Contributions are welcome! Please feel free to submit issues, feature requests, or pull requests.
293
+
294
+ ## 🐳 Docker Deployment
295
+
296
+ ### Production Deployment
297
+
298
+ For production deployment, use the standard Docker Compose configuration:
299
+
300
+ ```bash
301
+ # Start in production mode
302
+ docker-compose up -d
303
+
304
+ # View logs
305
+ docker-compose logs -f
306
+
307
+ # Stop the application
308
+ docker-compose down
309
+
310
+ # Update the application
311
+ docker-compose pull
312
+ docker-compose up -d
313
+ ```
314
+
315
+ ### Development Mode
316
+
317
+ For development with live code reloading:
318
+
319
+ ```bash
320
+ # Start development environment
321
+ docker-compose -f docker-compose.dev.yml up
322
+
323
+ # Or use the convenience script
324
+ ./docker-dev.sh
325
+ ```
326
+
327
+ ### Docker Configuration
328
+
329
+ #### Environment Variables
330
+ - `STREAMLIT_SERVER_PORT`: Port for the application (default: 8501)
331
+ - `TRANSFORMERS_CACHE`: Cache directory for AI models
332
+ - `MAX_FILE_SIZE_MB`: Maximum PDF file size (default: 10MB)
333
+
334
+ #### Volumes
335
+ - `model_cache`: Persistent storage for downloaded AI models
336
+ - `logs`: Application logs
337
+ - `uploads`: Temporary file storage (optional)
338
+
339
+ #### Resource Limits
340
+ - Memory: 4GB limit, 2GB reserved
341
+ - CPU: 2 cores limit, 1 core reserved
342
+
343
+ ### Docker Troubleshooting
344
+
345
+ 1. **Container won't start**: Check logs with `docker-compose logs`
346
+ 2. **Out of memory**: Increase Docker memory limits
347
+ 3. **Model download fails**: Ensure internet connectivity
348
+ 4. **Permission issues**: Check file ownership and Docker user settings
349
+
350
+ ## 🤝 Contributing
351
+
352
+ We welcome contributions from the community! Here's how you can help:
353
+
354
+ ### 🌟 Ways to Contribute
355
+
356
+ - ⭐ **Star this repository** if you find it useful
357
+ - 🐛 **Report bugs** by opening an [issue](https://github.com/PRATEEK-260/NoteSnap/issues)
358
+ - 💡 **Suggest features** or improvements
359
+ - 📖 **Improve documentation**
360
+ - 🔧 **Submit pull requests** with bug fixes or new features
361
+
362
+ ### 🚀 Getting Started
363
+
364
+ 1. **Fork the repository**
365
+ ```bash
366
+ # Click the "Fork" button on GitHub, then:
367
+ git clone https://github.com/YOUR-USERNAME/NoteSnap.git
368
+ cd NoteSnap
369
+ ```
370
+
371
+ 2. **Create a feature branch**
372
+ ```bash
373
+ git checkout -b feature/amazing-feature
374
+ ```
375
+
376
+ 3. **Make your changes**
377
+ - Follow the existing code style
378
+ - Add tests for new features
379
+ - Update documentation as needed
380
+
381
+ 4. **Test your changes**
382
+ ```bash
383
+ # Run basic tests
384
+ python test_basic.py
385
+
386
+ # Test Docker build
387
+ ./docker-test.sh
388
+ ```
389
+
390
+ 5. **Submit a pull request**
391
+ ```bash
392
+ git add .
393
+ git commit -m "Add amazing feature"
394
+ git push origin feature/amazing-feature
395
+ ```
396
+
397
+ ### 📋 Development Guidelines
398
+
399
+ - **Code Style**: Follow PEP 8 for Python code
400
+ - **Documentation**: Update README.md for new features
401
+ - **Testing**: Add tests for new functionality
402
+ - **Docker**: Ensure Docker compatibility
403
+ - **Dependencies**: Keep requirements.txt updated
404
+
405
+ ### 🐛 Reporting Issues
406
+
407
+ When reporting issues, please include:
408
+
409
+ - **Environment details** (OS, Python version, Docker version)
410
+ - **Steps to reproduce** the issue
411
+ - **Expected vs actual behavior**
412
+ - **Error messages** or logs
413
+ - **Screenshots** if applicable
414
+
415
+ [**Report an Issue →**](https://github.com/PRATEEK-260/NoteSnap/issues/new)
416
+
417
+ ### 💬 Discussions
418
+
419
+ Join our community discussions:
420
+
421
+ - [**GitHub Discussions**](https://github.com/PRATEEK-260/NoteSnap/discussions) - General questions and ideas
422
+ - [**Issues**](https://github.com/PRATEEK-260/NoteSnap/issues) - Bug reports and feature requests
423
+
424
+ ## 📄 License
425
+
426
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
427
+
428
+ ## 🙏 Acknowledgments
429
+
430
+ ### 🛠️ Built With
431
+
432
+ - [**Streamlit**](https://streamlit.io/) - Web application framework
433
+ - [**Hugging Face Transformers**](https://huggingface.co/transformers/) - AI/ML models
434
+ - [**PyTorch**](https://pytorch.org/) - Deep learning framework
435
+ - [**PyPDF2**](https://pypdf2.readthedocs.io/) - PDF processing
436
+ - [**Docker**](https://www.docker.com/) - Containerization
437
+
438
+ ### 🎯 Inspiration
439
+
440
+ - Inspired by the need for efficient document summarization
441
+ - Built to help students, researchers, and professionals save time
442
+ - Leverages state-of-the-art AI models for high-quality summaries
443
+
444
+ ### 🤖 AI Models
445
+
446
+ Special thanks to the teams behind these amazing models:
447
+ - [**BART**](https://huggingface.co/facebook/bart-large-cnn) by Facebook AI
448
+ - [**T5**](https://huggingface.co/t5-small) by Google Research
449
+ - [**DistilBART**](https://huggingface.co/sshleifer/distilbart-cnn-12-6) by Sam Shleifer
450
+
451
+ ## 📞 Support
452
+
453
+ If you encounter any issues or have questions:
454
+
455
+ ### 🔍 Self-Help Resources
456
+
457
+ 1. 📖 Check the [troubleshooting section](#-troubleshooting) above
458
+ 2. 🐛 Review error messages for specific guidance
459
+ 3. 📦 Ensure all dependencies are properly installed
460
+ 4. 🔄 Try with different models or settings
461
+ 5. 🐳 For Docker issues, check container logs: `docker-compose logs`
462
+
463
+ ### 💬 Get Help
464
+
465
+ - 🐛 **Bug Reports**: [Open an Issue](https://github.com/PRATEEK-260/NoteSnap/issues/new)
466
+ - 💡 **Feature Requests**: [Start a Discussion](https://github.com/PRATEEK-260/NoteSnap/discussions)
467
+
468
+ ---
469
+
470
+ <div align="center">
471
+
472
+ **Made with ❤️ by [PRATEEK-260](https://github.com/PRATEEK-260)**
473
+
474
+ **Happy Summarizing! 📝✨**
475
+
476
+ [![GitHub](https://img.shields.io/badge/GitHub-PRATEEK--260-181717?style=flat&logo=github)](https://github.com/PRATEEK-260)
477
+
478
+ </div>
479
+ =======
480
+ # NoteSnap
481
+ >>>>>>> 9b4f2dab9437daaefabf059cd647a5761c93c197
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NoteSnap - Main Application
3
+ A Streamlit web application for summarizing PDF files and text content using AI.
4
+ """
5
+
6
+ import streamlit as st
7
+ import os
8
+ from pathlib import Path
9
+
10
+ # Import custom modules
11
+ from modules.pdf_processor import PDFProcessor
12
+ from modules.text_summarizer import TextSummarizer
13
+ from modules.utils import (
14
+ setup_logging,
15
+ validate_input,
16
+ display_summary_stats,
17
+ format_file_size,
18
+ )
19
+
20
+
21
+ # Initialize components
22
@st.cache_resource
def initialize_components():
    """Build the two processing components used by the app.

    Returns:
        tuple: ``(PDFProcessor, TextSummarizer)`` instances, in that order.
    """
    # The processor is constructed before the summarizer, as before.
    processor = PDFProcessor()
    summarizer = TextSummarizer()
    components = (processor, summarizer)
    return components
28
+
29
+
30
def _summary_file_name(uploaded_name):
    """Return the download file name for the summary of an uploaded file.

    The upload's extension is dropped first, so ``report.pdf`` yields
    ``report_summary.txt`` rather than ``report.pdf_summary.txt``.

    Args:
        uploaded_name: File name reported by the upload widget.

    Returns:
        str: ``<stem>_summary.txt``, or ``document_summary.txt`` when the
        name has no usable stem.
    """
    stem = Path(uploaded_name).stem or "document"
    return f"{stem}_summary.txt"


def _render_summary(source_text, summary, file_name):
    """Show a generated summary, its statistics and a download button.

    Args:
        source_text: Text the summary was generated from (used for stats).
        summary: Generated summary text.
        file_name: File name offered by the download button.
    """
    st.success("✅ Summary generated successfully!")

    # Display summary
    st.subheader("📋 Summary")
    st.markdown(summary)

    # Display statistics
    st.subheader("📊 Statistics")
    display_summary_stats(source_text, summary)

    # Download option
    st.download_button(
        label="💾 Download Summary",
        data=summary,
        file_name=file_name,
        mime="text/plain",
    )


def main():
    """Render the NoteSnap Streamlit page.

    Configures the page, applies the sidebar settings (model and summary
    length) to the summarizer, and renders two tabs: one that extracts and
    summarizes an uploaded PDF and one that summarizes pasted text.
    """
    st.set_page_config(
        page_title="NoteSnap",
        page_icon="📝",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Initialize components
    pdf_processor, text_summarizer = initialize_components()

    # App header
    st.title("📝 NoteSnap")
    st.markdown(
        "Transform your lengthy documents and notes into concise, bullet-point summaries using AI."
    )

    # Sidebar for options
    st.sidebar.header("⚙️ Settings")

    # Model selection: display label -> Hugging Face model identifier
    model_options = {
        "BART (Recommended)": "facebook/bart-large-cnn",
        "T5 Small": "t5-small",
        "DistilBART": "sshleifer/distilbart-cnn-12-6",
    }

    selected_model = st.sidebar.selectbox(
        "Choose AI Model:",
        options=list(model_options.keys()),
        index=0,
        help="BART is recommended for best quality summaries",
    )

    # NOTE(review): initialize_components() is wrapped in st.cache_resource, so
    # the attribute writes below presumably affect every session that shares
    # the cached summarizer -- confirm this is intended.
    if text_summarizer.model_name != model_options[selected_model]:
        text_summarizer.model_name = model_options[selected_model]
        text_summarizer.summarizer = None  # Reset to reload model

    # Summary length options
    summary_length = st.sidebar.select_slider(
        "Summary Length:",
        options=["Short", "Medium", "Long"],
        value="Medium",
        help="Choose the desired length of the summary",
    )

    # Each preset maps to (min_summary_length, max_summary_length).
    length_settings = {"Short": (30, 150), "Medium": (50, 300), "Long": (100, 500)}
    text_summarizer.min_summary_length, text_summarizer.max_summary_length = (
        length_settings[summary_length]
    )

    # Main content area
    tab1, tab2 = st.tabs(["📄 PDF Upload", "📝 Text Input"])

    with tab1:
        st.header("Upload PDF File")
        st.markdown("Upload a PDF file to extract and summarize its content.")

        uploaded_file = st.file_uploader(
            "Choose a PDF file", type=["pdf"], help="Upload a PDF file (max 10MB)"
        )

        if uploaded_file is not None:
            # Display file info
            file_size = format_file_size(uploaded_file.size)
            st.info(f"📄 **File:** {uploaded_file.name} ({file_size})")

            # Process PDF button
            if st.button("📖 Extract & Summarize PDF", type="primary"):
                with st.spinner("Processing PDF file..."):
                    # Extract text from PDF
                    extracted_text = pdf_processor.process_pdf(uploaded_file)

                    if extracted_text:
                        st.success("✅ Text extracted successfully!")

                        # Show at most the first 1000 characters as a preview
                        if len(extracted_text) > 1000:
                            preview = extracted_text[:1000] + "..."
                        else:
                            preview = extracted_text
                        with st.expander("📝 View Extracted Text (Preview)"):
                            st.text_area(
                                "Extracted Content:",
                                value=preview,
                                height=200,
                                disabled=True,
                            )

                        # Generate summary
                        summary = text_summarizer.summarize_text(extracted_text)

                        if summary:
                            _render_summary(
                                extracted_text,
                                summary,
                                _summary_file_name(uploaded_file.name),
                            )

    with tab2:
        st.header("Direct Text Input")
        st.markdown("Paste your text content directly for summarization.")

        text_input = st.text_area(
            "Enter your text here:",
            height=300,
            placeholder="Paste your text content here...",
            help="Minimum 100 characters required for effective summarization",
        )

        # Character count
        char_count = len(text_input)
        st.caption(f"Characters: {char_count:,}")

        if st.button("🚀 Summarize Text", type="primary"):
            if validate_input(text_input, min_length=100):
                # Generate summary
                summary = text_summarizer.summarize_text(text_input)

                if summary:
                    _render_summary(text_input, summary, "text_summary.txt")

    # Footer
    st.markdown("---")
    st.markdown(
        """
<div style='text-align: center; color: #666;'>
<p>NoteSnap | Powered by Hugging Face Transformers</p>
</div>
""",
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()
docker-build.sh ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# AI Notes Summarizer - Docker Build Script
#
# Usage: ./docker-build.sh [TAG] [DOCKERFILE]
#   TAG         Image tag to build (default: latest)
#   DOCKERFILE  Dockerfile passed to `docker build -f` (default: Dockerfile)
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
IMAGE_NAME="ai-notes-summarizer"
TAG="${1:-latest}"
DOCKERFILE="${2:-Dockerfile}"

echo -e "${BLUE}🐳 Building AI Notes Summarizer Docker Image${NC}"
echo -e "${YELLOW}Image: ${IMAGE_NAME}:${TAG}${NC}"
echo -e "${YELLOW}Dockerfile: ${DOCKERFILE}${NC}"
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo -e "${RED}❌ Docker is not running. Please start Docker and try again.${NC}"
    exit 1
fi

# Build the image.
# The build runs as the `if` condition on purpose: with `set -e` active, a
# separate `[ $? -eq 0 ]` check afterwards would never see a failure because
# the script would already have exited before printing the error message.
echo -e "${BLUE}📦 Building Docker image...${NC}"
if docker build \
    -t "${IMAGE_NAME}:${TAG}" \
    -f "${DOCKERFILE}" \
    --build-arg BUILD_DATE="$(date -u +'%Y-%m-%dT%H:%M:%SZ')" \
    --build-arg VCS_REF="$(git rev-parse --short HEAD 2>/dev/null || echo 'unknown')" \
    .; then
    echo -e "${GREEN}✅ Docker image built successfully!${NC}"
    echo ""

    # Show image info
    echo -e "${BLUE}📊 Image Information:${NC}"
    docker images "${IMAGE_NAME}:${TAG}" --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedAt}}"
    echo ""

    echo -e "${GREEN}🚀 To run the container:${NC}"
    echo -e "${YELLOW}docker run -p 8501:8501 ${IMAGE_NAME}:${TAG}${NC}"
    echo ""
    echo -e "${GREEN}🐙 Or use Docker Compose:${NC}"
    echo -e "${YELLOW}docker-compose up${NC}"
else
    echo -e "${RED}❌ Docker build failed!${NC}"
    exit 1
fi
docker-compose.dev.yml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ ai-notes-summarizer-dev:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ target: dependencies # Stop at dependencies stage for faster rebuilds
9
+ container_name: ai-notes-summarizer-dev
10
+ ports:
11
+ - "8501:8501"
12
+ environment:
13
+ - STREAMLIT_SERVER_PORT=8501
14
+ - STREAMLIT_SERVER_ADDRESS=0.0.0.0
15
+ - STREAMLIT_SERVER_HEADLESS=true
16
+ - STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
17
+ - TRANSFORMERS_CACHE=/app/.cache/huggingface
18
+ - TORCH_HOME=/app/.cache/torch
19
+ - HF_HOME=/app/.cache/huggingface
20
+ - DEBUG=true
21
+ volumes:
22
+ # Mount source code for live development
23
+ - .:/app
24
+ # Model cache persistence
25
+ - model_cache_dev:/app/.cache
26
+ # Logs
27
+ - ./logs:/app/logs
28
+ # Uploads
29
+ - ./uploads:/app/uploads
30
+ working_dir: /app
31
+ command: ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.fileWatcherType=poll"]
32
+ restart: unless-stopped
33
+ user: "1000:1000" # Use host user ID to avoid permission issues
34
+ deploy:
35
+ resources:
36
+ limits:
37
+ memory: 4G
38
+ cpus: '2.0'
39
+
40
+ volumes:
41
+ model_cache_dev:
42
+ driver: local
docker-compose.yml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ ai-notes-summarizer:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ container_name: ai-notes-summarizer
9
+ ports:
10
+ - "8501:8501"
11
+ environment:
12
+ - STREAMLIT_SERVER_PORT=8501
13
+ - STREAMLIT_SERVER_ADDRESS=0.0.0.0
14
+ - STREAMLIT_SERVER_HEADLESS=true
15
+ - STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
16
+ - TRANSFORMERS_CACHE=/app/.cache/huggingface
17
+ - TORCH_HOME=/app/.cache/torch
18
+ - HF_HOME=/app/.cache/huggingface
19
+ volumes:
20
+ # Model cache persistence
21
+ - model_cache:/app/.cache
22
+ # Logs persistence
23
+ - logs:/app/logs
24
+ # Optional: Mount local uploads directory for development
25
+ # - ./uploads:/app/uploads
26
+ restart: unless-stopped
27
+ healthcheck:
28
+ test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
29
+ interval: 30s
30
+ timeout: 10s
31
+ retries: 3
32
+ start_period: 40s
33
+ deploy:
34
+ resources:
35
+ limits:
36
+ memory: 4G
37
+ cpus: '2.0'
38
+ reservations:
39
+ memory: 2G
40
+ cpus: '1.0'
41
+
42
+ volumes:
43
+ model_cache:
44
+ driver: local
45
+ logs:
46
+ driver: local
docker-dev.sh ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# AI Notes Summarizer - Docker Development Script
#
# Starts the development stack defined in docker-compose.dev.yml in the
# foreground and reports how it exited.
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}🛠️ Starting AI Notes Summarizer in Development Mode${NC}"
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo -e "${RED}❌ Docker is not running. Please start Docker and try again.${NC}"
    exit 1
fi

# Pick the Compose CLI: the standalone docker-compose binary when present,
# otherwise the `docker compose` plugin.
if command -v docker-compose > /dev/null 2>&1; then
    COMPOSE=(docker-compose)
elif docker compose version > /dev/null 2>&1; then
    COMPOSE=(docker compose)
else
    echo -e "${RED}❌ docker-compose is not installed. Please install it and try again.${NC}"
    exit 1
fi

# Create necessary directories
mkdir -p logs uploads

# Start development environment.
# The exit status is captured with `|| status=$?` so that `set -e` does not
# abort the script before the final status message is printed.
echo -e "${BLUE}🐳 Starting development environment...${NC}"
status=0
"${COMPOSE[@]}" -f docker-compose.dev.yml up --build || status=$?

if [ "${status}" -eq 0 ]; then
    echo -e "${GREEN}✅ Development environment stopped.${NC}"
else
    echo -e "${YELLOW}⚠️ Development environment exited with status ${status}.${NC}"
fi
exit "${status}"
docker-run.sh ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# AI Notes Summarizer - Docker Run Script
#
# Usage: ./docker-run.sh [TAG] [PORT]
#   TAG   Image tag to run (default: latest)
#   PORT  Host port mapped to the container's port 8501 (default: 8501)
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
IMAGE_NAME="ai-notes-summarizer"
TAG="${1:-latest}"
CONTAINER_NAME="ai-notes-summarizer-app"
PORT="${2:-8501}"

echo -e "${BLUE}🚀 Running AI Notes Summarizer Docker Container${NC}"
echo -e "${YELLOW}Image: ${IMAGE_NAME}:${TAG}${NC}"
echo -e "${YELLOW}Port: ${PORT}${NC}"
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo -e "${RED}❌ Docker is not running. Please start Docker and try again.${NC}"
    exit 1
fi

# Check if image exists
if ! docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1; then
    echo -e "${RED}❌ Image ${IMAGE_NAME}:${TAG} not found. Please build it first:${NC}"
    echo -e "${YELLOW}./docker-build.sh${NC}"
    exit 1
fi

# Stop and remove existing container if it exists
if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
    echo -e "${YELLOW}🛑 Stopping existing container...${NC}"
    docker stop "${CONTAINER_NAME}" > /dev/null 2>&1 || true
    docker rm "${CONTAINER_NAME}" > /dev/null 2>&1 || true
fi

# Create directories for volumes
mkdir -p logs uploads

# Run the container.
# `docker run` is the `if` condition on purpose: with `set -e` active, a
# separate `[ $? -eq 0 ]` check afterwards would never reach its failure
# branch because the script would already have exited.
echo -e "${BLUE}🐳 Starting container...${NC}"
if docker run -d \
    --name "${CONTAINER_NAME}" \
    -p "${PORT}:8501" \
    -v "$(pwd)/logs:/app/logs" \
    -v "$(pwd)/uploads:/app/uploads" \
    -v ai-notes-model-cache:/app/.cache \
    --restart unless-stopped \
    "${IMAGE_NAME}:${TAG}"; then
    echo -e "${GREEN}✅ Container started successfully!${NC}"
    echo ""
    echo -e "${GREEN}🌐 Application URL: ${YELLOW}http://localhost:${PORT}${NC}"
    echo -e "${GREEN}📊 Container Status:${NC}"
    docker ps --filter "name=${CONTAINER_NAME}" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
    echo ""
    echo -e "${BLUE}📝 Useful commands:${NC}"
    echo -e "${YELLOW}View logs: docker logs -f ${CONTAINER_NAME}${NC}"
    echo -e "${YELLOW}Stop container: docker stop ${CONTAINER_NAME}${NC}"
    echo -e "${YELLOW}Remove container: docker rm ${CONTAINER_NAME}${NC}"
else
    echo -e "${RED}❌ Failed to start container!${NC}"
    exit 1
fi
docker-test.sh ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# AI Notes Summarizer - Docker Test Script
#
# Builds a throw-away image, starts it on TEST_PORT, runs a series of smoke
# checks against it and removes the container and image again on exit.
set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
IMAGE_NAME="ai-notes-summarizer"
TAG="test"
CONTAINER_NAME="ai-notes-summarizer-test"
TEST_PORT="8502"

echo -e "${BLUE}🧪 Testing AI Notes Summarizer Docker Setup${NC}"
echo ""

# Function to cleanup
cleanup() {
    echo -e "${YELLOW}🧹 Cleaning up test resources...${NC}"
    docker stop "${CONTAINER_NAME}" > /dev/null 2>&1 || true
    docker rm "${CONTAINER_NAME}" > /dev/null 2>&1 || true
    docker rmi "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 || true
}

# Trap cleanup on exit
trap cleanup EXIT

# Test 1: Build the Docker image
echo -e "${BLUE}📦 Test 1: Building Docker image...${NC}"
if docker build -t "${IMAGE_NAME}:${TAG}" .; then
    echo -e "${GREEN}✅ Docker build successful${NC}"
else
    echo -e "${RED}❌ Docker build failed${NC}"
    exit 1
fi

# Test 2: Check image size
echo -e "${BLUE}📊 Test 2: Checking image size...${NC}"
IMAGE_SIZE=$(docker images "${IMAGE_NAME}:${TAG}" --format "{{.Size}}")
echo -e "${YELLOW}Image size: ${IMAGE_SIZE}${NC}"

# Test 3: Run container
echo -e "${BLUE}🚀 Test 3: Starting container...${NC}"
if docker run -d --name "${CONTAINER_NAME}" -p "${TEST_PORT}:8501" "${IMAGE_NAME}:${TAG}"; then
    echo -e "${GREEN}✅ Container started successfully${NC}"
else
    echo -e "${RED}❌ Container failed to start${NC}"
    exit 1
fi

# Test 4: Wait for application to be ready (poll the health endpoint once per
# second, giving up after 30 attempts)
echo -e "${BLUE}⏳ Test 4: Waiting for application to be ready...${NC}"
for i in {1..30}; do
    if curl -f "http://localhost:${TEST_PORT}/_stcore/health" > /dev/null 2>&1; then
        echo -e "${GREEN}✅ Application is ready${NC}"
        break
    fi
    if [ "$i" -eq 30 ]; then
        echo -e "${RED}❌ Application failed to start within 30 seconds${NC}"
        docker logs "${CONTAINER_NAME}"
        exit 1
    fi
    sleep 1
done

# Test 5: Check application response.
# Only the HTTP status of the root page is checked. The previous check grepped
# the page for "AI Notes Summarizer", a string app.py never emits (its title
# is "NoteSnap"), so it could not succeed.
echo -e "${BLUE}🌐 Test 5: Testing application response...${NC}"
if curl -fsS -o /dev/null "http://localhost:${TEST_PORT}"; then
    echo -e "${GREEN}✅ Application responding correctly${NC}"
else
    echo -e "${RED}❌ Application not responding correctly${NC}"
    exit 1
fi

# Test 6: Check container logs for errors (matches are printed, not fatal)
echo -e "${BLUE}📝 Test 6: Checking container logs...${NC}"
if docker logs "${CONTAINER_NAME}" 2>&1 | grep -i error; then
    echo -e "${YELLOW}⚠️ Found errors in logs (see above)${NC}"
else
    echo -e "${GREEN}✅ No errors found in logs${NC}"
fi

# Test 7: Test Docker Compose (standalone binary or the `docker compose` plugin)
echo -e "${BLUE}🐙 Test 7: Testing Docker Compose...${NC}"
if docker-compose config > /dev/null 2>&1 || docker compose config > /dev/null 2>&1; then
    echo -e "${GREEN}✅ Docker Compose configuration is valid${NC}"
else
    echo -e "${RED}❌ Docker Compose configuration is invalid${NC}"
    exit 1
fi

# Test 8: Security scan (if available)
echo -e "${BLUE}🔒 Test 8: Basic security check...${NC}"
if command -v docker &> /dev/null; then
    # Check if running as non-root; a failed `docker exec` is treated as root
    USER_CHECK=$(docker exec "${CONTAINER_NAME}" whoami 2>/dev/null || echo "root")
    if [ "$USER_CHECK" != "root" ]; then
        echo -e "${GREEN}✅ Container running as non-root user: ${USER_CHECK}${NC}"
    else
        echo -e "${YELLOW}⚠️ Container running as root user${NC}"
    fi
fi

echo ""
echo -e "${GREEN}🎉 All tests passed successfully!${NC}"
echo ""
echo -e "${BLUE}📊 Test Summary:${NC}"
echo -e "${YELLOW}Image: ${IMAGE_NAME}:${TAG}${NC}"
echo -e "${YELLOW}Size: ${IMAGE_SIZE}${NC}"
echo -e "${YELLOW}Test URL: http://localhost:${TEST_PORT}${NC}"
echo ""
echo -e "${BLUE}🚀 Ready for deployment!${NC}"
formatting.patch ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- a/app.py
2
+ +++ b/app.py
3
+ @@ -193,4 +193,4 @@
4
+
5
+
6
+ if __name__ == "__main__":
7
+ - main()
8
+ + main()
9
+
10
+ --- a/modules/pdf_processor.py
11
+ +++ b/modules/pdf_processor.py
12
+ @@ -174,4 +174,4 @@
13
+ "The extracted text is very short. Please check if the PDF contains readable text."
14
+ )
15
+
16
+ - return processed_text
17
+ + return processed_text
18
+
19
+ --- a/modules/text_summarizer.py
20
+ +++ b/modules/text_summarizer.py
21
+ @@ -282,4 +282,4 @@
22
+ )
23
+ return None
24
+ except Exception as e:
25
+ - st.error(f"❌ Unexpected error during summarization: {str(e)}")
26
+ - return None
27
+ + st.error(f"❌ Unexpected error during summarization: {str(e)}")
28
+ + return None
29
+
30
+ --- a/modules/utils.py
31
+ +++ b/modules/utils.py
32
+ @@ -124,4 +124,4 @@
33
+
34
+ b64 = base64.b64encode(content.encode()).decode()
35
+ href = f'<a href="data:text/plain;base64,{b64}" download="{filename}">Download Summary</a>'
36
+ - return href
37
+ + return href
38
+
39
+ --- a/run.py
40
+ +++ b/run.py
41
+ @@ -39,4 +39,4 @@
42
+
43
+
44
+ if __name__ == "__main__":
45
+ - main()
46
+ + main()
47
+
48
+ --- a/setup.py
49
+ +++ b/setup.py
50
+ @@ -41,4 +41,4 @@
51
+ "ai-notes-summarizer=app:main",
52
+ ],
53
+ },
54
+ -)
55
+ +)
56
+
57
+ --- a/test_basic.py
58
+ +++ b/test_basic.py
59
+ @@ -140,4 +140,4 @@
60
+
61
+ if __name__ == "__main__":
62
+ success = main()
63
+ - sys.exit(0 if success else 1)
64
+ + sys.exit(0 if success else 1)
65
+
requirements.docker.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docker-optimized requirements for AI Notes Summarizer
2
+ # Core web framework
3
+ streamlit>=1.28.0
4
+
5
+ # AI/ML libraries - CPU optimized versions for smaller image size
6
+ transformers>=4.35.0
7
+ torch>=2.0.0,<2.1.0 # Pin to avoid large updates
8
+ tokenizers>=0.14.0
9
+
10
+ # PDF processing
11
+ PyPDF2>=3.0.1
12
+
13
+ # Text processing and utilities
14
+ regex>=2023.10.3
15
+ numpy>=1.24.0,<1.25.0 # Pin for stability
16
+ pandas>=2.0.0,<2.1.0 # Pin for stability
17
+
18
+ # Additional dependencies for transformers
19
+ accelerate>=0.24.0
20
+ safetensors>=0.4.0
21
+
22
+ # Optional: For better performance (smaller package)
23
+ sentencepiece>=0.1.99
24
+
25
+ # Production dependencies
26
+ gunicorn>=21.2.0 # For potential WSGI deployment
27
+ uvicorn>=0.23.0 # Alternative ASGI server
requirements.txt CHANGED
@@ -1,3 +1,27 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core web framework
2
+ streamlit>=1.28.0
3
+
4
+ # AI/ML libraries
5
+ transformers>=4.35.0
6
+ torch>=2.0.0
7
+ tokenizers>=0.14.0
8
+
9
+ # PDF processing
10
+ PyPDF2>=3.0.1
11
+
12
+ # Text processing and utilities
13
+ regex>=2023.10.3
14
+ numpy>=1.24.0
15
+ pandas>=2.0.0
16
+
17
+ # Additional dependencies for transformers
18
+ accelerate>=0.24.0
19
+ safetensors>=0.4.0
20
+
21
+ # Optional: For better performance
22
+ sentencepiece>=0.1.99
23
+
24
+ # Development and testing (optional)
25
+ pytest>=7.4.0
26
+ black>=23.0.0
27
+ flake8>=6.0.0
run.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Run script for AI Notes Summarizer
4
+ """
5
+
6
+ import subprocess
7
+ import sys
8
+ import os
9
+
10
+
11
def main():
    """Run the Streamlit application and propagate its exit status.

    Changes the working directory to the directory containing this script,
    then launches ``streamlit run app.py`` with the current interpreter in
    headless mode on 0.0.0.0:8501.

    Returns:
        None when Streamlit exits with status 0 or the user interrupts it.

    Raises:
        SystemExit: with Streamlit's return code when it is non-zero, or
            with 1 when the process could not be launched.
    """
    try:
        # Change to the application directory so "app.py" resolves
        app_dir = os.path.dirname(os.path.abspath(__file__))
        os.chdir(app_dir)

        # Run streamlit
        result = subprocess.run(
            [
                sys.executable,
                "-m",
                "streamlit",
                "run",
                "app.py",
                "--server.headless",
                "true",
                "--server.port",
                "8501",
                "--server.address",
                "0.0.0.0",
            ]
        )
    except KeyboardInterrupt:
        print("\nApplication stopped by user")
        return
    except Exception as e:
        print(f"Error running application: {e}")
        sys.exit(1)

    # Previously the return code was discarded, so a failed launch (for
    # example Streamlit not being installed) still exited with status 0.
    if result.returncode != 0:
        sys.exit(result.returncode)


if __name__ == "__main__":
    main()
setup.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Setup script for AI Notes Summarizer
3
+ """
4
+
5
+ from setuptools import setup, find_packages
6
+
7
def _read_requirements(path="requirements.txt"):
    """Return the requirement specifiers listed in *path*.

    Blank lines and comment lines are skipped (also when the ``#`` is
    indented), and trailing `` # ...`` inline comments are removed.
    """
    requirements = []
    with open(path, "r", encoding="utf-8") as fh:
        for raw_line in fh:
            # Drop an inline comment first, then surrounding whitespace.
            line = raw_line.split(" #", 1)[0].strip()
            if line and not line.startswith("#"):
                requirements.append(line)
    return requirements


with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

requirements = _read_requirements()

setup(
    name="ai-notes-summarizer",
    version="1.0.0",
    author="AI Notes Summarizer",
    description="A web application for AI-powered document summarization",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    # app.py is a top-level module, which find_packages() does not pick up;
    # it must be listed here for the "app:main" entry point to be importable.
    py_modules=["app"],
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Education",
        "Intended Audience :: End Users/Desktop",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Text Processing :: Linguistic",
    ],
    python_requires=">=3.8",
    install_requires=requirements,
    entry_points={
        "console_scripts": [
            "ai-notes-summarizer=app:main",
        ],
    },
)
start.sh ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# AI Notes Summarizer - Startup Script
#
# Creates a virtual environment in ./venv if needed, installs the
# dependencies from requirements.txt and starts the Streamlit app.

# Abort on the first failing step (e.g. venv creation or activation).
set -e

echo "🚀 Starting AI Notes Summarizer..."

# Check if Python is available
if ! command -v python3 &> /dev/null; then
    echo "❌ Python 3 is not installed. Please install Python 3.8 or higher."
    exit 1
fi

# Check if pip is available
if ! command -v pip3 &> /dev/null; then
    echo "❌ pip3 is not installed. Please install pip3."
    exit 1
fi

# Check if virtual environment exists
if [ ! -d "venv" ]; then
    echo "📦 Creating virtual environment..."
    python3 -m venv venv
fi

# Activate virtual environment
echo "🔧 Activating virtual environment..."
source venv/bin/activate

# Install requirements when they were never installed, or when
# requirements.txt is newer than the marker of the last successful install.
if [ ! -f "venv/installed" ] || [ requirements.txt -nt "venv/installed" ]; then
    echo "📥 Installing dependencies..."
    # Only write the marker after a successful install; otherwise a failed
    # install would be reported as success and skipped on the next run.
    if ! pip install -r requirements.txt; then
        echo "❌ Failed to install dependencies."
        exit 1
    fi
    touch venv/installed
    echo "✅ Dependencies installed successfully!"
fi

# Start the application
echo "🌟 Launching AI Notes Summarizer..."
echo "📱 The application will open in your browser at http://localhost:8501"
echo "⏹️ Press Ctrl+C to stop the application"
echo ""

streamlit run app.py
test_basic.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Basic tests for AI Notes Summarizer modules
4
+ """
5
+
6
+ import sys
7
+ import os
8
+
9
+ # Add the current directory to Python path
10
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
11
+
12
+
13
def test_imports():
    """Check that every project module and the names used from it import.

    Returns:
        bool: True when all imports succeed, False after the first failure
        (the failure is printed).
    """
    import importlib

    print("Testing module imports...")

    # (label used in messages, module path, names expected in the module)
    targets = (
        ("PDF Processor", "modules.pdf_processor", ("PDFProcessor",)),
        ("Text Summarizer", "modules.text_summarizer", ("TextSummarizer",)),
        ("Utils", "modules.utils", ("setup_logging", "validate_input")),
    )

    for label, module_name, names in targets:
        try:
            module = importlib.import_module(module_name)
            for name in names:
                # Mirror `from module import name`: a missing name is an
                # import failure, not an AttributeError.
                if not hasattr(module, name):
                    raise ImportError(
                        f"cannot import name {name!r} from {module_name!r}"
                    )
        except ImportError as e:
            print(f"❌ Failed to import {label}: {e}")
            return False

        print(f"✅ {label} imported successfully")

    return True
42
+
43
+
44
def test_pdf_processor():
    """Smoke-test ``PDFProcessor.preprocess_text`` on a small sample.

    Returns:
        bool: True when preprocessing runs without raising, False otherwise
        (the error is printed).
    """
    print("\nTesting PDF Processor...")

    try:
        from modules.pdf_processor import PDFProcessor

        processor = PDFProcessor()

        # The sample really contains runs of spaces and blank lines, so the
        # printed result shows what preprocessing does with them.
        test_text = "This is a test\n\nwith   multiple    spaces\nand newlines."
        cleaned = processor.preprocess_text(test_text)
        print(f"✅ Text preprocessing works: '{cleaned}'")

        return True
    except Exception as e:
        print(f"❌ PDF Processor test failed: {e}")
        return False
62
+
63
+
64
def test_text_summarizer():
    """Smoke-test the summarizer helpers called here without a model.

    Exercises ``chunk_text`` and ``format_as_bullets`` and prints their
    results.

    Returns:
        bool: True when both calls run without raising, False otherwise
        (the error is printed).
    """
    print("\nTesting Text Summarizer...")

    try:
        from modules.text_summarizer import TextSummarizer

        summarizer = TextSummarizer()

        # Test text chunking without model loading
        test_text = "This is a test sentence. " * 100
        chunks = summarizer.chunk_text(test_text)
        print(f"✅ Text chunking works: {len(chunks)} chunks created")

        # Test bullet formatting
        test_summary = "This is the first point. This is the second point. This is the third point."
        bullets = summarizer.format_as_bullets(test_summary)
        print(f"✅ Bullet formatting works:\n{bullets}")

        return True
    except Exception as e:
        print(f"❌ Text Summarizer test failed: {e}")
        return False
87
+
88
+
89
def test_utils():
    """Smoke-test ``validate_input``, ``clean_text`` and ``format_file_size``.

    Each helper is called once and its result printed.

    Returns:
        bool: True when all three calls run without raising, False otherwise
        (the error is printed).
    """
    print("\nTesting Utils...")

    try:
        from modules.utils import validate_input, clean_text, format_file_size

        # Test input validation
        valid = validate_input(
            "This is a test text that is long enough to pass validation."
        )
        print(f"✅ Input validation works: {valid}")

        # Test text cleaning; the sample really contains runs of spaces so
        # the printed result shows what cleaning does with them.
        dirty_text = "This   has    multiple   spaces and special@#$%characters!"
        clean = clean_text(dirty_text)
        print(f"✅ Text cleaning works: '{clean}'")

        # Test file size formatting (1 MiB expressed in bytes)
        size_str = format_file_size(1024 * 1024)
        print(f"✅ File size formatting works: {size_str}")

        return True
    except Exception as e:
        print(f"❌ Utils test failed: {e}")
        return False
115
+
116
+
117
def main():
    """Run every basic test and print a pass/fail summary.

    Returns:
        bool: True when all tests passed, False otherwise.
    """
    print("🧪 Running Basic Tests for AI Notes Summarizer\n")

    tests = [test_imports, test_pdf_processor, test_text_summarizer, test_utils]

    passed = 0
    total = len(tests)

    for test in tests:
        if test():
            passed += 1
        print()  # blank line between the tests' output

    print(f"📊 Test Results: {passed}/{total} tests passed")

    all_passed = passed == total
    if all_passed:
        print("🎉 All tests passed! The application is ready to run.")
    else:
        print("⚠️ Some tests failed. Please check the errors above.")
    return all_passed


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)