Commit f51532a
Parent(s): 119352e

Simplify the web server

Files changed:
- Dockerfile +5 -11
- app.py +19 -59
- docker-compose.yml +5 -91
- gunicorn_config.py +10 -11
Dockerfile (CHANGED)

@@ -5,7 +5,6 @@ WORKDIR /app
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
-    curl \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy requirements first to leverage Docker cache
@@ -15,15 +14,10 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY . .
 
-# Create
-RUN mkdir -p uploads &&
+# Create necessary directories with proper permissions
+RUN mkdir -p uploads md_files && \
+    chmod 777 uploads && \
+    chmod 755 md_files
 
-#
-RUN useradd -m appuser && chown -R appuser:appuser /app
-USER appuser
-
-# # Set environment variables for the buffered output
-# ENV PYTHONUNBUFFERED=1
-
-# Default command (can be overridden in docker-compose.yml)
+# Default command
 CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]
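Taken together, the hunks above leave the tail of the Dockerfile looking like this (reconstructed from the diff; lines before the dependency install, such as the base image, are outside the hunks):

# Copy application code
COPY . .

# Create necessary directories with proper permissions
RUN mkdir -p uploads md_files && \
    chmod 777 uploads && \
    chmod 755 md_files

# Default command
CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]

Note the trade-off: with the useradd/USER appuser lines gone, the container presumably runs as the image's default root user, and chmod 777 on uploads swaps per-user ownership for world-writable permissions to sidestep volume-permission issues.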
app.py (CHANGED)

@@ -1,4 +1,3 @@
-# import json
 import inspect
 import io
 import os
@@ -16,7 +15,6 @@ import numpy as np
 import orjson
 import pandas as pd
 from flask import Flask, Response, render_template, request, send_file
-from flask_status import FlaskStatus
 from selector.methods.distance import DISE, MaxMin, MaxSum, OptiSim
 from selector.methods.partition import GridPartition, Medoid
 from selector.methods.similarity import NSimilarity
@@ -24,17 +22,13 @@ from selector.measures.diversity import compute_diversity
 from sklearn.metrics import pairwise_distances
 from werkzeug.utils import secure_filename
 
-
-
-
-    CELERY_AVAILABLE = True
-except ImportError:
-    CELERY_AVAILABLE = False
+# Constants
+UPLOAD_FOLDER = "uploads"
+ALLOWED_EXTENSIONS = {"txt", "npz", "xlsx", "xls"}
 
 app = Flask(__name__)
-app_status = FlaskStatus(app)
 app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024  # 32MB max file size
-app.config["UPLOAD_FOLDER"] =
+app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
 file_lock = threading.Lock()
 
 # Ensure upload directory exists
@@ -57,24 +51,23 @@ SELECTION_ALGORITHM_MAP = {
 
 
 def allowed_file(filename):
+    """Check if file extension is allowed."""
     return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
 
-
 def get_unique_upload_dir():
     """Create a unique directory for each upload session."""
     unique_dir = os.path.join(app.config["UPLOAD_FOLDER"], str(uuid.uuid4()))
     os.makedirs(unique_dir, exist_ok=True)
+    os.chmod(unique_dir, 0o777)  # Full permissions for Docker container
    return unique_dir
 
-
-
-    """Safely clean up upload directory."""
+def clean_upload_dir(upload_dir):
+    """Clean up upload directory after processing."""
     try:
-        if os.path.exists(
-            shutil.rmtree(
+        if os.path.exists(upload_dir):
+            shutil.rmtree(upload_dir)
     except Exception as e:
-        print(f"Error cleaning
-
+        print(f"Error cleaning upload directory: {e}")
 
 def load_data(filepath):
     """Load data from various file formats."""
@@ -348,6 +341,7 @@ def upload_selection_file():
 
     with file_lock:
         file.save(file_path)
+        # os.chmod(file_path, 0o666)  # Read/write for all
 
     # Load data
     array = load_data(file_path)
@@ -434,7 +428,7 @@ def calculate_diversity():
     # Get files from request
     feature_subset_file = request.files.get('feature_subset')
     features_file = request.files.get('features')
-
+
     if not feature_subset_file:
         return create_json_response({"error": "Feature subset file is required"}, 400)
 
@@ -497,7 +491,7 @@ def calculate_diversity():
         features=features,
         cs=cs
     )
-
+
     return create_json_response({
         "success": True,
         "diversity_score": float(diversity_score)
@@ -512,44 +506,10 @@ def calculate_diversity():
     except Exception as e:
         return create_json_response({"error": str(e)}, 500)
 
-
-
-
-    ""
-    status = {
-        "status": "ok",
-        "message": "Server is running",
-        "timestamp": datetime.now().isoformat(),
-        "components": {"flask": True, "celery": False, "redis": False},
-    }
-
-    if CELERY_AVAILABLE:
-        # Check Celery
-        try:
-            celery.control.ping(timeout=1)
-            status["components"]["celery"] = True
-        except Exception as e:
-            print(f"Celery check failed: {e}")
-
-        # Check Redis
-        try:
-            redis_client = celery.backend.client
-            redis_client.ping()
-            status["components"]["redis"] = True
-        except Exception as e:
-            print(f"Redis check failed: {e}")
-
-    # Set overall status
-    if not all(status["components"].values()):
-        status["status"] = "degraded"
-        status["message"] = "Some components are not available"
-    else:
-        status["message"] = "Running without Celery/Redis support"
-
-    return create_json_response(status)
-
+@app.route('/health')
+def health_check():
+    """Health check endpoint for Docker"""
+    return create_json_response({"status": "healthy"})
 
 if __name__ == "__main__":
-    app.run(debug=True, host="0.0.0.0", port=
-    from flask_debugtoolbar import DebugToolbarExtension
-    toolbar = DebugToolbarExtension(app)
+    app.run(debug=True, host="0.0.0.0", port=8009)
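create_json_response is called throughout app.py but is not touched by this diff, so its definition does not appear above. As a point of reference only, a minimal sketch of what an orjson-backed helper like this usually looks like (hypothetical, not the project's actual code):

import orjson
from flask import Response

def create_json_response(payload, status=200):
    # orjson.dumps returns bytes, which Flask's Response accepts directly.
    return Response(orjson.dumps(payload), status=status, mimetype="application/json")

With the new route in place, the container can be smoke-tested with curl -sf http://localhost:8009/health, which should return {"status": "healthy"}.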
docker-compose.yml (CHANGED)

@@ -3,106 +3,20 @@ version: '3.8'
 services:
   web:
     build: .
-    command: gunicorn --config gunicorn_config.py app:app
-    expose:
-      - "8008"
-    volumes:
-      - .:/app
-      - upload_data:/app/uploads
-    depends_on:
-      redis:
-        condition: service_healthy
-    environment:
-      - FLASK_ENV=production
-      - REDIS_URL=redis://redis:6379/0
-    deploy:
-      replicas: 1
-      resources:
-        limits:
-          cpus: '0.6'
-          memory: 6G
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8008/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 40s
-    restart: unless-stopped
-
-  redis:
-    image: redis:7-alpine
-    command: redis-server --appendonly yes
-    volumes:
-      - redis_data:/data
+    command: gunicorn --config gunicorn_config.py app:app --reload
     ports:
-      - "
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
-      timeout: 5s
-      retries: 3
-    deploy:
-      resources:
-        limits:
-          cpus: '0.4'
-          memory: 1G
-    restart: unless-stopped
-
-  celery_worker:
-    build: .
-    command: celery -A app.celery worker --loglevel=info
+      - "8009:8009"
     volumes:
       - .:/app
       - upload_data:/app/uploads
-    depends_on:
-      - redis
     environment:
-      -
-    deploy:
-      replicas: 2
-      resources:
-        limits:
-          cpus: '0.8'
-          memory: 6G
-    restart: unless-stopped
-
-  celery_flower:
-    build: .
-    command: celery -A app.celery flower
-    ports:
-      - "5555:5555"
-    volumes:
-      - .:/app
-      - flower_data:/app/flower
-    depends_on:
-      - redis
-      - celery_worker
-    environment:
-      - REDIS_URL=redis://redis:6379/0
-    deploy:
-      resources:
-        limits:
-          cpus: '0.2'
-          memory: 512M
-    restart: unless-stopped
-
-  nginx:
-    image: nginx:alpine
-    ports:
-      - "8008:8008"
-    volumes:
-      - ./nginx.conf:/etc/nginx/nginx.conf:ro
-      - .:/app:ro
-    depends_on:
-      - web
+      - FLASK_ENV=production
     deploy:
       resources:
         limits:
-          cpus: '0
-          memory:
+          cpus: '1.0'
+          memory: 12G
     restart: unless-stopped
 
 volumes:
-  redis_data:
   upload_data:
-  flower_data:
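Assembling the surviving lines, the whole docker-compose.yml after this commit should reduce to a single service (a reconstruction from the hunk above; the version: '3.8' line comes from the unchanged hunk header):

version: '3.8'

services:
  web:
    build: .
    command: gunicorn --config gunicorn_config.py app:app --reload
    ports:
      - "8009:8009"
    volumes:
      - .:/app
      - upload_data:/app/uploads
    environment:
      - FLASK_ENV=production
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 12G
    restart: unless-stopped

volumes:
  upload_data:

One thing worth flagging: gunicorn's --reload flag is a development convenience that restarts workers whenever source files change, which sits oddly next to FLASK_ENV=production.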
gunicorn_config.py (CHANGED)

@@ -1,21 +1,21 @@
 import multiprocessing
 import os
 
-# Number of worker processes -
-workers =
+# Number of worker processes - adjust based on CPU cores
+workers = 1
 
-# Number of threads per worker
-threads =
+# Number of threads per worker
+threads = 4
 
 # Maximum number of pending connections
-backlog =
+backlog = 2048
 
 # Maximum number of requests a worker will process before restarting
-max_requests =
+max_requests = 10000
 max_requests_jitter = 50
 
-# Timeout for worker processes (
-timeout =
+# Timeout for worker processes (2 minutes)
+timeout = 120
 
 # Keep-alive timeout
 keepalive = 5
@@ -27,10 +27,9 @@ errorlog = "-"
 access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
 
 # Bind address - use HF_PORT for Hugging Face deployment
-
-bind = f"0.0.0.0:{port}"
+bind = "0.0.0.0:8009"
 
-# Worker class
+# Worker class
 worker_class = "sync"
 
 # Process name
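The comment above bind still points at HF_PORT, but the new value hardcodes port 8009. If the env-driven binding from the removed lines is ever wanted back, a sketch in the spirit of the old code (assuming HF_PORT carries a bare port number):

import os

# Fall back to 8009 (the port this commit standardizes on) when HF_PORT is unset.
port = os.environ.get("HF_PORT", "8009")
bind = f"0.0.0.0:{port}"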