legend1234 committed on
Commit
f51532a
·
1 Parent(s): 119352e

Simplify the web server

Browse files
Files changed (4) hide show
  1. Dockerfile +5 -11
  2. app.py +19 -59
  3. docker-compose.yml +5 -91
  4. gunicorn_config.py +10 -11
Dockerfile CHANGED
@@ -5,7 +5,6 @@ WORKDIR /app
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
- curl \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
  # Copy requirements first to leverage Docker cache
@@ -15,15 +14,10 @@ RUN pip install --no-cache-dir -r requirements.txt
15
  # Copy application code
16
  COPY . .
17
 
18
- # Create uploads directory
19
- RUN mkdir -p uploads && chmod 777 uploads
 
 
20
 
21
- # Create a non-root user
22
- RUN useradd -m appuser && chown -R appuser:appuser /app
23
- USER appuser
24
-
25
- # # Set environment variables for the buffered output
26
- # ENV PYTHONUNBUFFERED=1
27
-
28
- # Default command (can be overridden in docker-compose.yml)
29
  CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]
 
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
 
8
  && rm -rf /var/lib/apt/lists/*
9
 
10
  # Copy requirements first to leverage Docker cache
 
14
  # Copy application code
15
  COPY . .
16
 
17
+ # Create necessary directories with proper permissions
18
+ RUN mkdir -p uploads md_files && \
19
+ chmod 777 uploads && \
20
+ chmod 755 md_files
21
 
22
+ # Default command
 
 
 
 
 
 
 
23
  CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # import json
2
  import inspect
3
  import io
4
  import os
@@ -16,7 +15,6 @@ import numpy as np
16
  import orjson
17
  import pandas as pd
18
  from flask import Flask, Response, render_template, request, send_file
19
- from flask_status import FlaskStatus
20
  from selector.methods.distance import DISE, MaxMin, MaxSum, OptiSim
21
  from selector.methods.partition import GridPartition, Medoid
22
  from selector.methods.similarity import NSimilarity
@@ -24,17 +22,13 @@ from selector.measures.diversity import compute_diversity
24
  from sklearn.metrics import pairwise_distances
25
  from werkzeug.utils import secure_filename
26
 
27
- try:
28
- from celery_config import celery
29
-
30
- CELERY_AVAILABLE = True
31
- except ImportError:
32
- CELERY_AVAILABLE = False
33
 
34
  app = Flask(__name__)
35
- app_status = FlaskStatus(app)
36
  app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024 # 32MB max file size
37
- app.config["UPLOAD_FOLDER"] = "uploads"
38
  file_lock = threading.Lock()
39
 
40
  # Ensure upload directory exists
@@ -57,24 +51,23 @@ SELECTION_ALGORITHM_MAP = {
57
 
58
 
59
  def allowed_file(filename):
 
60
  return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
61
 
62
-
63
  def get_unique_upload_dir():
64
  """Create a unique directory for each upload session."""
65
  unique_dir = os.path.join(app.config["UPLOAD_FOLDER"], str(uuid.uuid4()))
66
  os.makedirs(unique_dir, exist_ok=True)
 
67
  return unique_dir
68
 
69
-
70
- def clean_upload_dir(directory):
71
- """Safely clean up upload directory."""
72
  try:
73
- if os.path.exists(directory):
74
- shutil.rmtree(directory)
75
  except Exception as e:
76
- print(f"Error cleaning directory {directory}: {e}")
77
-
78
 
79
  def load_data(filepath):
80
  """Load data from various file formats."""
@@ -348,6 +341,7 @@ def upload_selection_file():
348
 
349
  with file_lock:
350
  file.save(file_path)
 
351
 
352
  # Load data
353
  array = load_data(file_path)
@@ -434,7 +428,7 @@ def calculate_diversity():
434
  # Get files from request
435
  feature_subset_file = request.files.get('feature_subset')
436
  features_file = request.files.get('features')
437
-
438
  if not feature_subset_file:
439
  return create_json_response({"error": "Feature subset file is required"}, 400)
440
 
@@ -497,7 +491,7 @@ def calculate_diversity():
497
  features=features,
498
  cs=cs
499
  )
500
-
501
  return create_json_response({
502
  "success": True,
503
  "diversity_score": float(diversity_score)
@@ -512,44 +506,10 @@ def calculate_diversity():
512
  except Exception as e:
513
  return create_json_response({"error": str(e)}, 500)
514
 
515
-
516
- @app.route("/status")
517
- def server_status():
518
- """Return server status"""
519
- status = {
520
- "status": "ok",
521
- "message": "Server is running",
522
- "timestamp": datetime.now().isoformat(),
523
- "components": {"flask": True, "celery": False, "redis": False},
524
- }
525
-
526
- if CELERY_AVAILABLE:
527
- # Check Celery
528
- try:
529
- celery.control.ping(timeout=1)
530
- status["components"]["celery"] = True
531
- except Exception as e:
532
- print(f"Celery check failed: {e}")
533
-
534
- # Check Redis
535
- try:
536
- redis_client = celery.backend.client
537
- redis_client.ping()
538
- status["components"]["redis"] = True
539
- except Exception as e:
540
- print(f"Redis check failed: {e}")
541
-
542
- # Set overall status
543
- if not all(status["components"].values()):
544
- status["status"] = "degraded"
545
- status["message"] = "Some components are not available"
546
- else:
547
- status["message"] = "Running without Celery/Redis support"
548
-
549
- return create_json_response(status)
550
-
551
 
552
  if __name__ == "__main__":
553
- app.run(debug=True, host="0.0.0.0", port=8008)
554
- from flask_debugtoolbar import DebugToolbarExtension
555
- toolbar = DebugToolbarExtension(app)
 
 
1
  import inspect
2
  import io
3
  import os
 
15
  import orjson
16
  import pandas as pd
17
  from flask import Flask, Response, render_template, request, send_file
 
18
  from selector.methods.distance import DISE, MaxMin, MaxSum, OptiSim
19
  from selector.methods.partition import GridPartition, Medoid
20
  from selector.methods.similarity import NSimilarity
 
22
  from sklearn.metrics import pairwise_distances
23
  from werkzeug.utils import secure_filename
24
 
25
+ # Constants
26
+ UPLOAD_FOLDER = "uploads"
27
+ ALLOWED_EXTENSIONS = {"txt", "npz", "xlsx", "xls"}
 
 
 
28
 
29
  app = Flask(__name__)
 
30
  app.config["MAX_CONTENT_LENGTH"] = 32 * 1024 * 1024 # 32MB max file size
31
+ app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
32
  file_lock = threading.Lock()
33
 
34
  # Ensure upload directory exists
 
51
 
52
 
53
  def allowed_file(filename):
54
+ """Check if file extension is allowed."""
55
  return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
56
 
 
57
  def get_unique_upload_dir():
58
  """Create a unique directory for each upload session."""
59
  unique_dir = os.path.join(app.config["UPLOAD_FOLDER"], str(uuid.uuid4()))
60
  os.makedirs(unique_dir, exist_ok=True)
61
+ os.chmod(unique_dir, 0o777) # Full permissions for Docker container
62
  return unique_dir
63
 
64
+ def clean_upload_dir(upload_dir):
65
+ """Clean up upload directory after processing."""
 
66
  try:
67
+ if os.path.exists(upload_dir):
68
+ shutil.rmtree(upload_dir)
69
  except Exception as e:
70
+ print(f"Error cleaning upload directory: {e}")
 
71
 
72
  def load_data(filepath):
73
  """Load data from various file formats."""
 
341
 
342
  with file_lock:
343
  file.save(file_path)
344
+ # os.chmod(file_path, 0o666) # Read/write for all
345
 
346
  # Load data
347
  array = load_data(file_path)
 
428
  # Get files from request
429
  feature_subset_file = request.files.get('feature_subset')
430
  features_file = request.files.get('features')
431
+
432
  if not feature_subset_file:
433
  return create_json_response({"error": "Feature subset file is required"}, 400)
434
 
 
491
  features=features,
492
  cs=cs
493
  )
494
+
495
  return create_json_response({
496
  "success": True,
497
  "diversity_score": float(diversity_score)
 
506
  except Exception as e:
507
  return create_json_response({"error": str(e)}, 500)
508
 
509
+ @app.route('/health')
510
+ def health_check():
511
+ """Health check endpoint for Docker"""
512
+ return create_json_response({"status": "healthy"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
 
514
  if __name__ == "__main__":
515
+ app.run(debug=True, host="0.0.0.0", port=8009)
 
 
docker-compose.yml CHANGED
@@ -3,106 +3,20 @@ version: '3.8'
3
  services:
4
  web:
5
  build: .
6
- command: gunicorn --config gunicorn_config.py app:app
7
- expose:
8
- - "8008"
9
- volumes:
10
- - .:/app
11
- - upload_data:/app/uploads
12
- depends_on:
13
- redis:
14
- condition: service_healthy
15
- environment:
16
- - FLASK_ENV=production
17
- - REDIS_URL=redis://redis:6379/0
18
- deploy:
19
- replicas: 1
20
- resources:
21
- limits:
22
- cpus: '0.6'
23
- memory: 6G
24
- healthcheck:
25
- test: ["CMD", "curl", "-f", "http://localhost:8008/health"]
26
- interval: 30s
27
- timeout: 10s
28
- retries: 3
29
- start_period: 40s
30
- restart: unless-stopped
31
-
32
- redis:
33
- image: redis:7-alpine
34
- command: redis-server --appendonly yes
35
- volumes:
36
- - redis_data:/data
37
  ports:
38
- - "6379:6379"
39
- healthcheck:
40
- test: ["CMD", "redis-cli", "ping"]
41
- interval: 10s
42
- timeout: 5s
43
- retries: 3
44
- deploy:
45
- resources:
46
- limits:
47
- cpus: '0.4'
48
- memory: 1G
49
- restart: unless-stopped
50
-
51
- celery_worker:
52
- build: .
53
- command: celery -A app.celery worker --loglevel=info
54
  volumes:
55
  - .:/app
56
  - upload_data:/app/uploads
57
- depends_on:
58
- - redis
59
  environment:
60
- - REDIS_URL=redis://redis:6379/0
61
- deploy:
62
- replicas: 2
63
- resources:
64
- limits:
65
- cpus: '0.8'
66
- memory: 6G
67
- restart: unless-stopped
68
-
69
- celery_flower:
70
- build: .
71
- command: celery -A app.celery flower
72
- ports:
73
- - "5555:5555"
74
- volumes:
75
- - .:/app
76
- - flower_data:/app/flower
77
- depends_on:
78
- - redis
79
- - celery_worker
80
- environment:
81
- - REDIS_URL=redis://redis:6379/0
82
- deploy:
83
- resources:
84
- limits:
85
- cpus: '0.2'
86
- memory: 512M
87
- restart: unless-stopped
88
-
89
- nginx:
90
- image: nginx:alpine
91
- ports:
92
- - "8008:8008"
93
- volumes:
94
- - ./nginx.conf:/etc/nginx/nginx.conf:ro
95
- - .:/app:ro
96
- depends_on:
97
- - web
98
  deploy:
99
  resources:
100
  limits:
101
- cpus: '0.2'
102
- memory: 512M
103
  restart: unless-stopped
104
 
105
  volumes:
106
- redis_data:
107
  upload_data:
108
- flower_data:
 
3
  services:
4
  web:
5
  build: .
6
+ command: gunicorn --config gunicorn_config.py app:app --reload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  ports:
8
+ - "8009:8009"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  volumes:
10
  - .:/app
11
  - upload_data:/app/uploads
 
 
12
  environment:
13
+ - FLASK_ENV=production
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  deploy:
15
  resources:
16
  limits:
17
+ cpus: '1.0'
18
+ memory: 12G
19
  restart: unless-stopped
20
 
21
  volumes:
 
22
  upload_data:
 
gunicorn_config.py CHANGED
@@ -1,21 +1,21 @@
1
  import multiprocessing
2
  import os
3
 
4
- # Number of worker processes - limited for 2vCPU environment
5
- workers = 2 # Using 2 workers for 2vCPU
6
 
7
- # Number of threads per worker - reduced for memory efficiency
8
- threads = 2
9
 
10
  # Maximum number of pending connections
11
- backlog = 1024
12
 
13
  # Maximum number of requests a worker will process before restarting
14
- max_requests = 1000
15
  max_requests_jitter = 50
16
 
17
- # Timeout for worker processes (5 minutes)
18
- timeout = 300
19
 
20
  # Keep-alive timeout
21
  keepalive = 5
@@ -27,10 +27,9 @@ errorlog = "-"
27
  access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
28
 
29
  # Bind address - use HF_PORT for Hugging Face deployment
30
- port = os.getenv('PORT', '8008') # HF uses PORT env var
31
- bind = f"0.0.0.0:{port}"
32
 
33
- # Worker class - using sync for better stability
34
  worker_class = "sync"
35
 
36
  # Process name
 
1
  import multiprocessing
2
  import os
3
 
4
+ # Number of worker processes - adjust based on CPU cores
5
+ workers = 1
6
 
7
+ # Number of threads per worker
8
+ threads = 4
9
 
10
  # Maximum number of pending connections
11
+ backlog = 2048
12
 
13
  # Maximum number of requests a worker will process before restarting
14
+ max_requests = 10000
15
  max_requests_jitter = 50
16
 
17
+ # Timeout for worker processes (2 minutes)
18
+ timeout = 120
19
 
20
  # Keep-alive timeout
21
  keepalive = 5
 
27
  access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
28
 
29
  # Bind address - use HF_PORT for Hugging Face deployment
30
+ bind = "0.0.0.0:8009"
 
31
 
32
+ # Worker class
33
  worker_class = "sync"
34
 
35
  # Process name