# nemaquant/app.py
import os
import uuid
import traceback
import sys
import time
import io
import zipfile
import cv2
import csv
import pickle
import shutil
import logging
from ultralytics import YOLO
# from ultralytics.utils import ThreadingLocked
from torch import cuda
from flask import Flask, Response, render_template, request, jsonify, send_file, session
from multiprocessing.pool import Pool
from multiprocessing import set_start_method
from pathlib import Path
from datetime import datetime
from werkzeug.utils import secure_filename
from werkzeug.exceptions import HTTPException
from yolo_utils import detect_in_image
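# detect_in_image is expected to return, per image, a list of dicts shaped
# like {'bbox': [x1, y1, x2, y2], 'score': float}; read_img_and_draw() and
# export_csv() below rely on those keys.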
app = Flask(__name__)
app.secret_key = os.environ.get('FLASK_SECRET_KEY', str(uuid.uuid4())) # For session security
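# NB: the random fallback key is regenerated on every restart, which
# invalidates existing session cookies -- set FLASK_SECRET_KEY in production.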
# disable werkzeug logging - too noisy
# comment out these lines if you want to see full logs
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)
APP_ROOT = Path(__file__).parent
UPLOAD_FOLDER = APP_ROOT / 'uploads'
RESULTS_FOLDER = APP_ROOT / 'results'
ANNOT_FOLDER = APP_ROOT / 'annotated'
WEIGHTS_FILE = APP_ROOT / 'weights.pt'
app.config['UPLOAD_FOLDER'] = str(UPLOAD_FOLDER)
app.config['RESULTS_FOLDER'] = str(RESULTS_FOLDER)
app.config['WEIGHTS_FILE'] = str(WEIGHTS_FILE)
app.config['ANNOT_FOLDER'] = str(ANNOT_FOLDER)
app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg', 'tif', 'tiff'}
# skip these -- created dirs in dockerfile
# UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
# RESULTS_FOLDER.mkdir(parents=True, exist_ok=True)
# ANNOT_FOLDER.mkdir(parents=True, exist_ok=True)
# Pick the inference device once at startup; the model itself is loaded
# per worker process in init_worker() below
MODEL_DEVICE = 'cuda' if cuda.is_available() else 'cpu'
# need a global dict to hold AsyncResult objects, keyed by session id,
# so /progress can check on a running job
# maybe there's a better way around this?
async_results = {}
@app.errorhandler(Exception)
def handle_exception(e):
    # let Flask's own HTTP errors (404s, etc.) pass through unchanged
    if isinstance(e, HTTPException):
        return e
    print(f"Unhandled exception: {str(e)}")
    print(traceback.format_exc())
    return jsonify({"error": "Server error", "log": str(e)}), 500
def allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']
@app.route('/')
def index():
return render_template('index.html')
# save the uploaded files
@app.route('/uploads', methods=['POST'])
def upload_files():
session_id = session['id']
files = request.files.getlist('files')
upload_dir = Path(app.config['UPLOAD_FOLDER']) / session_id
# clear out any existing files for the session
if upload_dir.exists():
shutil.rmtree(upload_dir)
upload_dir.mkdir(parents=True, exist_ok=True)
# generate new unique filenames via uuid, save the mapping dict of old:new to session
filename_map = {}
uuid_map_to_uuid_imgname = {}
    for f in files:
        orig_name = secure_filename(f.filename)
        # skip anything without an allowed image extension
        if not orig_name or not allowed_file(orig_name):
            continue
        ext = Path(orig_name).suffix
        uuid_base = uuid.uuid4().hex
        uuid_name = f"{uuid_base}{ext}"
file_path = upload_dir / uuid_name
f.save(str(file_path))
filename_map[uuid_base] = orig_name
uuid_map_to_uuid_imgname[uuid_base] = uuid_name
session['filename_map'] = filename_map
session['uuid_map_to_uuid_imgname'] = uuid_map_to_uuid_imgname
return jsonify({'filename_map': filename_map, 'status': 'uploaded'})
# /preview route for serving original uploaded image
@app.route('/preview', methods=['POST'])
def preview_image():
try:
data = request.get_json()
        uuid_base = data.get('uuid')  # local name avoids shadowing the uuid module
        session_id = session['id']
        uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {})
        img_name = uuid_map_to_uuid_imgname.get(uuid_base)
        if not img_name:
            print(f"/preview: No img_name found for uuid {uuid_base}")
return jsonify({'error': 'File not found'}), 404
img_path = Path(app.config['UPLOAD_FOLDER']) / session_id / img_name
if not img_path.exists():
print(f"/preview: File does not exist at {img_path}")
return jsonify({'error': 'File not found'}), 404
# Determine MIME type
ext = img_path.suffix.lower()
if ext in ['.jpg', '.jpeg']:
mimetype = 'image/jpeg'
elif ext in ['.png']:
mimetype = 'image/png'
elif ext in ['.tif', '.tiff']:
mimetype = 'image/tiff'
else:
mimetype = 'application/octet-stream'
return send_file(
str(img_path),
mimetype=mimetype,
as_attachment=False,
download_name=img_name
)
except Exception as e:
print(f"Error in /preview: {e}")
return jsonify({'error': str(e)}), 500
# initializer for Pool to load model in each process
# each worker will have its own model instance
def init_worker(model_path):
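    """Pool initializer: load a YOLO model into a module-level global so each worker owns its own instance."""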
global model
model = YOLO(model_path)
if MODEL_DEVICE == 'cuda':
model.to('cuda')
# not sure if we need this decorator anymore?
#@ThreadingLocked()
def process_single_image(img_path, results_dir):
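    """Run detection on one uploaded image and pickle the raw results; returns the image's uuid stem."""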
global model
uuid_base = img_path.stem
pickle_path = results_dir / f"{uuid_base}.pkl"
results = detect_in_image(model, str(img_path))
with open(pickle_path, 'wb') as pf:
pickle.dump(results, pf)
return uuid_base
@app.route('/process', methods=['POST'])
def start_processing():
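    """Launch an async worker pool over this session's uploaded images."""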
session_id = session['id']
    job_state = {
        "status": "starting",
        "progress": 0,
        "started": True,
        "sessionId": session_id
    }
    session['job_state'] = job_state
upload_dir = Path(app.config['UPLOAD_FOLDER']) / session_id
results_dir = Path(app.config['RESULTS_FOLDER']) / session_id
# clean out old results if needed
if results_dir.exists():
shutil.rmtree(results_dir)
results_dir.mkdir(parents=True)
# set up iterable of uploaded files to process
    arg_list = [(x, results_dir) for x in upload_dir.iterdir()]
    try:
        # one worker when on GPU (several processes would contend for one device);
        # otherwise fan out across all CPU cores
        if MODEL_DEVICE == 'cuda':
            n_proc = 1
        else:
            n_proc = os.cpu_count()
pool = Pool(processes=n_proc,
initializer=init_worker,
initargs=(str(WEIGHTS_FILE),))
async_results[session_id] = pool.starmap_async(process_single_image, arg_list)
pool.close()
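        # the pool is closed to new work, but the MapResult stored above keeps a
        # reference to it (Python 3.8+), so it isn't garbage-collected mid-job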
# Update job state after process launch
job_state["status"] = "processing"
session['job_state'] = job_state
return jsonify({'status': 'processing',
'sessionId': session_id
})
except Exception as e:
print(f"Error in /process: {e}")
print(traceback.format_exc())
return jsonify({'error': str(e),
'status': 'unknown',
'sessionId': session_id}), 500
@app.route('/progress')
def get_progress():
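    """Report job progress as pickled-results / uploads; returns the full results once the pool finishes."""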
session_id = session['id']
try:
job_state = session.get('job_state')
if not job_state:
print("/progress: No job_state found in session.")
return jsonify({"status": "error", "error": "No job state"}), 404
results_dir = Path(app.config['RESULTS_FOLDER']) / session_id
uploads_dir = Path(app.config['UPLOAD_FOLDER']) / session_id
n_results = len(list(results_dir.glob('*.pkl')))
n_uploads = len(list(uploads_dir.iterdir()))
        # If the async result is ready, verify completion and update job state
        async_result = async_results.get(session_id)
        if async_result and async_result.ready():
            # surface worker failures instead of reporting 'processing' forever
            if not async_result.successful():
                job_state['status'] = 'error'
                job_state['error'] = 'One or more images failed during processing'
                session['job_state'] = job_state
                return jsonify({'status': 'error',
                                'error': job_state['error'],
                                'sessionId': session_id}), 500
            if n_results == n_uploads:
                job_state['status'] = 'completed'
                job_state['progress'] = 100
                session['job_state'] = job_state
                resp = {
                    'status': 'completed',
                    'progress': 100,
                    'filename_map': session.get('filename_map', {}),
                    'sessionId': session_id,
                    'error': job_state.get('error'),
                }
# Aggregate results into a single response object
all_results = {}
for pkl_file in results_dir.glob('*.pkl'):
uuid_base = pkl_file.stem
with open(pkl_file, 'rb') as pf:
all_results[uuid_base] = pickle.load(pf)
resp['results'] = all_results
return jsonify(resp)
# If still processing, update progress
if job_state.get('status') == 'processing':
progress = int((n_results / n_uploads) * 100) if n_uploads > 0 else 0
job_state['progress'] = progress
session['job_state'] = job_state
resp = {
'status': 'processing',
'progress': progress,
'sessionId': session_id,
}
return jsonify(resp)
# Default response as a catchall
resp = {
'status': job_state.get('status', 'unknown'),
'progress': job_state.get('progress', 0),
            'sessionId': session_id,
'error': job_state.get('error'),
}
return jsonify(resp)
except Exception as e:
print(f"Error in /progress: {e}")
print(traceback.format_exc())
return jsonify({"status": "error", "error": str(e)}), 500
def read_img_and_draw(img_path, detections, confidence):
    """Read an image and draw red boxes around detections at or above the confidence threshold."""
    img = cv2.imread(str(img_path), cv2.IMREAD_UNCHANGED)
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    # grayscale images need three channels for the red boxes to render
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    filtered = [d for d in detections if d.get('score', 0) >= confidence]
    for det in filtered:
        x1, y1, x2, y2 = map(int, det['bbox'])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 3)
    return img
# /annotate route for dynamic annotation
@app.route('/annotate', methods=['POST'])
def annotate_image():
try:
        data = request.get_json()
        uuid_base = data.get('uuid')  # local name avoids shadowing the uuid module
        confidence = float(data.get('confidence', 0.5))
        session_id = session['id']
        uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {})
        img_name = uuid_map_to_uuid_imgname.get(uuid_base)
if not img_name:
return jsonify({'error': 'File not found'}), 404
# Load detections from pickle
        result_path = Path(app.config['RESULTS_FOLDER']) / session_id / f"{uuid_base}.pkl"
if not result_path.exists():
return jsonify({'error': 'Results not found'}), 404
with open(result_path, 'rb') as pf:
detections = pickle.load(pf)
img_path = Path(app.config['UPLOAD_FOLDER']) / session_id / img_name
img = read_img_and_draw(img_path, detections, confidence)
# Save annotated image out
annot_dir = Path(app.config['ANNOT_FOLDER']) / session_id
annot_dir.mkdir(parents=True, exist_ok=True)
        annot_imgname = f"{uuid_base}_annotated.png"
annot_imgpath = str(annot_dir / annot_imgname)
cv2.imwrite(annot_imgpath, img)
# Serve image directly from disk
return send_file(
annot_imgpath,
mimetype='image/png',
as_attachment=False,
download_name=annot_imgname
)
except Exception as e:
print(f"Error in /annotate: {e}")
return jsonify({'error': str(e)}), 500
@app.route('/export_images', methods=['POST'])
def export_images():
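    """Bundle annotated copies of every uploaded image into a zip download."""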
try:
data = request.get_json()
confidence = float(data.get('confidence', 0.5))
session_id = session['id']
filename_map = session.get('filename_map', {})
uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {})
# ensure there's a landing spot
annot_dir = Path(app.config['ANNOT_FOLDER']) / session_id
annot_dir.mkdir(parents=True, exist_ok=True)
# add all annotated files to zip
memory_file = io.BytesIO()
with zipfile.ZipFile(memory_file, 'w', zipfile.ZIP_DEFLATED) as zf:
# iterate through all uuids
            for uuid_base in filename_map.keys():
                img_name = uuid_map_to_uuid_imgname.get(uuid_base)
                if not img_name:
                    continue
                img_path = Path(app.config['UPLOAD_FOLDER']) / session_id / img_name
                result_path = Path(app.config['RESULTS_FOLDER']) / session_id / f"{uuid_base}.pkl"
if not result_path.exists():
return jsonify({'error': 'Results not found'}), 404
if not img_path.exists():
return jsonify({'error': 'Image not found'}), 404
with open(result_path, 'rb') as pf:
detections = pickle.load(pf)
img = read_img_and_draw(img_path, detections, confidence)
# clean the name
                orig_name = filename_map.get(uuid_base)
                annot_imgname = f"{Path(orig_name).stem}_annotated.png"
annot_imgpath = str(annot_dir / annot_imgname)
cv2.imwrite(annot_imgpath, img)
zf.write(annot_imgpath, annot_imgname)
# timestamp for filename
memory_file.seek(0)
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
return send_file(
memory_file,
mimetype='application/zip',
as_attachment=True,
download_name=f'nemaquant_annotated_{timestamp}.zip'
)
except Exception as e:
error_message = f"Error exporting images: {str(e)}"
print(error_message)
return jsonify({"error": "Server error", "log": error_message}), 500
@app.route('/export_csv', methods=['POST'])
def export_csv():
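    """Export per-image egg counts at or above the confidence threshold as a CSV download."""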
try:
        data = request.get_json()
        session_id = session['id']
        threshold = float(data.get('confidence', 0.5))
        job_state = session.get('job_state')
        if not job_state:
            return jsonify({'error': 'Job not found'}), 404
        # detections live in the per-image pickles, not in job_state,
        # so read them the same way /annotate does
        results_dir = Path(app.config['RESULTS_FOLDER']) / session_id
        filename_map = session.get('filename_map', {})
        rows = []
        for uuid_base, orig_name in filename_map.items():
            pkl_path = results_dir / f"{uuid_base}.pkl"
            if not pkl_path.exists():
                continue
            with open(pkl_path, 'rb') as pf:
                detections = pickle.load(pf)
            count = sum(1 for d in detections if d.get('score', 0) >= threshold)
            rows.append({'Filename': orig_name, 'EggsDetected': count})
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output = io.StringIO()
writer = csv.DictWriter(output, fieldnames=['Filename', 'EggsDetected'])
writer.writeheader()
writer.writerows(rows)
output.seek(0)
return Response(
output.getvalue(),
mimetype='text/csv',
headers={
'Content-Disposition': f'attachment; filename=nemaquant_results_{timestamp}.csv'
}
)
except Exception as e:
error_message = f"Error exporting CSV: {str(e)}"
print(error_message)
return jsonify({"error": "Server error", "log": error_message}), 500
@app.before_request
def ensure_session():
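    """Assign each browser a uuid session id on its first request."""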
if 'id' not in session:
session['id'] = uuid.uuid4().hex
print(f"New session started: {session['id']}")
def print_startup_info():
print("----- NemaQuant Flask App Starting -----")
print(f"Working directory: {os.getcwd()}")
python_version_single_line = sys.version.replace('\n', ' ')
print(f"Python version: {python_version_single_line}")
print(f"Weights file: {WEIGHTS_FILE}")
print(f"Weights file exists: {WEIGHTS_FILE.exists()}")
if WEIGHTS_FILE.exists():
try:
print(f"Weights file size: {WEIGHTS_FILE.stat().st_size} bytes")
except Exception as e:
print(f"Could not get weights file size: {e}")
is_container = Path('/.dockerenv').exists() or 'DOCKER_HOST' in os.environ
print(f"Running in container: {is_container}")
if is_container:
try:
user_info = f"{os.getuid()}:{os.getgid()}"
print(f"User running process: {user_info}")
except AttributeError:
print("User running process: UID/GID not available on this OS")
for path_str in ["/app/uploads", "/app/results"]:
path_obj = Path(path_str)
if path_obj.exists():
stat_info = path_obj.stat()
permissions = oct(stat_info.st_mode)[-3:]
owner = f"{stat_info.st_uid}:{stat_info.st_gid}"
print(f"Permissions for {path_str}: {permissions}")
print(f"Owner for {path_str}: {owner}")
else:
print(f"Directory {path_str} does not exist.")
    # some cleanup steps - not sure quite where to put these;
    # for now they only run once, at startup
    print('Cleaning up old session folders...')
# Cleanup old session folders
max_age_hours = 4
now = time.time()
    for base_dir in [UPLOAD_FOLDER, RESULTS_FOLDER, ANNOT_FOLDER]:
        # base dirs are created in the dockerfile; skip any that are missing locally
        if not base_dir.exists():
            continue
        for session_dir in base_dir.iterdir():
            if session_dir.is_dir():
                mtime = session_dir.stat().st_mtime
                if now - mtime > max_age_hours * 3600:
                    shutil.rmtree(session_dir)
    print('App is running at the following local addresses:',
          'http://127.0.0.1:7860',
          'http://localhost:7860',
          sep='\n')
if __name__ == '__main__':
    # CUDA cannot be (re)initialized in forked workers, so use the 'spawn'
    # start method for the pool (this is what set_start_method is imported for)
    set_start_method('spawn', force=True)
    print_startup_info()
    # NB: debug=True enables the Werkzeug reloader/debugger; disable in production
    app.run(host='0.0.0.0', port=7860, debug=True)