rajendrakumarv tejovk311 committed on
Commit
ab3e631
·
verified ·
1 Parent(s): 73769bb

Upload 3 files (#2)

Browse files

- Upload 3 files (3ba492afdd98f2f37520efca605563be71e92e5a)


Co-authored-by: Kattamuri Tejo Vardhan <tejovk311@users.noreply.huggingface.co>

Files changed (3) hide show
  1. Dockerfile.unknown +35 -0
  2. app.py +167 -0
  3. requirements.txt +15 -0
Dockerfile.unknown ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Install system libraries needed for video decoding and OpenCV.
# libgl1 supplies libGL.so.1 for opencv-python; it replaces the transitional
# libgl1-mesa-glx package, which newer Debian releases no longer ship.
RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy the requirements file on its own so the dependency layer below is
# only rebuilt when requirements.txt changes, not on every code change.
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Create a world-writable directory for temporary files.
# NOTE(review): app.py allocates its scratch space with tempfile.mkdtemp(),
# which uses the default temp location, so nothing writes here unless TMPDIR
# is pointed at this directory.
RUN mkdir -p /tmp/video_processing && chmod 777 /tmp/video_processing

# PYTHONUNBUFFERED=1 makes Python write log output immediately.
# PORT is read by app.py and overrides its built-in default of 7860.
ENV PYTHONUNBUFFERED=1
ENV PORT=5000

# Expose port (keep in sync with PORT above)
EXPOSE 5000

# Command to run the application
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import os
3
+ import numpy as np
4
+ import torch
5
+ import av
6
+ import cv2
7
+ import tempfile
8
+ import shutil
9
+ import logging
10
+ from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor
11
+ from PIL import Image
12
+ from torchvision.transforms import Compose, Resize, ToTensor
13
+
14
# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# Emit INFO-and-above log records and share one module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Inference device: use the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Populated on first use by load_model(); None means "not loaded yet".
model = processor = transform = None
24
+
25
def load_model():
    """Return the shared (model, processor, transform) triple.

    The first call downloads/loads the VideoMAE checkpoint, moves the model to
    ``device`` and builds the per-frame transform; the results are stored in
    the module-level globals. Later calls return the stored objects unchanged.
    """
    global model, processor, transform

    # Already initialised: hand back the stored objects.
    if model is not None:
        return model, processor, transform

    model_name = "OPear/videomae-large-finetuned-UCF-Crime"
    logger.info(f"Loading model {model_name} on {device}...")

    model = VideoMAEForVideoClassification.from_pretrained(model_name).to(device)
    processor = VideoMAEImageProcessor.from_pretrained(model_name)
    # Every frame is resized to 224x224 and converted to a float tensor.
    transform = Compose([Resize((224, 224)), ToTensor()])

    logger.info("Model loaded successfully")
    return model, processor, transform
39
+
40
def sample_frame_indices(clip_len=16, frame_sample_rate=1, seg_len=0):
    """Pick ``clip_len`` frame indices from a video that has ``seg_len`` frames.

    * If the video is no longer than the sampling window
      (``clip_len * frame_sample_rate`` frames), the indices are spread evenly
      over the whole video; indices repeat when the video has fewer than
      ``clip_len`` frames.
    * Otherwise a window of that length is placed at a random position and
      ``clip_len`` indices are spread evenly inside it. With the default
      ``frame_sample_rate=1`` this is a run of consecutive frames.

    Args:
        clip_len: Number of indices to return.
        frame_sample_rate: Approximate stride between sampled frames inside
            the random window. Values below 1 are treated as 1.
        seg_len: Total number of frames in the video.

    Returns:
        A non-decreasing integer NumPy array of length ``clip_len`` whose
        values lie in ``[0, max(seg_len - 1, 0)]``. For ``seg_len <= 0`` the
        array is all zeros rather than containing negative indices.
    """
    # Nothing to sample from: return a well-formed, non-negative result
    # instead of the -1 indices that inverted clip bounds would produce.
    if seg_len <= 0:
        return np.zeros(clip_len, dtype=int)

    rate = max(1, int(frame_sample_rate))
    window = clip_len * rate

    if seg_len <= window:
        indices = np.linspace(0, seg_len - 1, num=clip_len, dtype=int)
    else:
        # end_idx is exclusive; drawing it from [window, seg_len) guarantees
        # that start_idx is never negative.
        end_idx = np.random.randint(window, seg_len)
        start_idx = end_idx - window
        indices = np.linspace(start_idx, end_idx - 1, num=clip_len, dtype=int)

    return np.clip(indices, 0, seg_len - 1)
49
+
50
def _count_frames_with_opencv(video_path):
    """Return OpenCV's reported frame count for *video_path*, releasing the capture."""
    cap = cv2.VideoCapture(video_path)
    try:
        return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()


def process_video(video_path):
    """Extract 16 sampled RGB frames from the video at *video_path*.

    Frames are decoded with PyAV; if PyAV yields no frames at all, OpenCV is
    used instead. When the video has fewer than 16 frames the sampled indices
    contain repeats, and the corresponding frames are repeated in the output.

    Args:
        video_path: Filesystem path of the video to read.

    Returns:
        ``(frames, indices)`` where ``frames`` is the 16 frames stacked into a
        single array and ``indices`` is the array of sampled frame indices,
        or ``(None, None)`` if the video cannot be opened, its frame count
        cannot be determined, or 16 frames cannot be extracted.
    """
    clip_len = 16

    try:
        container = av.open(video_path)
    except Exception as e:
        logger.error(f"Error opening video: {str(e)}")
        return None, None

    # From here on the container is open; always close it on the way out.
    try:
        try:
            video_stream = container.streams.video[0]
            seg_len = video_stream.frames
            if seg_len <= 0:
                # The container did not report a usable count; ask OpenCV.
                seg_len = _count_frames_with_opencv(video_path)
        except Exception as e:
            logger.error(f"Error opening video: {str(e)}")
            return None, None

        if seg_len <= 0:
            logger.error("Could not determine the number of frames in the video")
            return None, None

        indices = sample_frame_indices(clip_len=clip_len, seg_len=seg_len)
        wanted = indices.tolist()
        wanted_set = set(wanted)
        last_wanted = wanted[-1]  # indices are non-decreasing

        # Decode each requested frame once, keyed by its index, so repeated
        # indices (short videos) can be expanded afterwards.
        decoded = {}
        try:
            container.seek(0)
            for i, frame in enumerate(container.decode(video=0)):
                if i > last_wanted:
                    break
                if i in wanted_set:
                    decoded[i] = frame.to_ndarray(format="rgb24")
        except Exception as e:
            logger.error(f"Error decoding video with PyAV: {str(e)}")

        frames = [decoded[i] for i in wanted if i in decoded]

        if not frames:
            logger.info("Falling back to OpenCV for frame extraction")
            cap = cv2.VideoCapture(video_path)
            try:
                for i in wanted:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, i)
                    ret, frame = cap.read()
                    if ret:
                        # OpenCV decodes to BGR; the model pipeline expects RGB.
                        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            finally:
                cap.release()

        if len(frames) != clip_len:
            logger.error(f"Could not extract 16 frames, got {len(frames)}")
            return None, None

        return np.stack(frames), indices
    finally:
        container.close()
88
+
89
def predict_video(frames):
    """Classify a clip with VideoMAE and return the predicted label.

    Args:
        frames: Sequence of image arrays accepted by ``PIL.Image.fromarray``
            (process_video supplies RGB frames).

    Returns:
        The label string from the model's ``id2label`` mapping for the
        highest-scoring class, or "Unknown" if that class id has no entry.
    """
    net, image_processor, to_tensor = load_model()

    # Resize/convert every frame, then stack them into one clip tensor.
    per_frame = [to_tensor(Image.fromarray(frame)) for frame in frames]
    clip = torch.stack(per_frame)

    # The transform already produced float tensors, so rescaling is disabled
    # in the processor.
    batch = image_processor(list(clip), return_tensors="pt", do_rescale=False)
    batch = {name: tensor.to(device) for name, tensor in batch.items()}

    # Inference only: no gradients needed.
    with torch.no_grad():
        result = net(**batch)

    best_class = result.logits.argmax(-1).item()
    return net.config.id2label.get(best_class, "Unknown")
107
+
108
@app.route('/classify-video', methods=['POST'])
def classify_video():
    """Classify an uploaded video and return the predicted label as JSON.

    Expects a multipart form upload with the file in the ``video`` field.

    Returns:
        * 200 ``{"prediction": <label>}`` on success.
        * 400 ``{"error": ...}`` when no file is supplied or the video cannot
          be processed.
        * 500 ``{"error": ...}`` on any unexpected exception.
    """
    if 'video' not in request.files:
        logger.warning("No video file in request")
        return jsonify({'error': 'No video file provided'}), 400

    video_file = request.files['video']

    if video_file.filename == '':
        logger.warning("Empty video filename")
        return jsonify({'error': 'No video selected'}), 400

    # The client-supplied filename is untrusted: an absolute path or "../"
    # segments would make os.path.join escape the temp directory. Keep only a
    # sanitised extension and store the upload under a fixed name.
    _, extension = os.path.splitext(os.path.basename(video_file.filename))
    extension = "".join(ch for ch in extension if ch.isalnum() or ch == ".")[:16]

    # Create temporary directory
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, f"upload{extension}")

    try:
        # Save the uploaded video
        logger.info(f"Saving uploaded video to {video_path}")
        video_file.save(video_path)

        # Process the video
        logger.info("Processing video...")
        frames, indices = process_video(video_path)

        if frames is None:
            return jsonify({'error': 'Failed to process video file'}), 400

        # Get the prediction
        logger.info("Running prediction...")
        prediction = predict_video(frames)

        logger.info(f"Prediction result: {prediction}")
        return jsonify({'prediction': prediction})

    except Exception as e:
        logger.exception(f"Error processing video: {str(e)}")
        return jsonify({'error': f'Error processing video: {str(e)}'}), 500

    finally:
        # Clean up the temporary directory and its contents. Cleanup is
        # best-effort: a failure here must not replace the response above.
        if os.path.exists(temp_dir):
            logger.info(f"Cleaning up temporary directory: {temp_dir}")
            shutil.rmtree(temp_dir, ignore_errors=True)
152
+
153
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: always responds 200 with a fixed JSON status body."""
    payload = {"status": "healthy"}
    return jsonify(payload), 200
157
+
158
if __name__ == '__main__':
    # Load the model before serving so it is ready when requests arrive.
    logger.info("Initializing application...")
    load_model()

    # The PORT environment variable takes precedence; 7860 is the fallback.
    port = int(os.getenv('PORT', 7860))

    logger.info(f"Starting Flask application on port {port}")
    # Bind to all interfaces so the server is reachable from outside the
    # container.
    app.run(host='0.0.0.0', port=port, debug=False)
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Flask==3.1.1
2
+ av==14.4.0
3
+ opencv-python==4.11.0.86
4
+ numpy==2.0.2
5
+ pillow==11.2.1
6
+ torch==2.7.0
7
+ torchvision==0.22.0
8
+ transformers==4.52.3
9
+ huggingface-hub==0.32.0
10
+ requests==2.32.3
11
+ pyyaml==6.0.2
12
+ tqdm==4.67.1
13
+ regex==2024.11.6
14
+ filelock==3.18.0
15
+ packaging==24.2