stable endpoint

- .gitignore +2 -0
- app/__pycache__/main.cpython-312.pyc +0 -0
- app/__pycache__/model.cpython-312.pyc +0 -0
- app/__pycache__/preprocessing.cpython-312.pyc +0 -0
- app/model.py +1 -1
- app/preprocessing.py +20 -1
- eval_results.json +79 -0
- evaluation.py +50 -0
- model/sign_transformer.keras +2 -2
.gitignore
ADDED
@@ -0,0 +1,2 @@
+test_videos/
+__pycache__/
app/__pycache__/main.cpython-312.pyc
DELETED
Binary file (2.64 kB)

app/__pycache__/model.cpython-312.pyc
DELETED
Binary file (3.44 kB)

app/__pycache__/preprocessing.cpython-312.pyc
DELETED
Binary file (9.34 kB)
app/model.py
CHANGED
@@ -36,7 +36,7 @@ class VideoClassifier:
         norm_landmarks = self.normalize_landmarks(landmarks)
         input_data = np.concatenate([np.squeeze(norm_landmarks), angles], axis=1)

-        predictions = self.model.predict(np.expand_dims(input_data, axis=0))
+        predictions = self.model.predict(np.expand_dims(input_data, axis=0), verbose=0)

         # Get the predicted class and confidence
         predicted_class_idx = predictions.argmax()
app/preprocessing.py
CHANGED
@@ -31,6 +31,25 @@ class VideoProcessor:
         self.model = mp.solutions.holistic.Holistic(static_image_mode=False,
                                                     min_detection_confidence=0.3,
                                                     min_tracking_confidence=0.3)
+
+    def resize_to_480p(self, frame):
+        """
+        Resize the frame to 480p while maintaining the aspect ratio.
+        Handles both portrait and landscape frames.
+        """
+        height, width = frame.shape[:2]
+
+        if height > width:  # Portrait video
+            new_width = 480
+            scale = new_width / width
+            new_height = int(height * scale)
+        else:  # Landscape or square video
+            new_height = 480
+            scale = new_height / height
+            new_width = int(width * scale)
+
+        resized_frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)
+        return resized_frame

     def motion_trim(self, video_path: str) -> np.ndarray:
         try:

@@ -47,7 +66,7 @@ class VideoProcessor:
             if not ret:
                 break

-            frame =
+            frame = self.resize_to_480p(frame)
             frames.append(frame)

             # Calculate motion score on the fly
eval_results.json
ADDED
@@ -0,0 +1,79 @@
+{
+    "accuracy": "80%",
+    "correct": 48,
+    "total": 60,
+    "misclassified": [
+        {
+            "path": "test_videos/anak/anak_03.mp4",
+            "actual": "anak",
+            "predicted": "keluarga",
+            "confidence": 0.28
+        },
+        {
+            "path": "test_videos/buruk/buruk_07.mp4",
+            "actual": "buruk",
+            "predicted": "anak",
+            "confidence": 0.37
+        },
+        {
+            "path": "test_videos/dengar/dengar_04.mp4",
+            "actual": "dengar",
+            "predicted": "maaf",
+            "confidence": 0.31
+        },
+        {
+            "path": "test_videos/gembira/gembira_10.mp4",
+            "actual": "gembira",
+            "predicted": "buka",
+            "confidence": 0.71
+        },
+        {
+            "path": "test_videos/ibu/ibu_03.mp4",
+            "actual": "ibu",
+            "predicted": "maaf",
+            "confidence": 0.19
+        },
+        {
+            "path": "test_videos/kertas/kertas_01.mp4",
+            "actual": "kertas",
+            "predicted": "main",
+            "confidence": 0.26
+        },
+        {
+            "path": "test_videos/lihat/lihat_04.mp4",
+            "actual": "lihat",
+            "predicted": "makan",
+            "confidence": 0.79
+        },
+        {
+            "path": "test_videos/lihat/lihat_07.mp4",
+            "actual": "lihat",
+            "predicted": "senyum",
+            "confidence": 0.62
+        },
+        {
+            "path": "test_videos/makan/makan_04.mp4",
+            "actual": "makan",
+            "predicted": "minum",
+            "confidence": 0.84
+        },
+        {
+            "path": "test_videos/orang/orang_04.mp4",
+            "actual": "orang",
+            "predicted": "ibu",
+            "confidence": 0.33
+        },
+        {
+            "path": "test_videos/rumah/rumah_06.mp4",
+            "actual": "rumah",
+            "predicted": "teman",
+            "confidence": 0.26
+        },
+        {
+            "path": "test_videos/sedikit/sedikit_01.mp4",
+            "actual": "sedikit",
+            "predicted": "orang",
+            "confidence": 0.33
+        }
+    ]
+}
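
The misclassified list above lends itself to a quick error breakdown; a short sketch (reading the file this commit adds) that tallies errors per ground-truth word and per confusion pair:

import json
from collections import Counter

with open("eval_results.json") as f:
    results = json.load(f)

# Errors per ground-truth word: only "lihat" fails twice; every other word once.
per_word = Counter(e["actual"] for e in results["misclassified"])
print(per_word.most_common())

# Actual -> predicted confusion pairs, for spotting systematic mix-ups.
pairs = Counter((e["actual"], e["predicted"]) for e in results["misclassified"])
for (actual, predicted), n in pairs.most_common():
    print(f"{actual} -> {predicted}: {n}")
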
evaluation.py
ADDED
@@ -0,0 +1,50 @@
+from app.preprocessing import VideoProcessor
+from app.model import VideoClassifier
+from tqdm import tqdm
+import os
+import json
+
+# Initialize model
+processor = VideoProcessor()
+model = VideoClassifier()
+
+total = 0
+correct = 0
+misclassified = []
+
+for word in tqdm(os.listdir('test_videos'), desc='Processing words'):
+    for video in os.listdir(f'test_videos/{word}'):
+        video_path = f'test_videos/{word}/{video}'
+        landmarks, angles = processor.process_video(video_path)
+        prediction = model.predict(landmarks, angles)
+
+        total += 1
+        if prediction["label"] == word:
+            correct += 1
+        else:
+            misclassified.append({
+                'path': video_path,
+                'actual': word,
+                'predicted': prediction["label"],
+                'confidence': round(prediction["confidence"], 2)
+            })
+
+eval_results = {
+    "accuracy": f"{round(correct / total * 100)}%",
+    "correct": correct,
+    "total": total,
+    "misclassified": misclassified
+}
+
+with open("eval_results.json", "w") as f:
+    json.dump(eval_results, f, indent=4)
+
+print(f"\nAccuracy: {eval_results['accuracy']}")
+print(f"Correct predictions: {correct}/{total}")
+
+print("\nMisclassified examples:")
+for error in misclassified:
+    print(f"Video: {error['path']}")
+    print(f"Actual: {error['actual']}")
+    print(f"Predicted: {error['predicted']} (confidence: {error['confidence']:.2f})")
+    print("-" * 50)
model/sign_transformer.keras
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:22110b04ee93521c468a963d19f9db1a3ae7895af08a967033e79365f3495844
+size 51249474