stable endpoint

- .gitignore +2 -0
- app/__pycache__/main.cpython-312.pyc +0 -0
- app/__pycache__/model.cpython-312.pyc +0 -0
- app/__pycache__/preprocessing.cpython-312.pyc +0 -0
- app/model.py +1 -1
- app/preprocessing.py +20 -1
- eval_results.json +79 -0
- evaluation.py +50 -0
- model/sign_transformer.keras +2 -2
.gitignore
ADDED
@@ -0,0 +1,2 @@
+test_videos/
+__pycache__/
app/__pycache__/main.cpython-312.pyc
DELETED
Binary file (2.64 kB)

app/__pycache__/model.cpython-312.pyc
DELETED
Binary file (3.44 kB)

app/__pycache__/preprocessing.cpython-312.pyc
DELETED
Binary file (9.34 kB)
app/model.py
CHANGED
@@ -36,7 +36,7 @@ class VideoClassifier:
         norm_landmarks = self.normalize_landmarks(landmarks)
         input_data = np.concatenate([np.squeeze(norm_landmarks), angles], axis=1)

-        predictions = self.model.predict(np.expand_dims(input_data, axis=0))
+        predictions = self.model.predict(np.expand_dims(input_data, axis=0), verbose=0)

         # Get the predicted class and confidence
         predicted_class_idx = predictions.argmax()
app/preprocessing.py
CHANGED
@@ -31,6 +31,25 @@ class VideoProcessor:
         self.model = mp.solutions.holistic.Holistic(static_image_mode=False,
                                                     min_detection_confidence=0.3,
                                                     min_tracking_confidence=0.3)
+
+    def resize_to_480p(self, frame):
+        """
+        Resize the frame to 480p while maintaining the aspect ratio.
+        Handles both portrait and landscape frames.
+        """
+        height, width = frame.shape[:2]
+
+        if height > width:  # Portrait video
+            new_width = 480
+            scale = new_width / width
+            new_height = int(height * scale)
+        else:  # Landscape or square video
+            new_height = 480
+            scale = new_height / height
+            new_width = int(width * scale)
+
+        resized_frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)
+        return resized_frame

     def motion_trim(self, video_path: str) -> np.ndarray:
         try:

@@ -47,7 +66,7 @@ class VideoProcessor:
             if not ret:
                 break

-            frame =
+            frame = self.resize_to_480p(frame)
             frames.append(frame)

             # Calculate motion score on the fly
eval_results.json
ADDED
@@ -0,0 +1,79 @@
+{
+    "accuracy": "80%",
+    "correct": 48,
+    "total": 60,
+    "misclassified": [
+        {
+            "path": "test_videos/anak/anak_03.mp4",
+            "actual": "anak",
+            "predicted": "keluarga",
+            "confidence": 0.28
+        },
+        {
+            "path": "test_videos/buruk/buruk_07.mp4",
+            "actual": "buruk",
+            "predicted": "anak",
+            "confidence": 0.37
+        },
+        {
+            "path": "test_videos/dengar/dengar_04.mp4",
+            "actual": "dengar",
+            "predicted": "maaf",
+            "confidence": 0.31
+        },
+        {
+            "path": "test_videos/gembira/gembira_10.mp4",
+            "actual": "gembira",
+            "predicted": "buka",
+            "confidence": 0.71
+        },
+        {
+            "path": "test_videos/ibu/ibu_03.mp4",
+            "actual": "ibu",
+            "predicted": "maaf",
+            "confidence": 0.19
+        },
+        {
+            "path": "test_videos/kertas/kertas_01.mp4",
+            "actual": "kertas",
+            "predicted": "main",
+            "confidence": 0.26
+        },
+        {
+            "path": "test_videos/lihat/lihat_04.mp4",
+            "actual": "lihat",
+            "predicted": "makan",
+            "confidence": 0.79
+        },
+        {
+            "path": "test_videos/lihat/lihat_07.mp4",
+            "actual": "lihat",
+            "predicted": "senyum",
+            "confidence": 0.62
+        },
+        {
+            "path": "test_videos/makan/makan_04.mp4",
+            "actual": "makan",
+            "predicted": "minum",
+            "confidence": 0.84
+        },
+        {
+            "path": "test_videos/orang/orang_04.mp4",
+            "actual": "orang",
+            "predicted": "ibu",
+            "confidence": 0.33
+        },
+        {
+            "path": "test_videos/rumah/rumah_06.mp4",
+            "actual": "rumah",
+            "predicted": "teman",
+            "confidence": 0.26
+        },
+        {
+            "path": "test_videos/sedikit/sedikit_01.mp4",
+            "actual": "sedikit",
+            "predicted": "orang",
+            "confidence": 0.33
+        }
+    ]
+}
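
The misclassified list above lends itself to a quick error breakdown; a short sketch (reading the file this commit adds) that tallies errors per ground-truth word and per confusion pair:

import json
from collections import Counter

with open("eval_results.json") as f:
    results = json.load(f)

# Errors per ground-truth word: only "lihat" fails twice; every other word once.
per_word = Counter(e["actual"] for e in results["misclassified"])
print(per_word.most_common())

# Actual -> predicted confusion pairs, for spotting systematic mix-ups.
pairs = Counter((e["actual"], e["predicted"]) for e in results["misclassified"])
for (actual, predicted), n in pairs.most_common():
    print(f"{actual} -> {predicted}: {n}")
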
evaluation.py
ADDED
@@ -0,0 +1,50 @@
+from app.preprocessing import VideoProcessor
+from app.model import VideoClassifier
+from tqdm import tqdm
+import os
+import json
+
+# Initialize model
+processor = VideoProcessor()
+model = VideoClassifier()
+
+total = 0
+correct = 0
+misclassified = []
+
+for word in tqdm(os.listdir('test_videos'), desc='Processing words'):
+    for video in os.listdir(f'test_videos/{word}'):
+        video_path = f'test_videos/{word}/{video}'
+        landmarks, angles = processor.process_video(video_path)
+        prediction = model.predict(landmarks, angles)
+
+        total += 1
+        if prediction["label"] == word:
+            correct += 1
+        else:
+            misclassified.append({
+                'path': video_path,
+                'actual': word,
+                'predicted': prediction["label"],
+                'confidence': round(prediction["confidence"], 2)
+            })
+
+eval_results = {
+    "accuracy": f"{round(correct / total * 100)}%",
+    "correct": correct,
+    "total": total,
+    "misclassified": misclassified
+}
+
+with open("eval_results.json", "w") as f:
+    json.dump(eval_results, f, indent=4)
+
+print(f"\nAccuracy: {eval_results['accuracy']}")
+print(f"Correct predictions: {correct}/{total}")
+
+print("\nMisclassified examples:")
+for error in misclassified:
+    print(f"Video: {error['path']}")
+    print(f"Actual: {error['actual']}")
+    print(f"Predicted: {error['predicted']} (confidence: {error['confidence']:.2f})")
+    print("-" * 50)
model/sign_transformer.keras
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:22110b04ee93521c468a963d19f9db1a3ae7895af08a967033e79365f3495844
+size 51249474