Spaces:

mbesinci
/

GazeCorrection

Running

App Files Files Community

mbesinci commited on Nov 12, 2024

Commit

02bde05

verified ·

1 Parent(s): ade7875

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -25

app.py CHANGED Viewed

@@ -1,46 +1,55 @@
 import gradio as gr
 import cv2
 import torch
-from transformers import AutoModelForImageClassification, AutoFeatureExtractor
-# Modeli yükleyin (bu örnek bir modeldir, uygun bir model bulun)
-model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224")  # Modeli değiştirin
-feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
-# Görüntüyü modele uygun hale getirmek için yardımcı fonksiyon
-def preprocess_image(image):
-    inputs = feature_extractor(images=image, return_tensors="pt")
     return inputs['pixel_values']
-# Video işlemi fonksiyonu
-def correct_gaze_in_video(video_path):
     # Video dosyasını aç
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     # Çıkış videosu için ayarlar
-    output_path = "corrected_gaze_output.mp4"
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
-        # Göz temasını düzeltmek için her kareyi işleme
-        inputs = preprocess_image(frame)
-        with torch.no_grad():
-            outputs = model(pixel_values=inputs)
-        # Modelin çıktısını işleyin (örneğin, doğrudan göz temasına göre değişiklik yapma)
-        # corrected_frame = işlemler burada yapılabilir
-        # Çıkışı video dosyasına ekleyin
-        out.write(frame)  # corrected_frame olarak değiştirin
     # Kaynakları serbest bırak
     cap.release()
     out.release()
@@ -49,11 +58,11 @@ def correct_gaze_in_video(video_path):
 # Gradio arayüzü
 iface = gr.Interface(
-    fn=correct_gaze_in_video,
     inputs="file",
     outputs="file",
-    title="Gaze Correction in Video",
-    description="Upload a video to correct gaze direction."
 )
 iface.launch()

 import gradio as gr
 import cv2
 import torch
+from transformers import VideoMAEForVideoClassification, AutoFeatureExtractor
+# Göz teması algılama modeli ve özellik çıkarıcıyı yükleyin
+model_name = "kanlo/videomae-base-ASD_Eye_Contact_v1"
+model = VideoMAEForVideoClassification.from_pretrained(model_name)
+feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+def preprocess_frames(frames):
+    # Her kareyi modele uygun şekilde işleyin
+    inputs = feature_extractor(frames, return_tensors="pt")
     return inputs['pixel_values']
+def detect_eye_contact(video_path):
     # Video dosyasını aç
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     # Çıkış videosu için ayarlar
+    output_path = "eye_contact_output.mp4"
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+    frames = []
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
+        frames.append(frame)
+        # Modeli belirli bir aralıkta çalıştırarak göz temasını algılayın
+        if len(frames) >= 16:  # 16 karede bir işlem yapıyoruz
+            inputs = preprocess_frames(frames)
+            with torch.no_grad():
+                outputs = model(pixel_values=inputs)
+                prediction = outputs.logits.argmax(-1).item()
+            # Göz teması varsa çerçeveye ek açıklama ekleyin
+            for frame in frames:
+                if prediction == 1:  # 1 göz teması var anlamına gelir (modelde böyle olduğunu varsayıyoruz)
+                    cv2.putText(frame, "Eye Contact", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+                else:
+                    cv2.putText(frame, "No Eye Contact", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+                out.write(frame)
+            frames = []  # 16 karelik grubu işledikten sonra sıfırlayın
     # Kaynakları serbest bırak
     cap.release()
     out.release()
 # Gradio arayüzü
 iface = gr.Interface(
+    fn=detect_eye_contact,
     inputs="file",
     outputs="file",
+    title="Eye Contact Detection in Video",
+    description="Upload a video to detect eye contact in each frame."
 )
 iface.launch()