Spaces:

Garvitj
/

emotion-llm

Sleeping

App Files Files Community

Garvitj committed on Feb 23

Commit

8a73e97

verified ·

1 Parent(s): 3b6bab2

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -12

app.py CHANGED Viewed

@@ -317,24 +317,31 @@ client = InferenceClient(MODEL_NAME)
 # if __name__ == "__main__":
 #     demo.launch()
 def respond(video, text_input, history):
     """Processes user input (video, text, or both) and generates a chatbot response."""
     messages = []
     system_prompt = "You are a chatbot that can analyze emotions from videos and respond accordingly."
     if video is not None:
-        video_path = video.name  # Get uploaded video path
-        result = transcribe_and_predict_video(video_path)
-        system_prompt += f"\n\nDetected Emotions:\n"
-        system_prompt += f"- Text Emotion: {result['text_emotion']}\n"
-        system_prompt += f"- Audio Emotion: {result['audio_emotion']}\n"
-        system_prompt += f"- Image Emotion: {result['image_emotion']}\n\n"
-        system_prompt += f"Extracted Speech: {result['extracted_text']}\n"
-        messages.append({"role": "user", "content": result["extracted_text"]})  # Add extracted speech
     if text_input:
         messages.append({"role": "user", "content": text_input})  # Add text input if provided
@@ -358,7 +365,6 @@ def respond(video, text_input, history):
             yield response
     except Exception as e:
         yield f"Error: {str(e)}"
 # Define ChatGPT-style UI
 with gr.Blocks(theme="soft") as demo:
     gr.Markdown("<h2 align='center'>📹🎤💬 Multi-Modal Chatbot (Video + Text) </h2>")

 # if __name__ == "__main__":
 #     demo.launch()
 def respond(video, text_input, history):
     """Processes user input (video, text, or both) and generates a chatbot response."""
     messages = []
     system_prompt = "You are a chatbot that can analyze emotions from videos and respond accordingly."
     if video is not None:
+        if isinstance(video, str):
+            video_path = video  # If video is already a string (path), use it directly
+        elif hasattr(video, "name"):
+            video_path = video.name  # If video is a file object, get its name
+        else:
+            yield "Error: Invalid video format."
+            return
+        try:
+            result = transcribe_and_predict_video(video_path)
+            system_prompt += f"\n\nDetected Emotions:\n"
+            system_prompt += f"- Text Emotion: {result['text_emotion']}\n"
+            system_prompt += f"- Audio Emotion: {result['audio_emotion']}\n"
+            system_prompt += f"- Image Emotion: {result['image_emotion']}\n\n"
+            system_prompt += f"Extracted Speech: {result['extracted_text']}\n"
+            messages.append({"role": "user", "content": result["extracted_text"]})  # Add extracted speech
+        except Exception as e:
+            yield f"Error processing video: {str(e)}"
+            return
     if text_input:
         messages.append({"role": "user", "content": text_input})  # Add text input if provided
             yield response
     except Exception as e:
         yield f"Error: {str(e)}"
 # Define ChatGPT-style UI
 with gr.Blocks(theme="soft") as demo:
     gr.Markdown("<h2 align='center'>📹🎤💬 Multi-Modal Chatbot (Video + Text) </h2>")