Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -322,42 +322,41 @@ def respond(video, text_input, history):
|
|
322 |
messages = []
|
323 |
system_prompt = "You are a chatbot that can analyze emotions from videos and respond accordingly."
|
324 |
|
325 |
-
|
326 |
-
|
327 |
-
video_path = video # If video is already a string (path), use it directly
|
328 |
-
elif hasattr(video, "name"):
|
329 |
-
video_path = video.name # If video is a file object, get its name
|
330 |
-
else:
|
331 |
-
yield "Error: Invalid video format."
|
332 |
-
return
|
333 |
-
|
334 |
try:
|
|
|
|
|
|
|
|
|
335 |
result = transcribe_and_predict_video(video_path)
|
|
|
336 |
system_prompt += f"\n\nDetected Emotions:\n"
|
337 |
system_prompt += f"- Text Emotion: {result['text_emotion']}\n"
|
338 |
system_prompt += f"- Audio Emotion: {result['audio_emotion']}\n"
|
339 |
system_prompt += f"- Image Emotion: {result['image_emotion']}\n\n"
|
340 |
system_prompt += f"Extracted Speech: {result['extracted_text']}\n"
|
|
|
341 |
messages.append({"role": "user", "content": result["extracted_text"]}) # Add extracted speech
|
342 |
except Exception as e:
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
if
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
|
356 |
# Include system prompt
|
357 |
messages.insert(0, {"role": "system", "content": system_prompt})
|
358 |
|
359 |
response = ""
|
360 |
-
|
361 |
try:
|
362 |
for message in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
|
363 |
token = message.choices[0].delta.content if message.choices[0].delta else ""
|
@@ -365,6 +364,7 @@ def respond(video, text_input, history):
|
|
365 |
yield response
|
366 |
except Exception as e:
|
367 |
yield f"Error: {str(e)}"
|
|
|
368 |
# Define ChatGPT-style UI
|
369 |
with gr.Blocks(theme="soft") as demo:
|
370 |
gr.Markdown("<h2 align='center'>📹🎤💬 Multi-Modal Chatbot (Video + Text) </h2>")
|
|
|
322 |
messages = []
|
323 |
system_prompt = "You are a chatbot that can analyze emotions from videos and respond accordingly."
|
324 |
|
325 |
+
# Handle video input safely
|
326 |
+
if video:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
try:
|
328 |
+
video_path = video if isinstance(video, str) else getattr(video, "name", None)
|
329 |
+
if not video_path:
|
330 |
+
return "Error: Invalid video input."
|
331 |
+
|
332 |
result = transcribe_and_predict_video(video_path)
|
333 |
+
|
334 |
system_prompt += f"\n\nDetected Emotions:\n"
|
335 |
system_prompt += f"- Text Emotion: {result['text_emotion']}\n"
|
336 |
system_prompt += f"- Audio Emotion: {result['audio_emotion']}\n"
|
337 |
system_prompt += f"- Image Emotion: {result['image_emotion']}\n\n"
|
338 |
system_prompt += f"Extracted Speech: {result['extracted_text']}\n"
|
339 |
+
|
340 |
messages.append({"role": "user", "content": result["extracted_text"]}) # Add extracted speech
|
341 |
except Exception as e:
|
342 |
+
return f"Error processing video: {str(e)}"
|
343 |
+
|
344 |
+
# Validate the history format and replay prior user/assistant turns
|
345 |
+
if isinstance(history, list):
|
346 |
+
for val in history:
|
347 |
+
if isinstance(val, (list, tuple)) and len(val) == 2:
|
348 |
+
if val[0]:
|
349 |
+
messages.append({"role": "user", "content": val[0]})
|
350 |
+
if val[1]:
|
351 |
+
messages.append({"role": "assistant", "content": val[1]})
|
352 |
+
else:
|
353 |
+
return "Error: Chat history is not in the correct format."
|
354 |
|
355 |
# Include system prompt
|
356 |
messages.insert(0, {"role": "system", "content": system_prompt})
|
357 |
|
358 |
response = ""
|
359 |
+
|
360 |
try:
|
361 |
for message in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
|
362 |
token = message.choices[0].delta.content if message.choices[0].delta else ""
|
|
|
364 |
yield response
|
365 |
except Exception as e:
|
366 |
yield f"Error: {str(e)}"
|
367 |
+
|
368 |
# Define ChatGPT-style UI
|
369 |
with gr.Blocks(theme="soft") as demo:
|
370 |
gr.Markdown("<h2 align='center'>📹🎤💬 Multi-Modal Chatbot (Video + Text) </h2>")
|