import gradio as gr
from collections import Counter
from transformers import LlamaTokenizer, LlamaForCausalLM
import tempfile
import numpy as np
# Initialize the LLaMA model for question answering
# (the repo id below may need to be swapped for an accessible LLaMA checkpoint)
llama_tokenizer = LlamaTokenizer.from_pretrained('huggingface/llama-7b')
llama_model = LlamaForCausalLM.from_pretrained('huggingface/llama-7b')
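# NOTE: process_video and process_audio_from_video are assumed to be defined in the
# original emotion-recognition code. The stubs below are a minimal, hypothetical
# sketch so this file runs standalone; replace them with the real implementations.
def process_video(video):
    # Hypothetical placeholder: a real version would sample frames and run a
    # facial-emotion classifier, returning a label such as 'joy' or 'sadness'.
    return 'neutral'

def process_audio_from_video(video):
    # Hypothetical placeholder: a real version would extract the audio track,
    # transcribe it, and classify emotion from both the transcript and the audio.
    return 'neutral', 'neutral'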
# Updated transcribe_and_predict_video function from the existing emotion-recognition code
def transcribe_and_predict_video(video):
    # Process video frames for image-based emotion recognition
    image_emotion = process_video(video)

    # Process audio for text- and audio-based emotion recognition
    text_emotion, audio_emotion = process_audio_from_video(video)

    # Determine the overall emotion by majority vote across the three modalities
    overall_emotion = Counter([text_emotion, audio_emotion, image_emotion]).most_common(1)[0][0]

    return overall_emotion
# Emotion-aware question answering with the LLM
def emotion_aware_qa(question, video):
    # Get the emotion from the video (uses the emotion detection implemented above)
    detected_emotion = transcribe_and_predict_video(video)

    # Create a custom response context based on the detected emotion
    if detected_emotion == 'joy':
        emotion_context = "You're in a good mood! Let's keep the positivity going."
    elif detected_emotion == 'sadness':
        emotion_context = "It seems like you're feeling a bit down. Let me help with that."
    elif detected_emotion == 'anger':
        emotion_context = "I sense some frustration. Let's work through it together."
    elif detected_emotion == 'fear':
        emotion_context = "It sounds like you're anxious. How can I assist in calming things down?"
    elif detected_emotion == 'neutral':
        emotion_context = "You're feeling neutral. How can I help you today?"
    else:
        emotion_context = "You're in an uncertain emotional state. Let me guide you."

    # Prepare the prompt for LLaMA, including the emotion context and the user's question
    prompt = f"{emotion_context} User asks: {question}"

    # Tokenize the prompt and generate a response from LLaMA
    inputs = llama_tokenizer(prompt, return_tensors="pt")
    outputs = llama_model.generate(inputs['input_ids'], max_length=150)
    answer = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)

    return answer
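# Example (assumed local test with a hypothetical file path): call the pipeline
# directly, bypassing the Gradio UI.
# answer = emotion_aware_qa("How can I stay focused today?", "sample_video.mp4")
# print(answer)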
# Create the Gradio interface to interact with the LLM and video emotion detection
def gradio_interface(question, video):
    response = emotion_aware_qa(question, video)
    return response
iface = gr.Interface(fn=gradio_interface,
                     inputs=["text", gr.Video()],
                     outputs="text",
                     title="Emotion-Aware Question Answering",
                     description="Ask a question and get an emotion-aware response based on the video.")

iface.launch()
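# When running locally rather than on a Hugging Face Space, iface.launch(share=True)
# can be used to expose a temporary public link.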