import gradio as gr
from deepface import DeepFace
from transformers import pipeline
import io
import base64
import pandas as pd
import numpy as np
from huggingface_hub import InferenceClient

get_blip = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
# Use DeepFace to detect age, gender, and emotion.
def analyze_face(image):
    # Convert the PIL image to a numpy array.
    image_array = np.array(image)
    face_result = DeepFace.analyze(image_array, actions=['age', 'gender', 'emotion'], enforce_detection=False)
    # Convert the resulting list of result dicts to a DataFrame.
    df = pd.DataFrame(face_result)
    # The [0] accesses the first row of each DataFrame column, i.e. the first detected face.
    return df['dominant_gender'][0], df['age'][0], df['dominant_emotion'][0]
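# Example sketch (illustrative values; DeepFace labels gender as 'Man'/'Woman'
# and emotions as 'happy', 'sad', 'neutral', etc.):
#   from PIL import Image
#   analyze_face(Image.open("face.jpg"))  # -> ('Man', 32, 'happy')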
# Use BLIP to generate a caption.
# image_to_base64_str converts a PIL image to a base64-encoded string.
def image_to_base64_str(pil_image):
    byte_arr = io.BytesIO()
    pil_image.save(byte_arr, format='PNG')
    byte_arr = byte_arr.getvalue()
    return base64.b64encode(byte_arr).decode('utf-8')
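# Round-trip sketch (hypothetical file name), using only modules imported above:
#   from PIL import Image
#   s = image_to_base64_str(Image.open("photo.png"))
#   img = Image.open(io.BytesIO(base64.b64decode(s)))  # back to a PIL image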
# captioner takes an image and returns BLIP's generated caption.
def captioner(image):
    base64_image = image_to_base64_str(image)
    caption = get_blip(base64_image)
    # The [0] accesses the first element of the list returned by the pipeline.
    return caption[0]['generated_text']
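# The image-to-text pipeline returns a list with one dict per input, e.g.
# (the caption text itself is illustrative):
#   [{'generated_text': 'a man standing on a beach at sunset'}]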
def get_image_info(image):
    # Call captioner() to get the image caption.
    image_caption = captioner(image)
    # Call analyze_face() to get the face attributes.
    gender, age, emotion = analyze_face(image)
    return image_caption, gender, age, emotion
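# Quick local test sketch (hypothetical file name; outputs are illustrative):
#   from PIL import Image
#   print(get_image_info(Image.open("photo.jpg")))
#   # ('a man standing on a beach', 'Man', 32, 'happy')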
client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)
def generate(image, temperature=0.9, max_new_tokens=1500, top_p=0.95, repetition_penalty=1.0):
    image_caption, gender, age, emotion = get_image_info(image)
    # Mistral's instruction format wraps the request in [INST] ... [/INST] tags.
    prompt = (
        f"[INST] Please generate a detailed and engaging story based on the person's emotion: {emotion}, "
        f"age: {age}, and gender: {gender} shown in the image. Begin with the scene described in the image's caption: '{image_caption}'. "
        f"The generated story should include a beginning, middle, and end. [/INST]"
    )
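    # A rendered prompt might look like (values illustrative):
    #   [INST] Please generate a detailed and engaging story based on the person's
    #   emotion: happy, age: 32, and gender: Man shown in the image. ... [/INST]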
print("prompt:",prompt) | |
temperature = float(temperature) | |
if temperature < 1e-2: | |
temperature = 1e-2 | |
top_p = float(top_p) | |
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    # Accumulate the streamed tokens and yield the running text so Gradio
    # updates the output textbox incrementally.
    output = ""
    for response in stream:
        output += response.token.text
        yield output
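# Note: with stream=True and details=True, huggingface_hub yields stream objects
# (TextGenerationStreamOutput in recent releases) whose .token.text carries each
# newly generated token; that is why the loop above reads response.token.text.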
demo = gr.Interface(fn=generate,
                    inputs=[
                        gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
                        gr.Slider(
                            label="Temperature",
                            value=0.9,
                            minimum=0.0,
                            maximum=1.0,
                            step=0.05,
                            interactive=True,
                            info="Higher values produce more diverse outputs",
                        ),
                        gr.Slider(
                            label="Max new tokens",
                            value=1500,
                            minimum=0,
                            maximum=3000,
                            step=1,
                            interactive=True,
                            info="The maximum number of new tokens",
                        ),
                        gr.Slider(
                            label="Top-p (nucleus sampling)",
                            value=0.90,
                            minimum=0.0,
                            maximum=1.0,
                            step=0.05,
                            interactive=True,
                            info="Higher values sample more low-probability tokens",
                        ),
                        gr.Slider(
                            label="Repetition penalty",
                            value=1.2,
                            minimum=1.0,
                            maximum=2.0,
                            step=0.05,
                            interactive=True,
                            info="Penalize repeated tokens",
                        ),
                    ],
                    outputs=[gr.Textbox(label="Generated Story")],
                    title="Story Generation",
                    description="Upload a photo and get a story built from the image caption and the detected age, gender, and emotion.",
                    allow_flagging="never",
                    )
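# Note: on older Gradio releases, generator functions need demo.queue() before
# launch() to stream partial output; recent versions enable queueing by default.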
demo.launch(debug=True)