import gradio as gr
from deepface import DeepFace
from transformers import pipeline
import io
import base64
import pandas as pd
import numpy as ny
from huggingface_hub import InferenceClient

get_blip = pipeline("image-to-text",model="Salesforce/blip-image-captioning-large")

# using deepface to detect age, gender, emotion
def analyze_face(image):
    #convert PIL image to numpy array
    image_array = ny.array(image)
    face_result = DeepFace.analyze(image_array, actions=['age','gender','emotion'], enforce_detection=False)
    #convert the resulting dictionary to a dataframe
    df = pd.DataFrame(face_result)
    return df['dominant_gender'][0],df['age'][0],df['dominant_emotion'][0]
    #The [0] at the end is for accessing the value at the first row in a DataFrame column.

#using blip to generate caption
#image_to_base64_str function to convert image to base64 format
def image_to_base64_str(pil_image):
    byte_arr = io.BytesIO()
    pil_image.save(byte_arr, format='PNG')
    byte_arr = byte_arr.getvalue()
    return str(base64.b64encode(byte_arr).decode('utf-8'))
#captioner function to take an image
def captioner(image):
    base64_image = image_to_base64_str(image)
    caption = get_blip(base64_image)
    return caption[0]['generated_text']
    #The [0] at the beginning is for accessing the first element in a container (like a list or dictionary).


def get_image_info(image):
    #call captioner() function
    image_caption = captioner(image)

    #call analyze_face() function
    gender, age, emotion = analyze_face(image)

    #return image_caption,face_attributes
    return image_caption, gender, age, emotion

    
client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)

def generate(image, temperature=0.9, max_new_tokens=1500, top_p=0.95, repetition_penalty=1.0):   
    image_caption, gender, age, emotion = get_image_info(image)   
     
    #prompt = f"[INS] Generate a story based on person’s emotion: {emotion}, age: {age}, gender: {gender} of the image, and image’s caption: {image_caption}.[/INS]"
    prompt = (
        f"[INS] Please generate a detailed and engaging story based on the person's emotion: {emotion}, "
        f"age: {age}, and gender: {gender} shown in the image. Begin with the scene described in the image's caption: '{image_caption}'. "
        f"The generated story should include a beginning, middle, and end.[/INS]"
    )
    
    print("prompt:",prompt)
    
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        # yield "".join(output)
        yield output
    return output


demo = gr.Interface(fn=generate,
        inputs=[
            #gr.Video(sources=["webcam"], label="video")

            gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
            
            gr.Slider(
                label="Temperature",
                value=0.9,
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                interactive=True,
                info="Higher values produce more diverse outputs",
            ),

            gr.Slider(
                label="Max new tokens",
                value=1500,
                minimum=0,
                maximum=3000,
                step=1.0,
                interactive=True,
                info="The maximum numbers of new tokens"),

            gr.Slider(
                label="Top-p (nucleus sampling)",
                value=0.90,
                minimum=0.0,
                maximum=1,
                step=0.05,
                interactive=True,
                info="Higher values sample more low-probability tokens",
            ),
            gr.Slider(
                label="Repetition penalty",
                value=1.2,
                minimum=1.0,
                maximum=2.0,
                step=0.05,
                interactive=True,
                info="Penalize repeated tokens",
            )
        ],
        outputs=[gr.Textbox(label="Generated Story")],
        title="story generation",
        description="generate a story for you",
        allow_flagging="never"

                    )
demo.launch(debug=(True))