import requests
import gradio as gr
from transformers import pipeline

# English -> German translation model used to translate the generated captions
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-de")


def blipofasinki(input_img):
    """Caption an image with BLIP (nucleus sampling and beam search) and OFA,
    then translate each English caption into German."""
    b64_string = gr.processing_utils.encode_url_or_file_to_base64(input_img)

    # BLIP with nucleus sampling
    responsen = requests.post(
        url='https://hf.space/embed/Salesforce/BLIP/+/api/predict/',
        json={"data": [b64_string, "Image Captioning", "None", "Nucleus sampling"]})
    jresn = responsen.json()
    capn = jresn["data"][0]
    offset = len("caption:")  # BLIP prefixes its output with "caption:"
    capn = capn[offset:]
    trans_capn = translator(capn)
    tcn = trans_capn[0]['translation_text']

    # BLIP with beam search
    responseb = requests.post(
        url='https://hf.space/embed/Salesforce/BLIP/+/api/predict/',
        json={"data": [b64_string, "Image Captioning", "None", "Beam search"]})
    jresb = responseb.json()
    capb = jresb["data"][0]
    capb = capb[offset:]
    trans_capb = translator(capb)
    tcb = trans_capb[0]['translation_text']

    # OFA captioning
    responseo = requests.post(
        url='https://hf.space/embed/OFA-Sys/OFA-Image_Caption/+/api/predict/',
        json={"data": [b64_string]})
    jreso = responseo.json()
    capo = jreso["data"][0]
    trans_capo = translator(capo)
    tco = trans_capo[0]['translation_text']

    return [tcn, tcb, tco]


description = "A direct comparison in image captioning between BLIP and OFA (in German, translated with Helsinki-NLP)."

input_ = [gr.inputs.Image(type='filepath', label="Input Image")]
output_ = [gr.outputs.Textbox(label="BLIP Nucleus sampling output"),
           gr.outputs.Textbox(label="BLIP Beam search output"),
           gr.outputs.Textbox(label="OFA output")]

iface = gr.Interface(blipofasinki, input_, output_, description=description)
iface.launch(debug=True, show_error=True)