# BLIPvOFAde / app.py
import requests
import gradio as gr
from transformers import pipeline

# English-to-German translation pipeline (Helsinki-NLP OPUS-MT), used to translate the captions
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-de")
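# The translation pipeline returns a list of dicts, e.g. [{'translation_text': 'Ein Hund im Park'}]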
def blipofasinki(input_img):
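    """Caption an image with BLIP (nucleus sampling and beam search) and OFA,
    then translate all three captions from English to German."""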
    # Encode the input image (file path) as a base64 string for the hosted Space APIs
    b64_string = gr.processing_utils.encode_url_or_file_to_base64(input_img)
    # BLIP caption with nucleus sampling (via the hosted Salesforce/BLIP Space API)
    responsen = requests.post(
        url='https://hf.space/embed/Salesforce/BLIP/+/api/predict/',
        json={"data": [b64_string, "Image Captioning", "None", "Nucleus sampling"]})
    jresn = responsen.json()
    capn = jresn["data"][0]
    # Strip the leading "caption:" prefix that BLIP prepends to its output
    offset = len("caption:")
    capn = capn[offset:]
    trans_capn = translator(capn)
    tcn = trans_capn[0]['translation_text']
    # BLIP caption with beam search
    responseb = requests.post(
        url='https://hf.space/embed/Salesforce/BLIP/+/api/predict/',
        json={"data": [b64_string, "Image Captioning", "None", "Beam search"]})
    jresb = responseb.json()
    capb = jresb["data"][0]
    capb = capb[offset:]
    trans_capb = translator(capb)
    tcb = trans_capb[0]['translation_text']
    # OFA caption (via the hosted OFA-Sys/OFA-Image_Caption Space API)
    responseo = requests.post(
        url='https://hf.space/embed/OFA-Sys/OFA-Image_Caption/+/api/predict/',
        json={"data": [b64_string]})
    jreso = responseo.json()
    capo = jreso["data"][0]
    trans_capo = translator(capo)
    tco = trans_capo[0]['translation_text']
    return [tcn, tcb, tco]
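
# Gradio interface: one image input, three text outputs (legacy gr.inputs/gr.outputs API)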
description = "A side-by-side comparison of image captioning with BLIP and OFA (captions translated to German with Helsinki-NLP)."
input_ = [gr.inputs.Image(type='filepath', label="Input Image")]
output_ = [gr.outputs.Textbox(label="BLIP Nucleus sampling output"),
           gr.outputs.Textbox(label="BLIP Beam search output"),
           gr.outputs.Textbox(label="OFA output")]
iface = gr.Interface(blipofasinki, input_, output_, description=description)
iface.launch(debug=True, show_error=True)
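
# Example usage outside the Gradio UI (hypothetical image file name):
#   captions_de = blipofasinki("example.jpg")
#   print(captions_de)  # [nucleus caption, beam caption, OFA caption], each in German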