import requests
import gradio as gr
from transformers import pipeline

# English -> German translation model used to translate the generated captions
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-de")


def blipofasinki(input_img):
    """Caption an image with BLIP (nucleus sampling and beam search) and OFA,
    then translate each English caption into German."""
    b64_string = gr.processing_utils.encode_url_or_file_to_base64(input_img)

    # BLIP with nucleus sampling
    responsen = requests.post(
        url='https://hf.space/embed/Salesforce/BLIP/+/api/predict/',
        json={"data": [b64_string, "Image Captioning", "None", "Nucleus sampling"]})
    jresn = responsen.json()
    capn = jresn["data"][0]
    offset = len("caption:")  # BLIP prefixes its output with "caption:"
    capn = capn[offset:]
    trans_capn = translator(capn)
    tcn = trans_capn[0]['translation_text']

    # BLIP with beam search
    responseb = requests.post(
        url='https://hf.space/embed/Salesforce/BLIP/+/api/predict/',
        json={"data": [b64_string, "Image Captioning", "None", "Beam search"]})
    jresb = responseb.json()
    capb = jresb["data"][0]
    capb = capb[offset:]
    trans_capb = translator(capb)
    tcb = trans_capb[0]['translation_text']

    # OFA captioning
    responseo = requests.post(
        url='https://hf.space/embed/OFA-Sys/OFA-Image_Caption/+/api/predict/',
        json={"data": [b64_string]})
    jreso = responseo.json()
    capo = jreso["data"][0]
    trans_capo = translator(capo)
    tco = trans_capo[0]['translation_text']

    return [tcn, tcb, tco]


description = "A direct comparison in image captioning between BLIP and OFA (in German, translated with Helsinki-NLP)."

input_ = [gr.inputs.Image(type='filepath', label="Input Image")]
output_ = [gr.outputs.Textbox(label="BLIP Nucleus sampling output"),
           gr.outputs.Textbox(label="BLIP Beam search output"),
           gr.outputs.Textbox(label="OFA output")]

iface = gr.Interface(blipofasinki, input_, output_, description=description)
iface.launch(debug=True, show_error=True)