"""Image-captioning demo service.

Exposes a Gradio interface (mounted at "/") that captions an uploaded image
with a vision encoder-decoder model, and a "/metrics" endpoint that publishes
a BERTScore F1 gauge in Prometheus text format.
"""

import functools
import json
import os

import gradio as gr
import numpy as np
import pandas as pd
import prometheus_client as prom
import torch
import uvicorn
from fastapi import FastAPI, Response
from PIL import Image
from transformers import (
    AutoTokenizer,
    ViTImageProcessor,
    VisionEncoderDecoderModel,
    pipeline,
)

# Fixed image / reference-caption pair used to compute the published score.
REFERENCE_IMAGE_PATH = "cutegirl.jpeg"
REFERENCE_CAPTION = "A little girl climbing into a wooden playhouse"

app = FastAPI()

f1_metric = prom.Gauge('bertscore_f1_score', 'F1 score for captions')

# The encoder/decoder checkpoint names are read from the saved model config.
with open("model/config.json", encoding="utf-8") as f:
    n = json.load(f)
encoder_name_or_path = n["encoder"]["_name_or_path"]
decoder_name_or_path = n["decoder"]["_name_or_path"]
print(encoder_name_or_path, decoder_name_or_path)

# NOTE(review): this assembles the model from the two checkpoints named in the
# config; it does not read weights from the "model/" directory itself. Confirm
# that fine-tuned weights are not expected to be loaded here.
model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
    encoder_name_or_path, decoder_name_or_path
)
tokenizer = AutoTokenizer.from_pretrained(decoder_name_or_path)
tokenizer.pad_token = tokenizer.unk_token
feature_extractor = ViTImageProcessor.from_pretrained(encoder_name_or_path)

device = "cuda" if torch.cuda.is_available() else "cpu"
# Inputs are moved to `device` before generation, so the model must live there
# too; otherwise generation fails with a device mismatch on CUDA hosts.
model.to(device)


def predict_event(image):
    """Generate a caption for ``image`` (Gradio prediction callback).

    Args:
        image: The image handed over by the Gradio ``"image"`` input
            component; it is passed straight to the image processor.

    Returns:
        The decoded caption wrapped in ANSI colour escape codes.
    """
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values.to(device))
    generated_caption = tokenizer.decode(generated_ids[0])
    # NOTE(review): the ANSI codes are kept for backward compatibility, but
    # they are terminal colour sequences and will probably show up as raw
    # characters in the Gradio text box — confirm they are wanted.
    return '\033[96m' + generated_caption + '\033[0m'


@functools.lru_cache(maxsize=1)
def _load_bertscore():
    """Load the BERTScore metric once and reuse it for every scrape."""
    # Imported lazily so that a missing `evaluate` package only affects the
    # metrics endpoint rather than application start-up.
    import evaluate

    return evaluate.load("bertscore")


def update_metrics():
    """Caption the reference image and publish its BERTScore F1.

    The caption generated for ``REFERENCE_IMAGE_PATH`` is scored against
    ``REFERENCE_CAPTION`` and the resulting F1 is stored in ``f1_metric``.
    """
    # Close the image file as soon as the pixel tensor has been built.
    with Image.open(REFERENCE_IMAGE_PATH) as image:
        pixel_values = feature_extractor(image, return_tensors="pt").pixel_values

    # max_length / num_beams are generation options, so they belong on
    # generate() rather than on the image processor.
    generated_ids = model.generate(
        pixel_values.to(device), max_length=16, num_beams=4
    )
    # Special tokens are stripped so they do not distort the text comparison.
    generated_caption = tokenizer.decode(
        generated_ids[0], skip_special_tokens=True
    )

    # The metric takes parallel lists of predictions and references and
    # returns one score per pair.
    bertscore_output = _load_bertscore().compute(
        predictions=[generated_caption],
        references=[REFERENCE_CAPTION],
        lang="en",
    )
    # A Prometheus gauge holds a single number: use the only pair's F1.
    f1_metric.set(bertscore_output["f1"][0])


@app.get("/metrics")
async def get_metrics():
    """Refresh the caption-quality gauge and return all Prometheus metrics."""
    # NOTE(review): update_metrics() runs model inference synchronously inside
    # an async handler — consider a plain `def` route if this stalls requests.
    update_metrics()
    return Response(media_type="text/plain", content=prom.generate_latest())


title = "capstone"
description = "final capstone"

iface = gr.Interface(
    predict_event,
    inputs=["image"],
    outputs=["text"],
)

# Serve the Gradio UI from the same FastAPI application as the metrics route.
app = gr.mount_gradio_app(app, iface, path="/")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8001)