|
import gradio as gr |
|
|
|
from PIL import Image |
|
|
|
import json |
|
import numpy as np |
|
|
|
|
|
import prometheus_client as prom |
|
import pandas as pd |
|
|
|
import os |
|
from transformers import VisionEncoderDecoderModel,pipeline, ViTImageProcessor, AutoTokenizer |
|
import torch |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Held-out evaluation split. NOTE(review): `test_data` is never referenced
# anywhere else in this file — presumably consumed by evaluation code that
# lived here previously, or dead; verify before removing.
test_data=pd.read_csv("test.csv")




# Prometheus gauge exposing the latest BERTScore F1 of generated captions;
# written by update_metrics() and scraped via the /metrics endpoint below.
f1_metric = prom.Gauge('bertscore_f1_score', 'F1 score for captions')
|
|
|
|
|
|
|
|
|
def update_metrics(eval_pred=None):
    """Compute caption-quality metrics and publish the BERTScore F1 gauge.

    Decodes predicted and reference token ids, computes ROUGE-2 and
    BERTScore through the module-level ``dict_metrics`` metric objects,
    stores the ROUGE-2 score in ``dict_metric_scores``, and sets the
    Prometheus ``f1_metric`` gauge to the mean BERTScore F1.

    Args:
        eval_pred: Object exposing ``label_ids`` and ``predictions`` id
            arrays (e.g. a transformers ``EvalPrediction``). The original
            code read a global ``eval_pred`` that is never defined in this
            module, so every call raised ``NameError``; it is now an
            explicit, defaulted parameter. When ``None`` the function is a
            no-op, so the existing zero-argument call from ``/metrics``
            keeps working and leaves the gauge at its last value.
    """
    if eval_pred is None:
        # Nothing to score yet (e.g. /metrics scraped before any
        # evaluation ran) — keep the gauge unchanged.
        return

    labels_ids = eval_pred.label_ids
    pred_ids = eval_pred.predictions

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    # -100 marks ignored positions in HF label tensors; map them to the
    # pad token id so batch_decode can handle them.
    labels_ids[labels_ids == -100] = tokenizer.pad_token_id
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = dict_metrics["rouge2"].compute(
        predictions=pred_str, references=label_str, rouge_types=["rouge2"]
    )
    dict_metric_scores["rouge2_score"] = rouge_output['rouge2']

    bertscore_output = dict_metrics["bertscore"].compute(
        predictions=pred_str, references=label_str, lang="en"
    )
    # bertscore returns one F1 per example, but Gauge.set() requires a
    # single number (it calls float() on its argument) — the original
    # passed the whole list, which raises TypeError. Aggregate with the
    # mean instead.
    f1_metric.set(float(np.mean(bertscore_output['f1'])))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Recover the encoder/decoder checkpoint names recorded in the saved
# model's config, so the matching tokenizer and image processor can be
# instantiated from the same base checkpoints used during training.
with open("model/config.json") as f:
    model_config = json.load(f)
encoder_name_or_path = model_config["encoder"]["_name_or_path"]
decoder_name_or_path = model_config["decoder"]["_name_or_path"]

print(encoder_name_or_path, decoder_name_or_path)

# Load the fine-tuned weights saved under model/. The original called
# from_encoder_decoder_pretrained(encoder, decoder), which re-assembles
# the two *base* checkpoints from the Hub and silently discards the
# fine-tuned weights stored in model/ — the directory whose config.json
# is read above. NOTE(review): assumes the full model (weights included)
# was saved to model/; confirm pytorch_model.bin / safetensors is there.
model = VisionEncoderDecoderModel.from_pretrained("model")

tokenizer = AutoTokenizer.from_pretrained(decoder_name_or_path)
# GPT-2-style decoders ship without a pad token; reuse unk for padding.
tokenizer.pad_token = tokenizer.unk_token

feature_extractor = ViTImageProcessor.from_pretrained(encoder_name_or_path)

device = "cuda" if torch.cuda.is_available() else "cpu"
# Keep the model on the same device predict_event sends the inputs to;
# the original left the model on CPU while pixel values could be moved
# to CUDA, causing a device-mismatch error at inference time.
model.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict_event(image):
    """Generate a text caption for *image*.

    Args:
        image: A PIL image (or array) accepted by the ViT image processor.

    Returns:
        The decoded caption string.
    """
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        output_ids = model.generate(pixel_values)
    # skip_special_tokens drops BOS/EOS/pad markers from the caption. The
    # original also wrapped the text in ANSI colour escapes ('\033[96m'),
    # which render as literal garbage characters in the Gradio text box.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
# NOTE(review): neither `app` nor `Response` is defined or imported in this
# file — as written, this decorator raises NameError the moment the module
# is imported. Presumably a FastAPI app (`from fastapi import FastAPI,
# Response` plus `app = FastAPI()`) existed here or in a surrounding file;
# restore those definitions or remove this endpoint.
@app.get("/metrics")

async def get_metrics():
    # Prometheus scrape endpoint: refresh the gauges, then serialize the
    # whole default registry in the text exposition format.

    update_metrics()

    return Response(media_type="text/plain", content= prom.generate_latest())
|
|
|
|
|
|
|
# Page metadata shown in the Gradio header.
title = "capstone"
description = "final capstone"


# The original created `title` and `description` but never passed them to
# the Interface, so the page rendered without them — wire them in.
iface = gr.Interface(
    predict_event,
    inputs=["image"],
    outputs=["text"],
    title=title,
    description=description,
)

iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|