"""Score answer overlap with the QuIP-512-MOCHA model (QAFactEval-style).

For each (question, gold answer, predicted answer) triple, the regression
classifier scores how well the prediction matches the gold answer given the
context. Inputs are concatenated as "<question> <answer> <prediction> <context>"
and fed to the model in batches; the per-example scalar logits are printed.
"""
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

import torch

# Local checkpoint of the QuIP-512-MOCHA answer-overlap model.
MODEL_DIR = "/home/afabbri/evaluation/qafacteval/models/quip-512-mocha"

# Fall back to CPU so the script still runs on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

predictor = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).to(device)
predictor.eval()  # disable dropout etc. for deterministic inference
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

batch_size = 32

# Toy example inputs.
context = "The Yankees beat the Braves in the World Series."
questions = ["Who won the World Series?", "Who beat the Braves?"]
answers = ["The Yankees", "The Yankees"]
predictions = ["The Yankees", "The Yankees"]

# The model expects one sequence per example: "<question> <answer> <prediction> <context>".
batch_sentences = [
    f"{question} {answer} {prediction} {context}"
    for question, answer, prediction in zip(questions, answers, predictions)
]

scores = []
with torch.no_grad():  # inference only: skip autograd bookkeeping
    # Chunk by batch_size so large input lists don't exhaust device memory
    # (every example is padded to the full 512-token length).
    for start in range(0, len(batch_sentences), batch_size):
        batch = batch_sentences[start:start + batch_size]
        inputs = tokenizer(
            batch,
            max_length=512,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )
        outputs = predictor(
            input_ids=inputs["input_ids"].to(device),
            attention_mask=inputs["attention_mask"].to(device),
        )
        # Single-logit regression head: outputs[0] is (batch, 1); take the scalar.
        scores.extend(x[0] for x in outputs[0].cpu().tolist())

print(scores)