# cebuano-chat / app.py
# (page header residue kept for reference: "syberWolf's picture" / "squeeze em bits" / 6bfeebe)
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
# The tokenizer and the masked-LM weights are loaded from the same pretrained
# identifier, so it is spelled out once and reused for both calls.
MODEL_ID = "jfernandez/cebfil-roberta"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForMaskedLM.from_pretrained(MODEL_ID)
# Define a function to generate responses
def generate_response(text, top_k=5):
    """Extend ``text`` by one token predicted by the masked language model.

    A mask token is appended to ``text``, the model scores every vocabulary
    entry for that position, and one of the ``top_k`` highest-scoring
    candidates is chosen uniformly at random to fill the mask.

    Args:
        text: Prompt to extend.
        top_k: How many of the highest-scoring candidates to sample from.
            Defaults to 5.

    Returns:
        The prompt with the mask filled in, as decoded by the tokenizer.
        Decoding the whole sequence (instead of pasting a raw vocabulary
        token into the string) keeps subword markers out of the reply.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
        IndexError: If no mask token survives tokenization.
    """
    import random

    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Ask the tokenizer for its mask token so the string and the id looked up
    # below always refer to the same special token.
    masked_text = f"{text} {tokenizer.mask_token}"
    inputs = tokenizer(masked_text, return_tensors="pt")
    input_ids = inputs["input_ids"]

    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Only the mask appended above is filled. Taking the last match keeps the
    # logits one-dimensional even if the prompt itself contains a mask token.
    mask_positions = torch.where(input_ids[0] == tokenizer.mask_token_id)[0]
    mask_position = mask_positions[-1].item()
    mask_logits = logits[0, mask_position]

    # topk cannot return more entries than the vocabulary holds.
    k = min(top_k, mask_logits.shape[-1])
    candidate_ids = torch.topk(mask_logits, k=k).indices.tolist()
    chosen_id = random.choice(candidate_ids)

    # Substitute the chosen id for the mask and decode the full sequence,
    # dropping the special tokens the tokenizer added around the text.
    filled_ids = input_ids[0].clone()
    filled_ids[mask_position] = chosen_id
    return tokenizer.decode(filled_ids.tolist(), skip_special_tokens=True)
# Try the generator on a handful of sample prompts and print each result
for sample_prompt in ("Komosta ka", "Unsa imong pangalan", "Salamat sa"):
    print(generate_response(sample_prompt))