import itertools
import json

import torch

from transformer_model import TransformerModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
d_model = 512           # Dimension of the embeddings and the token representations
seq_length = 10         # Length of the input and output sequences
vocab_size = 25672      # Size of the vocabulary
batch_size = 32         # Batch size for training
num_heads = 8           # Number of heads in multi-head attention
dim_feedforward = 2048  # Dimension of the feedforward network in encoder and decoder

# Assuming the TransformerModel class is defined in transformer_model.py
model = TransformerModel(vocab_size, d_model, num_heads, dim_feedforward, seq_length)
model.load_state_dict(torch.load('transformer_model.pth', map_location=device))
model.to(device)
model.eval()  # Set the model to evaluation mode

# Load the vocabulary
with open('vocabulary.json', 'r') as vocab_file:
    vocab = json.load(vocab_file)

# Make sure the special tokens exist in the vocabulary
if '<unk>' not in vocab:
    vocab['<unk>'] = len(vocab)  # Assign the next integer index to <unk>
if '<pad>' not in vocab:
    vocab['<pad>'] = len(vocab)  # Assign the next integer index to <pad>


def text_to_tensor(text, vocab, seq_length):
    tokens = text.split()
    indices = [vocab.get(token, vocab['<unk>']) for token in tokens]  # Replace unknown tokens with <unk>
    indices = indices[:seq_length]
    indices += [vocab['<pad>']] * (seq_length - len(indices))  # Pad to seq_length
    return torch.tensor(indices, dtype=torch.long, device=device).unsqueeze(0)  # Add batch dimension


input_text = "please make the"
input_tensor = text_to_tensor(input_text, vocab, seq_length)

src = input_tensor
tgt = input_tensor

# Earlier additive-mask versions, kept for reference:
# def generate_square_subsequent_mask(sz):
#     mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
#     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
#     return mask
#
# def create_padding_mask(seq):
#     return (seq == vocab['<pad>']).transpose(0, 1)


# Generate a square subsequent (causal) mask: True above the diagonal blocks attention to future positions
def generate_square_subsequent_mask(sz):
    mask = torch.triu(torch.ones(sz, sz, device=device), diagonal=1).bool()
    return mask


# Create a padding mask: True wherever the sequence holds the padding token
def create_padding_mask(seq):
    mask = (seq == vocab['<pad>']).transpose(0, 1)
    return mask


src_seq_len = src.size(1)
tgt_seq_len = tgt.size(1)

src_mask = generate_square_subsequent_mask(src_seq_len)
# src_mask = torch.zeros((src_seq_len, src_seq_len)).type(torch.bool)
tgt_mask = generate_square_subsequent_mask(tgt_seq_len)

src_key_padding_mask = create_padding_mask(src)
tgt_key_padding_mask = create_padding_mask(tgt)

# Inspect the tensor shapes
print(src.size(), tgt.size())
print(src_mask.size(), tgt_mask.size())
print(src_key_padding_mask.size(), tgt_key_padding_mask.size())

with torch.no_grad():
    output = model(src, tgt, src_mask, tgt_mask,
                   src_key_padding_mask.transpose(0, 1),
                   tgt_key_padding_mask.transpose(0, 1))

predicted_indices = torch.argmax(output, dim=-1).squeeze(0).tolist()
print(predicted_indices)

inverse_vocab = {value: key for key, value in vocab.items()}

# Flatten the nested index list and map indices back to tokens
flattened_list = list(itertools.chain.from_iterable(predicted_indices))
print([inverse_vocab[index] for index in flattened_list])


def generate_prediction(text, model, vocab, seq_length):
    model.eval()  # Make sure the model is in eval mode

    # Convert text to tensor
    input_tensor = text_to_tensor(text, vocab, seq_length)

    # Generate prediction
    with torch.no_grad():
        output = model(input_tensor, input_tensor)  # For simplicity, using the same tensor as src and tgt

    # Convert output tensor to token indices (you may need additional post-processing)
    predicted_indices = torch.argmax(output, dim=-1).squeeze(0).tolist()
    if predicted_indices and isinstance(predicted_indices[0], list):
        predicted_indices = list(itertools.chain.from_iterable(predicted_indices))

    # Map indices back to tokens with the inverse vocabulary
    inverse_vocab = {value: key for key, value in vocab.items()}
    predicted_tokens = [inverse_vocab[index] for index in predicted_indices]

    return predicted_tokens


# Example usage
text = """Here were the servants of your adversary
And yours"""
prediction = generate_prediction(text, model, vocab, seq_length)
print(prediction)
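

# --- Optional: token-by-token greedy decoding (sketch) ---
# generate_prediction() above only re-scores the prompt because it feeds the
# same tensor as src and tgt. The helper below is a minimal sketch of greedy
# autoregressive decoding instead. greedy_decode and max_new_tokens are
# hypothetical names introduced here; the sketch assumes the two-argument
# model(src, tgt) call used above and an output whose last dimension is the
# vocabulary, so adjust the indexing if your TransformerModel differs.
def greedy_decode(text, model, vocab, seq_length, max_new_tokens=5):
    inverse_vocab = {value: key for key, value in vocab.items()}
    src = text_to_tensor(text, vocab, seq_length)
    generated = text.split()[:seq_length]

    with torch.no_grad():
        for _ in range(max_new_tokens):
            if len(generated) >= seq_length:
                break  # No room left in the fixed-length target window
            tgt = text_to_tensor(" ".join(generated), vocab, seq_length)
            output = model(src, tgt)
            # Collapse the logits to one predicted index per target position
            # and take the prediction at the last real (non-padding) position.
            indices = torch.argmax(output, dim=-1).squeeze()
            next_index = indices[len(generated) - 1].item()
            generated.append(inverse_vocab.get(next_index, '<unk>'))

    return " ".join(generated)

# Example (uncomment to try):
# print(greedy_decode("please make the", model, vocab, seq_length))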