"""Load a trained Transformer checkpoint and decode a response for one prompt."""

import torch

import Tokenizer
import Train


def load_model(model_path, d_model, ffn_hidden, num_heads, drop_prob, num_layers):
    """Build a ``Train.Transformer`` and restore its weights from disk.

    Args:
        model_path: Path of the file previously written with ``torch.save``
            (passed to ``load_state_dict``, so it is expected to hold a
            state dict).
        d_model, ffn_hidden, num_heads, drop_prob, num_layers: Forwarded
            positionally, in this order, to ``Train.Transformer``.

    Returns:
        The model with the stored weights loaded, after ``model.eval()`` has
        been called on it. The caller is responsible for moving it to the
        target device.
    """
    model = Train.Transformer(d_model, ffn_hidden, num_heads, drop_prob, num_layers)
    # Deserialize onto the CPU so a checkpoint saved from a GPU run can still
    # be opened on a machine without CUDA; the caller moves the model to its
    # device afterwards.
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
    return model


def prepare_input(input_text):
    """Tokenize ``input_text`` and return the ids with a leading batch axis.

    Args:
        input_text: Text handed to ``Tokenizer.tokenize_sequence``.

    Returns:
        An integer tensor whose first dimension (size 1) is the batch
        dimension added by ``unsqueeze(0)``.
    """
    input_tokens = Tokenizer.tokenize_sequence(input_text)
    # NOTE: padding with Tokenizer.pad_to_length(input_tokens,
    # Train.max_sequence_length) is currently disabled.
    # An explicit integer dtype keeps an empty token list from silently
    # becoming a float32 tensor (torch's default for ``torch.tensor([])``).
    input_ids = torch.tensor(input_tokens, dtype=torch.long).unsqueeze(0)  # Add batch dimension
    return input_ids


def generate_output(
    model,
    input_ids,
    max_length,
    eos_token=Tokenizer.vocabulary.get(''),
):
    """Run the model once and decode the greedy (argmax) prediction.

    Args:
        model: Object exposing ``generate(input_ids)``. The result is assumed
            to be per-position scores with the vocabulary on the last axis and
            the batch on the first -- TODO confirm against ``Train``.
        input_ids: Batched token ids, e.g. the output of ``prepare_input``.
        max_length: Upper bound on the number of decoded tokens; ``None``
            disables the cap.
        eos_token: Token id that terminates the response. Decoding keeps
            everything up to and including its first occurrence. ``None``
            disables truncation.
            NOTE(review): the default looks up the empty-string key in
            ``Tokenizer.vocabulary``; confirm that is the intended
            end-of-sequence entry.

    Returns:
        Whatever ``Tokenizer.detokenize_sequence`` returns for the selected
        token ids of the first batch element.
    """
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        out = model.generate(input_ids)
    preds = torch.argmax(out, dim=-1)

    # Only the first batch element is decoded.
    output_tokens = preds[0].tolist()

    if max_length is not None:
        output_tokens = output_tokens[:max_length]

    # Stop at end-of-sequence so trailing positions are not decoded as text.
    if eos_token is not None and eos_token in output_tokens:
        output_tokens = output_tokens[:output_tokens.index(eos_token) + 1]

    AI_response = Tokenizer.detokenize_sequence(output_tokens)
    return AI_response


# Example usage. Guarded so importing this module for its helpers does not
# load a checkpoint or print anything.
if __name__ == "__main__":
    model_path = 'models/my_model.pt'
    input_text = ''

    model = load_model(
        model_path,
        Train.d_model,
        Train.ffn_hidden,
        Train.num_heads,
        Train.drop_prob,
        Train.num_layers,
    )
    model.to(Train.device)

    input_ids = prepare_input(input_text)
    output_text = generate_output(
        model,
        input_ids.to(Train.device),
        Train.max_sequence_length,
    )
    print("Generated Output:", output_text)