from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the model and tokenizer
model_path = 'model_data/finetuned_gpt'
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def generate_text(prompt_text, length, temperature, beams):
    encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
    encoded_prompt = encoded_prompt.to(device)

    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_length=length,
        temperature=temperature,
        top_k=20,
        top_p=0.9,
        repetition_penalty=1.2,
        do_sample=True,
        num_return_sequences=beams,
    )

    # Decode the generated text
    generated_sequence = output_sequences[0].tolist()
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)

    # Remove the prompt from the generated text
    text = text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)):]

    return text.strip()
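
# Quick sanity check of generate_text before wiring it into the UI. The prompt and
# sampling values below are illustrative only, and `beams` just sets
# num_return_sequences for sampling; only the first returned sequence is decoded above.
# Left commented out so it does not run when the Streamlit app starts.
# print(generate_text("Once upon a time", length=120, temperature=0.8, beams=1))
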
# Streamlit interface