# -*- coding: utf-8 -*-
"""Generate Tamil text with a fine-tuned GPT-2 model.

Pulls a sample prompt from the OSCAR Tamil corpus (placeholder until a
manual prompt can be entered — rendering Tamil in the terminal is still
an open issue) and feeds it to a text-generation pipeline backed by the
local gpt2-tamil checkpoint.
"""
import locale

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_DIR = "/home/deepak/sources/gpt2-tamil/gpt2-tamil/"

# Index of the corpus record used as the prompt; `sample_idx` avoids
# shadowing the builtin `id`.
SAMPLE_IDX = 232


def main():
    # Sanity-check the terminal encoding — Tamil output needs UTF-8.
    print(locale.getpreferredencoding())

    # Get a prompt from the dataset; will be replaced by a manual prompt
    # once Tamil font rendering is sorted out.
    dataset = load_dataset("oscar", "unshuffled_deduplicated_ta", split="train")
    tamil_prompt = dataset[SAMPLE_IDX]["text"]
    print(tamil_prompt)

    # BUG FIX: the original built the model via AutoConfig +
    # AutoModelForCausalLM.from_config(config), which creates a model with
    # RANDOM weights — the fine-tuned checkpoint was never loaded, so the
    # pipeline produced gibberish. from_pretrained loads both the config
    # and the trained weights from MODEL_DIR.
    model = AutoModelForCausalLM.from_pretrained(MODEL_DIR)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    model_output = generator(tamil_prompt, max_length=30, num_return_sequences=5)
    print(model_output)


if __name__ == "__main__":
    main()