"""Set up a causal-LM fine-tuning run for Phi-3-mini on a local text dataset.

Loads the pretrained model and tokenizer from the Hugging Face Hub, then
loads the training corpus from a local plain-text file. The tokenization,
data-collator, and Trainer wiring are expected to follow below.
"""
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset

# Model and tokenizer: downloaded from the Hugging Face Hub on first run.
# NOTE(review): some Phi-3 revisions require trust_remote_code=True to load
# custom modeling code — confirm against the installed transformers version.
model_name = "microsoft/Phi-3-mini-128k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Training corpus: one example per line of the local text file.
# The "text" loader yields a single "train" split with a `text` column.
# NOTE(review): filename suggests the file is pre-tokenized — verify whether
# a tokenization map step below is still needed, or the name is misleading.
dataset = load_dataset("text", data_files="combined_tokenized_data.txt")["train"]

# ... (rest of your code for tokenization, data collator, training arguments, etc.)