V1.0
app.py
ADDED
from transformers import AutoTokenizer, AutoModelForCausalLM, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import torch
import streamlit as st

# Load the pre-trained tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
model = AutoModelForCausalLM.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")

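# Note (an assumption added here, not in the original file): GPT-NeoXT-Chat-Base-20B
# has 20B parameters, roughly 80 GB in fp32 (about 40 GB in fp16), so the plain
# from_pretrained call above needs a very large machine. A sketch for at least
# loading it in half precision, assuming `accelerate` is installed:
#
#   model = AutoModelForCausalLM.from_pretrained(
#       "togethercomputer/GPT-NeoXT-Chat-Base-20B",
#       torch_dtype=torch.float16,
#       device_map="auto",
#   )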
data = "My name is youssef khemiri i am 21 years old and i am a data scientist"

# TextDataset expects a file path, not a raw string, so write the text to disk first
with open("train.txt", "w") as f:
    f.write(data)

# Prepare the dataset
train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="train.txt",
    block_size=128,
)

# Prepare the data collator (mlm=False selects causal language modeling)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False,
)

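# Note (an assumption, not part of the original script): TextDataset is deprecated
# in recent transformers releases in favor of the `datasets` library. A minimal
# sketch of a similar preparation with `datasets`:
#
#   from datasets import Dataset
#   raw = Dataset.from_dict({"text": [data]})
#   train_dataset = raw.map(
#       lambda batch: tokenizer(batch["text"], truncation=True, max_length=128),
#       batched=True,
#       remove_columns=["text"],
#   )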
# Initialize the trainer
training_args = TrainingArguments(
    output_dir='./results',          # output directory (required argument)
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    save_steps=10_000,               # number of steps between saving checkpoints
    save_total_limit=2,              # keep at most this many checkpoints
    prediction_loss_only=True,
    learning_rate=5e-5,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

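# Note (an assumption, not in the original): Streamlit re-runs the whole script on
# every user interaction, so trainer.train() below would restart on each refresh.
# Wrapping the expensive step in a cached function is one way around that:
#
#   @st.cache_resource
#   def train_once():
#       trainer.train()
#       return model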
# Fine-tune the model
trainer.train()
st.write("finished training")

# Inference: prompt in the <human>/<bot> chat format this model expects
inputs = tokenizer("<human>: Tell me about youssef khemiri\n<bot>:", return_tensors='pt').to(model.device)
outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
output_str = tokenizer.decode(outputs[0])
st.write(output_str)
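# Note (an assumption, not in the original): decode keeps special tokens by default,
# and max_new_tokens=10 cuts the reply very short. A cleaner variant:
#
#   outputs = model.generate(**inputs, max_new_tokens=64, do_sample=True, temperature=0.8)
#   st.write(tokenizer.decode(outputs[0], skip_special_tokens=True))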