Milestone3 / train.py
cdb24's picture
Upload 3 files
e324a14
# pip install transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
# Checkpoint identifier; the model and its tokenizer are loaded from the same name.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sample sentences shared by every stage below, so the text is defined once.
x_train = [
    "We are very happy to show you the 🤗 Transformers Library",
    "We hope you don't hate it.",
]
# Target class ids forwarded to the model call, one entry per sentence in x_train.
y_train = torch.tensor([1, 0])

# --- Stage 1: high-level pipeline API --------------------------------------
# The pipeline wraps the already-loaded model and tokenizer. `res` is kept at
# module level so it can be inspected after the script runs.
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
res = classifier(x_train)

# --- Stage 2: tokenizer steps on the first sentence ------------------------
# Three views of the same text, kept for inspection: the token strings, their
# vocabulary ids, and the result of calling the tokenizer directly.
tokens = tokenizer.tokenize(x_train[0])
token_ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = tokenizer(x_train[0])

# --- Stage 3: manual forward pass ------------------------------------------
# Tokenize both sentences together as PyTorch tensors, padded to a common
# length and truncated at 512 tokens.
batch = tokenizer(
    x_train,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)

# Inference only: no gradients are needed for the forward pass.
with torch.no_grad():
    outputs = model(**batch, labels=y_train)
print(outputs)

# Normalize the logits across the class dimension.
predictions = F.softmax(outputs.logits, dim=1)
print(predictions)

# Index of the highest-scoring class for each sentence.
label_ids = torch.argmax(predictions, dim=1)
print(label_ids)

# Map numeric class ids to the label names stored in the model config.
labels = [model.config.id2label[label_id] for label_id in label_ids.tolist()]
print(labels)

# --- Stage 4: save to disk and reload --------------------------------------
# Write tokenizer and model to a local directory, then rebind both names to
# the copies loaded back from that directory.
save_directory = "saved"
tokenizer.save_pretrained(save_directory)
model.save_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)
model = AutoModelForSequenceClassification.from_pretrained(save_directory)