# pip install transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
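
# Load a pretrained sentiment-analysis model and its matching tokenizer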
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
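
# The pipeline bundles tokenization, inference, and post-processing into one call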
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
res = classifier(["We are very happy to show you the 🤗 Transformers Library", "We hope you don't hate it."])
#for result in res:
#    print(result)
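
# Inspect what the tokenizer does under the hood: tokens, token IDs, and the full encoding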
tokens = tokenizer.tokenize("We are very happy to show you the 🤗 Transformers Library")
token_ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = tokenizer("We are very happy to show you the 🤗 Transformers Library")
#print(f' Tokens: {tokens}')
#print(f'Token IDs: {token_ids}')
#print(f'Input IDs: {input_ids}')
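
# Tokenize a batch of sentences by hand instead of going through the pipeline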
x_train = ["We are very happy to show you the 🤗 Transformers Library",
"We hope you don't hate it."]
batch = tokenizer(x_train, padding=True, truncation=True, max_length=512, return_tensors="pt")

# Run the model in inference mode (no gradient tracking)
with torch.no_grad():
    # Forward pass; passing labels also makes the model return a loss
    outputs = model(**batch, labels=torch.tensor([1, 0]))
    print(outputs)
    # Convert raw logits into probabilities
    predictions = F.softmax(outputs.logits, dim=1)
    print(predictions)
    # Pick the most likely class for each sentence
    labels = torch.argmax(predictions, dim=1)
    print(labels)
    # Map class indices to human-readable label names
    labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
    print(labels)

# Save the model and tokenizer to disk
save_directory = "saved"
tokenizer.save_pretrained(save_directory)
model.save_pretrained(save_directory)

# Reload both from the saved directory
tokenizer = AutoTokenizer.from_pretrained(save_directory)
model = AutoModelForSequenceClassification.from_pretrained(save_directory)