gpt2-rlhf-reward / README.md
sugam11's picture
Update README.md
1c80254
metadata
license: mit
datasets:
  - Anthropic/hh-rlhf
# Minimal inference example: run one sentence through the sequence
# classification model and read back its raw logits.
import torch  # required for torch.no_grad() below; missing in the original snippet
from transformers import AutoTokenizer, GPT2ForSequenceClassification

# The tokenizer is loaded from the DialogRPT checkpoint, while the model
# weights are loaded from the "sugam11/gpt2-rlhf-reward" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialogRPT-updown")
model = GPT2ForSequenceClassification.from_pretrained("sugam11/gpt2-rlhf-reward")

# return_tensors="pt" makes the tokenizer return PyTorch tensors that can be
# unpacked directly into the model call.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    # NOTE(review): presumably the logits are the reward score for the input
    # text — confirm the number of labels against the model config.
    logits = model(**inputs).logits