import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("valhalla/longformer-base-4096-finetuned-squadv1")
model = AutoModelForQuestionAnswering.from_pretrained("valhalla/longformer-base-4096-finetuned-squadv1")

text = "Huggingface has democratized NLP. Huge thanks to Huggingface for this."
question = "What has Huggingface done?"

encoding = tokenizer(question, text, return_tensors="pt")
input_ids = encoding["input_ids"]

# default is local attention everywhere;
# the forward method will automatically set global attention on question tokens
attention_mask = encoding["attention_mask"]

# recent versions of transformers return a model-output object rather than a
# tuple, so the start/end logits are read from named attributes
with torch.no_grad():
    outputs = model(input_ids, attention_mask=attention_mask)
start_scores = outputs.start_logits
end_scores = outputs.end_logits

all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
answer_tokens = all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores) + 1]
answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens))
# output => democratized NLP
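
# Optional: the automatic question-token handling above can also be made
# explicit by building a global_attention_mask by hand (a minimal sketch,
# assuming the standard Longformer convention: 0 = local, 1 = global).
global_attention_mask = torch.zeros_like(input_ids)
# mark every token up to and including the first separator (the question span)
# for global attention
sep_index = input_ids[0].tolist().index(tokenizer.sep_token_id)
global_attention_mask[0, : sep_index + 1] = 1

with torch.no_grad():
    outputs = model(
        input_ids,
        attention_mask=attention_mask,
        global_attention_mask=global_attention_mask,
    )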