import torch
from transformers import AutoTokenizer, LlamaForQuestionAnswering

# Load a Llama 3 checkpoint with a span-extraction QA head on top.
# Note: the base checkpoint has no trained QA head, so from_pretrained
# will warn that the qa_outputs weights are newly initialized; fine-tune
# on a QA dataset before relying on the predictions.
model_name = "meta-llama/Meta-Llama-3-8B"
model = LlamaForQuestionAnswering.from_pretrained(model_name)
# Llama 3 uses a fast (tiktoken-based) tokenizer, so load it via
# AutoTokenizer rather than the SentencePiece-based LlamaTokenizer,
# which only supports Llama 1/2 checkpoints.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Prepare a question and context
question = "What is the capital of France?"
context = (
    "France, a country in Western Europe, is known for its medieval cities, "
    "alpine villages, and Mediterranean beaches. Its capital, Paris, is famed "
    "for its fashion, gastronomy, and culture."
)
inputs = tokenizer(question, context, return_tensors="pt")

# Run a forward pass and pick the most likely answer span
with torch.no_grad():
    outputs = model(**inputs)

answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()
answer = tokenizer.decode(
    inputs.input_ids[0][answer_start_index : answer_end_index + 1],
    skip_special_tokens=True,
)
print(f"Answer: {answer}")