from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
import gradio as gr

# Load the tokenizer from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained("Dofla/distilbert-squad")

# Load the model from the Hugging Face Hub
model = AutoModelForQuestionAnswering.from_pretrained("Dofla/distilbert-squad")

def answer_question(context, question):
    # Tokenize the question/context pair
    inputs = tokenizer.encode_plus(
        question, context, return_tensors="pt", padding=True, truncation=True
    )

    # Run the model; the start/end logits live on the output object
    with torch.no_grad():
        outputs = model(**inputs)
    start_logits = outputs.start_logits
    end_logits = outputs.end_logits

    # Pick the most likely start and end token positions
    start_index = torch.argmax(start_logits, dim=1).item()
    end_index = torch.argmax(end_logits, dim=1).item() + 1

    # Decode the answer span back into text
    answer = tokenizer.decode(inputs["input_ids"][0][start_index:end_index])
    return answer

# Create a Gradio interface for inference
iface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Textbox(lines=7, label="Context"),
        gr.Textbox(lines=1, label="Question"),
    ],
    outputs="text",
    title="Question Answering with Fine-Tuned Model",
)

# Launch the interface
iface.launch(share=True)
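
# Optional sanity check: call answer_question directly before (or instead of) launching
# the Gradio UI. The context and question below are made-up sample text, only for
# illustration; uncomment the lines to run the check.
# sample_context = "The Eiffel Tower was completed in 1889 and stands in Paris."
# sample_question = "When was the Eiffel Tower completed?"
# print(answer_question(sample_context, sample_question))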