from transformers import pipeline import wikipedia wikipedia.set_lang("es") import random import gradio as gr import textwrap model_name = "PlanTL-GOB-ES/roberta-base-bne-sqac" nlp = pipeline('question-answering', model=model_name, tokenizer=model_name) def get_wiki_article(topic): topic=topic try: search = wikipedia.search(topic, results = 1)[0] except wikipedia.DisambiguationError as e: choices = [x for x in e.options if ('disambiguation' not in x) and ('All pages' not in x) and (x!=topic)] search = random.choice(choices) try: p = wikipedia.page(search) except wikipedia.exceptions.DisambiguationError as e: choices = [x for x in e.options if ('disambiguation' not in x) and ('All pages' not in x) and (x!=topic)] s = random.choice(choices) p = wikipedia.page(s) return p.content, p.url def get_answer(topic, question): w_art, w_url=get_wiki_article(topic) if len(w_art) < 7000: #w_art = w_art[:3000] qa = {'question': question, 'context': w_art} res = nlp(qa) res['type'] = "one-pass" if res['answer'] not in ['\n',' ','']: ans = res['answer'] if res["start"] < 1000: st = 0 else: st = res["start"] - 1000 en = res["end"]+1000 contextual = w_art[st:res["start"]]+ " --> "+ans+ " <-- "+w_art[res["end"]:en] #thetext = w_art[res['start']-500:res['start']+500:] res['text'] = contextual #thetext = w_art[res['start']-200:res['start']+200:] #res['text'] = thetext else: res = recursive_rank(w_art,question) res['type'] = "recursive" return res['answer'], res['text'],''+w_url+'', {'confidence':res['score']},res['type'] def recursive_rank(w_art,question): chk = 1000 parts = round(len(w_art)/1000) size = round(len(w_art)/parts) trozos = textwrap.wrap(w_art, size) answers = [] for chunk in trozos: if len(chunk) < 1000: pass else: qa = {'question': question, 'context': chunk} res = nlp(qa) if res['answer'] not in ['\n',' ','']: ans = res['answer'] if res["start"] < chk: st = 0#0newres["start"] else: st = res["start"] - chk # if newres["end"] < 500: # en = newres["end"] # else: en = res["end"]+chk contextual = chunk[st:res["start"]]+ " --> "+ans+ " <-- "+chunk[res["end"]:en] thetext = contextual#chunk[res['start']-300:res['start']+300:] res['text'] = thetext answers.append(res) answers.sort(key=lambda x: x['score'], reverse=True) return answers[0] inputs = [ gr.inputs.Textbox(lines=5, label="Tema"), gr.inputs.Textbox(lines=5, label="Pregunta") ] outputs = [ gr.outputs.Textbox(type='str',label="Respuesta"), gr.outputs.Textbox(type='str',label="Contexto de la Respuesta"), gr.outputs.HTML(label="Articulo de Referencia"), gr.outputs.Label(type="confidences",label="Confianza en la respuesta (asumiendo que la página de referencia es correcta para el tema)"), gr.outputs.Textbox(type='str',label="Tipo de Búsqueda"), ] title = "Pregunta/Respuesta en la Wikipedia en Castellano" description = """