File size: 4,542 Bytes
3c1280f
 
491a313
3c1280f
 
d196b28
3c1280f
491a313
e6f531d
3c1280f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49d02be
b56f6f0
d196b28
 
 
49d02be
 
 
 
 
 
 
 
 
 
 
 
d196b28
 
 
7e3d155
3c1280f
d196b28
274ac07
 
 
ec176c1
d196b28
 
49d02be
ec176c1
 
 
 
0864f69
49d02be
 
 
 
 
 
 
 
 
 
 
ec176c1
 
d196b28
 
 
32cdcf7
3c1280f
32cdcf7
 
3c1280f
 
cfad7c4
274ac07
7e3d155
32cdcf7
 
3c1280f
 
a13c7d1
9bb3f3f
32cdcf7
989d4c0
32cdcf7
2950d04
9bb3f3f
32cdcf7
 
cb303b3
32cdcf7
 
49d02be
9bb3f3f
32cdcf7
9bb3f3f
3c1280f
a13c7d1
 
c633676
ab00b78
35382e3
a13c7d1
3c1280f
 
 
989d4c0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from transformers import pipeline
import wikipedia
wikipedia.set_lang("ca")
import random
import gradio as gr
import textwrap

# Model identifier passed to the pipeline below as both model and tokenizer.
model_name = "projecte-aina/roberta-base-ca-v2-cased-qa"

# Module-level question-answering pipeline, built once at import time and
# shared by get_answer() and recursive_rank().
nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)


def get_wiki_article(topic):
    """Fetch the Wikipedia article that best matches *topic*.

    The topic is first run through ``wikipedia.search`` (top hit only) and the
    resulting title is loaded with ``wikipedia.page``. When a lookup lands on
    a disambiguation page, one of the offered options is picked at random and
    the lookup is retried a bounded number of times.

    Args:
        topic: Free-text topic to look up.

    Returns:
        A ``(content, url)`` tuple for the page that was loaded.

    Raises:
        IndexError: If the search returns no results, or a disambiguation page
            offers no usable option. (Same exception type the original code
            raised in these situations, now with an explanatory message.)
        wikipedia.exceptions.DisambiguationError: If the lookup still lands on
            a disambiguation page after the retry budget is spent.
    """
    # How many disambiguation pages we are willing to hop through before
    # giving up and letting the error propagate.
    max_disambiguation_hops = 3

    def pick_option(err):
        # Drop meta entries and the topic itself so we do not loop back onto
        # the same disambiguation page.
        choices = [
            x for x in err.options
            if ('disambiguation' not in x) and ('All pages' not in x) and (x != topic)
        ]
        if not choices:
            raise IndexError(
                "No usable disambiguation option for topic %r" % (topic,)
            )
        return random.choice(choices)

    try:
        results = wikipedia.search(topic, results=1)
    except wikipedia.exceptions.DisambiguationError as e:
        title = pick_option(e)
    else:
        if not results:
            raise IndexError("No Wikipedia article found for topic %r" % (topic,))
        title = results[0]

    for _ in range(max_disambiguation_hops):
        try:
            p = wikipedia.page(title)
            break
        except wikipedia.exceptions.DisambiguationError as e:
            title = pick_option(e)
    else:
        # Final attempt: any error raised here propagates to the caller.
        p = wikipedia.page(title)
    return p.content, p.url

def get_answer(topic, question):
    """Answer *question* using the Wikipedia article found for *topic*.

    Articles shorter than 7000 characters are sent to the ``nlp`` pipeline in
    a single pass; longer ones are delegated to ``recursive_rank``.

    Args:
        topic: Topic used to locate the Wikipedia article.
        question: Question to ask about that article.

    Returns:
        A 5-tuple ``(answer, context, link_html, confidence, search_type)``:
        the answer string, up to 500 characters of article text on each side
        of the answer with the answer wrapped in ``-->``/``<--`` markers
        (empty when no usable answer was found), an ``<a>`` tag pointing at
        the article, ``{'confidence': score}``, and either ``"one-pass"`` or
        ``"recursive"``.
    """
    from html import escape  # local import: only needed to build the link

    w_art, w_url = get_wiki_article(topic)
    if len(w_art) < 7000:
        res = nlp({'question': question, 'context': w_art})
        res['type'] = "one-pass"
        ans = res['answer']
        if ans not in ['\n', ' ', '']:
            # Show up to 500 characters on each side of the answer span.
            st = max(0, res["start"] - 500)
            en = res["end"] + 500
            res['text'] = (
                w_art[st:res["start"]] + " --> " + ans + " <-- " + w_art[res["end"]:en]
            )
    else:
        res = recursive_rank(w_art, question)
        res['type'] = "recursive"

    # 'text' is only set when a non-blank answer was found; fall back to an
    # empty context instead of raising KeyError.
    context = res.get('text', '')
    # Escape the URL so characters such as '&' or quotes cannot break the
    # generated anchor markup.
    safe_url = escape(w_url, quote=True)
    link = '<a href="' + safe_url + '">' + safe_url + '</a>'
    return res['answer'], context, link, {'confidence': res['score']}, res['type']

def recursive_rank(w_art,question):
    """Answer *question* over a long article by querying it chunk by chunk.

    The article is split with ``textwrap.wrap`` into pieces of roughly 5000
    characters, the module-level ``nlp`` question-answering pipeline is run on
    every piece of at least 500 characters, and the highest-scoring result is
    returned.

    Args:
        w_art: Full article text.
        question: Question asked of every chunk.

    Returns:
        The pipeline result dict of the best-scoring chunk, extended with a
        ``'text'`` key holding up to 500 characters of chunk text on each side
        of the answer (answer wrapped in ``-->``/``<--`` markers). If no chunk
        produced a usable answer, a placeholder dict with an empty answer,
        empty text and a score of 0.0 is returned.
    """
    # Short texts round to zero parts, which would crash the division below.
    parts = max(1, round(len(w_art) / 5000))
    # textwrap.wrap rejects a width of 0 (empty article).
    size = max(1, round(len(w_art) / parts))
    trozos = textwrap.wrap(w_art, size)

    answers = []
    for chunk in trozos:
        if len(chunk) < 500:
            # Too little context to be worth a model call.
            continue
        res = nlp({'question': question, 'context': chunk})
        ans = res['answer']
        if ans in ['\n', ' ', '']:
            continue
        # Show up to 500 characters on each side of the answer span.
        st = max(0, res["start"] - 500)
        en = res["end"] + 500
        res['text'] = chunk[st:res["start"]] + " --> " + ans + " <-- " + chunk[res["end"]:en]
        answers.append(res)

    if not answers:
        # Nothing usable: return a result with the keys callers read rather
        # than failing on an empty list.
        return {'answer': '', 'text': '', 'score': 0.0, 'start': 0, 'end': 0}
    # max() returns the first of equally scored items, matching a stable
    # descending sort followed by taking element 0.
    return max(answers, key=lambda x: x['score'])


# Input widgets, in the positional order of get_answer(topic, question).
inputs = [
          gr.inputs.Textbox(lines=5, label="Tema"),
          gr.inputs.Textbox(lines=5, label="Pregunta")
]
# Output widgets, one per element of the 5-tuple returned by get_answer:
# answer, answer context, article link (HTML), confidence label, search type.
outputs = [
            gr.outputs.Textbox(type='str',label="Resposta"),
            gr.outputs.Textbox(type='str',label="Context de la resposta"),
            gr.outputs.HTML(label="Article de Referencia"),
            gr.outputs.Label(type="confidences",label="Confiança en la resposta (assumint el context i la pàgina del tòpic son correctes)"),
            gr.outputs.Textbox(type='str',label="Tipus de cerca"),
]

# NOTE(review): the title, description and examples below refer to Spanish
# ("Castellano"), while this file sets the Wikipedia language to "ca" and
# loads a "-ca-" model, and the widget labels are in Catalan. Confirm which
# language is intended.
title = "Pregunta/Respuesta en la Wikipedia en Castellano"
# NOTE(review): the description mentions articles of more than 8 thousand
# characters, but get_answer switches to the recursive search at 7000.
description = """

PROVES VERSIÓ CASTELLÁ ...

Pots fer preguntes tipus: que, qui, quin, quan, etc.

Els articles de mes de 8 mil caracteres podrien trigar força per que es fa una cerca recursiva. Si la resposta no es correcta, o la pàgina trobada no és rellevant, intenta replantejar la pregunta o triar com a tema algun terme mes específic.


"""
# Extra HTML passed to the interface as `article`; currently an empty
# <center> element.
article = """<center>


</center>
"""
# Example rows, each a [topic, question] pair matching the two inputs.
examples = [
    ['Invasión de Yugoslavia', '¿Cómo denominaron las fuerzas del Eje la invasión de Yugoslavia?'],
    ['Duna de Pilat', '¿Cómo son las pequeñas dunas que se encuentran junto a la Duna de Pilat?'],
    ['Fútbol', '¿Cuándo empezó a celebrarse la Copa Mundial de Fútbol?'],
    ['Batalla del Ebro', '¿Quién era jefe del Estado Mayor republicano?'],
    ['Batalla del Ebro', '¿Qué bando ganó la Batalla?'],


]

# Build the interface around get_answer and start it at import time, with
# sharing and queueing both disabled.
gr.Interface(get_answer, inputs, outputs, title=title, description=description, examples=examples, article=article, 
 theme="default", flagging_options=["incorrect", "correct"]).launch(share=False,enable_queue=False)