smaximo commited on
Commit
11661d4
1 Parent(s): c76f495

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
+
5
+ title = "Extractive QA Biomedicine"
6
+ description = """
7
+ <p style="text-align: justify;">
8
+ Taking into account the existence of masked language models trained on a Spanish Biomedical corpus, the objective of this project is to use them to generate extractive QA models for Biomedicine and compare their effectiveness with general masked language models.
9
+
10
+ The models were trained on the SQUAD_ES Dataset (automatic translation of the Stanford Question Answering Dataset into Spanish). SQUAD v2 version was chosen in order to include questions that cannot be answered based on a provided context.
11
+
12
+ The models were evaluated on https://huggingface.co/datasets/hackathon-pln-es/biomed_squad_es_v2 , a subset of the SQUAD_ES dev dataset containing questions related to the Biomedical domain.
13
+ </p>
14
+ """
15
+ article = """
16
+ <p>
17
+ <h3>Results</h3>
18
+
19
+ <table class="table table-bordered table-hover table-condensed">
20
+ <thead><tr><th title="Field #1">Model</th>
21
+ <th title="Field #2">Base Model Domain</th>
22
+ <th title="Field #3">exact</th>
23
+ <th title="Field #4">f1</th>
24
+ <th title="Field #5">HasAns_exact</th>
25
+ <th title="Field #6">HasAns_f1</th>
26
+ <th title="Field #7">NoAns_exact</th>
27
+ <th title="Field #8">NoAns_f1</th>
28
+ </tr></thead>
29
+ <tbody><tr>
30
+ <td>hackathon-pln-es/roberta-base-bne-squad2-es</td>
31
+ <td>General</td>
32
+ <td align="right">67.6341</td>
33
+ <td align="right">75.6988</td>
34
+ <td align="right">53.7367</td>
35
+ <td align="right">70.0526</td>
36
+ <td align="right">81.2174</td>
37
+ <td align="right">81.2174</td>
38
+ </tr>
39
+ <tr>
40
+ <td>hackathon-pln-es/roberta-base-biomedical-clinical-es-squad2-es</td>
41
+ <td>Biomedical</td>
42
+ <td align="right">66.8426</td>
43
+ <td align="right">75.2346</td>
44
+ <td align="right">53.0249</td>
45
+ <td align="right">70.0031</td>
46
+ <td align="right">80.3478</td>
47
+ <td align="right">80.3478</td>
48
+ </tr>
49
+ <tr>
50
+ <td>hackathon-pln-es/roberta-base-biomedical-es-squad2-es</td>
51
+ <td>Biomedical</td>
52
+ <td align="right">67.6341</td>
53
+ <td align="right">74.5612</td>
54
+ <td align="right">47.6868</td>
55
+ <td align="right">61.7012</td>
56
+ <td align="right">87.1304</td>
57
+ <td align="right"> 87.1304</td>
58
+ </tr>
59
+ <tr>
60
+ <td>hackathon-pln-es/biomedtra-small-es-squad2-es</td>
61
+ <td>Biomedical</td>
62
+ <td align="right">29.6394</td>
63
+ <td align="right">36.317</td>
64
+ <td align="right">32.2064</td>
65
+ <td align="right">45.716</td>
66
+ <td align="right">27.1304</td>
67
+ <td align="right">27.1304</td>
68
+ </tr>
69
+ </tbody></table>
70
+
71
+ <h3>Conclusion and Future Work</h3>
72
+ If F1 score is considered, the results show that there may be no advantage in using domain-specific masked language models to generate Biomedical QA models. In any case, close results are observed for the biomedical roberta-based models in comparison with the general roberta-based model.
73
+ <br>
74
+ However, if only unanswerable questions are taken into account, the model with the best F1 metric is hackathon-pln-es/roberta-base-biomedical-es-squad2-es.
75
+
76
+ As future work, the following experiments could be carried out:
77
+
78
+ <ul>
79
+ <li>Use Biomedical masked-language models that were not generated from scratch from a Biomedical corpus but have been adapted from a general model, so as not to lose words and features of Spanish that are also present in biomedical questions and articles.
80
+ <li>Create a Biomedical training dataset with SQUAD v2 format.
81
+ <li>Generate a new and bigger validation dataset based on questions and contexts generated directly in Spanish and not translated as in SQUAD_Es v2.
82
+ <li>Ensemble different models.
83
+ </ul>
84
+ </p>
85
+ """
86
+
87
+ device = 0 if torch.cuda.is_available() else -1
88
+ MODEL_NAMES = ["hackathon-pln-es/biomedtra-small-es-squad2-es",
89
+ "hackathon-pln-es/roberta-base-biomedical-clinical-es-squad2-es",
90
+ "hackathon-pln-es/roberta-base-bne-squad2-es",
91
+ "jamarju/roberta-base-bne-squad-2.0-es"]
92
+
93
+ examples = [
94
+ [MODEL_NAMES[3], "¿Qué entidades están incluidas en el sistema de salud federal?", "En el sistema federal de salud (incluyendo el VA, el Servicio de Salud de la India y el NIH), los farmacéuticos de atención ambulatoria reciben plena autoridad prescriptiva independiente. En algunos estados como Carolina del Norte y Nuevo México estos médicos farmacéuticos reciben una autoridad prescriptiva y diagnóstica colaborativa. En 2011 la Junta de Especialidades Farmacéuticas aprobó la práctica de farmacia ambulatoria como una certificación de junta separada. La designación oficial para los farmacéuticos que pasan el examen de certificación de la farmacia ambulatoria será la Junta Certificada de Ambulatorio Farmacéutica y estos farmacéuticos llevarán las iniciales BCACP"],
95
+ [MODEL_NAMES[3], "¿Qué cidippido se utiliza como descripción de los ctenóforos en la mayoría de los libros de texto?","Para un filo con relativamente pocas especies, los ctenóforos tienen una amplia gama de planes corporales. Las especies costeras necesitan ser lo suficientemente duras para soportar las olas y remolcar partículas de sedimentos, mientras que algunas especies oceánicas son tan frágiles que es muy difícil capturarlas intactas para su estudio. Además, las especies oceánicas no conservan bien, y son conocidas principalmente por fotografías y notas de observadores. Por lo tanto, la mayor atención se ha concentrado recientemente en tres géneros costeros: Pleurobrachia, Beroe y Mnemiopsis. Al menos dos libros de texto basan sus descripciones de ctenóforos en los cidipépidos Pleurobrachia."]
96
+ ]
97
+
98
# Pipelines already built, keyed by model name, so each checkpoint is loaded
# at most once per process instead of on every request.
_PIPELINE_CACHE = {}


def _get_pipeline(model_name):
    """Return the question-answering pipeline for *model_name*, building it on first use."""
    if model_name not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[model_name] = pipeline(
            "question-answering", model=model_name, device=device
        )
    return _PIPELINE_CACHE[model_name]


def getanswer(model_name, question, context):
    """Answer *question* from *context* with the selected QA model.

    Args:
        model_name: Model identifier passed to ``pipeline(model=...)``.
        question: Question text.
        context: Passage the answer is extracted from.

    Returns:
        A ``(answer, score)`` tuple taken from the ``'answer'`` and
        ``'score'`` entries of the pipeline response.
    """
    question_answerer = _get_pipeline(model_name)
    response = question_answerer(question=question, context=context)
    return response['answer'], response['score']
113
+
114
# Input widgets, listed in the order getanswer receives its arguments:
# model name, question, context.
input_components = [
    gr.inputs.Radio(choices=MODEL_NAMES, label="Pick a QA Model"),
    gr.inputs.Textbox(placeholder="Question Here… ", lines=1),
    gr.inputs.Textbox(placeholder="Context Here… ", lines=10),
]

# Output widgets, listed in the order getanswer returns its values:
# answer, score.
output_components = [
    gr.outputs.Textbox(label="Answer"),
    gr.outputs.Label(label='Score', num_top_classes=1),
]

# Assemble the demo page around getanswer and start serving it.
face = gr.Interface(
    fn=getanswer,
    inputs=input_components,
    outputs=output_components,
    title=title,
    description=description,
    article=article,
    examples=examples,
    layout="vertical",
    allow_flagging="never",
)
face.launch()