# Source: app.py from a Hugging Face Space; last commit by paascorb.
# Commit 429cb16: "Update app.py".
import gradio as gr
from pathlib import Path
import os
# Runtime dependencies are installed with pip while the module is being
# imported. These calls run before the `transformers` import below, which
# would fail if the package were missing.
# NOTE(review): the exit status returned by os.system is discarded, so a
# failed install goes unnoticed here. Presumably the hosting environment does
# not preinstall these packages — consider declaring them in a requirements
# file instead. TODO confirm.
os.system('pip install transformers')
os.system('pip install --upgrade pip')
os.system('pip install tensorflow')
from transformers import pipeline
# Module-level slot that validate_dataset() resets to None whenever the
# documents table changes. In the visible code nothing stores a value in it;
# it is only populated by the commented-out paperqa implementation.
docs = None
def request_pathname(files):
    """Build the rows of the documents table from the uploaded files.

    Args:
        files: ``None`` when nothing is uploaded, otherwise an iterable of
            objects exposing a ``name`` attribute that holds the file's path.

    Returns:
        A list of ``[filepath, citation string]`` rows, where the citation
        string defaults to the file's base name. When ``files`` is ``None``
        a single empty row (``[[]]``) is returned.
    """
    if files is None:
        return [[]]
    # Path(...).name follows the platform's separator rules instead of
    # assuming "/" is the only possible separator.
    return [[file.name, Path(file.name).name] for file in files]
def validate_dataset(dataset):
    """Report whether the documents table is ready to be queried.

    The table counts as ready when it has at least one cell and the first
    column of its last row is not the empty string.

    Args:
        dataset: pandas DataFrame holding the documents table.

    Returns:
        ``"✨Listo✨"`` when the table is ready, otherwise
        ``"⚠️Esperando documentos..."``.

    Side effects:
        Resets the module-level ``docs`` to ``None``.
    """
    global docs
    docs = None  # clear it out if dataset is modified
    # An empty table has no last row, so iloc[-1, 0] would raise IndexError.
    has_cells = dataset is not None and not dataset.empty
    docs_ready = has_cells and dataset.iloc[-1, 0] != ""
    if docs_ready:
        return "✨Listo✨"
    return "⚠️Esperando documentos..."
# Lazily-built question-answering pipeline, shared by every request so the
# model is loaded once instead of on each click.
_question_answerer = None


def _get_question_answerer():
    """Return the shared question-answering pipeline, building it on first use."""
    global _question_answerer
    if _question_answerer is None:
        _question_answerer = pipeline(
            "question-answering",
            model='distilbert-base-cased-distilled-squad',
        )
    return _question_answerer


def do_ask(question, button, dataset):
    """Answer ``question`` using the documents listed in ``dataset``.

    Every listed document is read as text and queried separately; the answer
    with the highest model score is returned.

    Args:
        question: The user's question.
        button: Current text of the status box; the query only runs when it
            equals ``"✨Listo✨"``.
        dataset: pandas DataFrame with a ``filepath`` column naming the
            documents to search.

    Returns:
        The best answer string, or ``""`` when the table is not ready, the
        question is blank, or no document yields usable text.

    Raises:
        OSError: If a listed file cannot be read.
    """
    # An empty table has no last row, so iloc[-1, 0] would raise IndexError.
    has_cells = dataset is not None and not dataset.empty
    docs_ready = has_cells and dataset.iloc[-1, 0] != ""
    if button != "✨Listo✨" or not docs_ready:
        return ""
    # The pipeline rejects an empty question, so bail out before loading it.
    if not question or not question.strip():
        return ""

    best = None
    for _, row in dataset.iterrows():
        path = row['filepath']
        # Rows without a path (e.g. blank rows in the table) are skipped.
        if not isinstance(path, str) or not path:
            continue
        # NOTE(review): the table is user-editable, so this reads whatever
        # path it contains — consider restricting it to uploaded files.
        # Files are decoded as UTF-8 text; binary formats such as PDF are
        # not parsed here and undecodable bytes are replaced.
        text = Path(path).read_text(encoding="utf-8", errors="replace")
        # The pipeline also rejects an empty context.
        if not text.strip():
            continue
        result = _get_question_answerer()({
            'question': question,
            'context': text,
        })
        if best is None or result['score'] > best['score']:
            best = result
    return best['answer'] if best is not None else ""
# def do_ask(question, button, dataset, progress=gr.Progress()):
# global docs
# docs_ready = dataset.iloc[-1, 0] != ""
# if button == "✨Listo✨" and docs_ready:
# if docs is None: # don't want to rebuild index if it's already built
# import paperqa
# docs = paperqa.Docs()
# # dataset is pandas dataframe
# for _, row in dataset.iterrows():
# key = None
# if ',' not in row['citation string']:
# key = row['citation string']
# docs.add(row['filepath'], row['citation string'], key=key)
# else:
# return ""
# progress(0, "Construyendo índices...")
# docs._build_faiss_index()
# progress(0.25, "Encolando...")
# result = docs.query(question)
# progress(1.0, "¡Hecho!")
# return result.formatted_answer, result.context
# Build the Gradio interface: an upload widget feeds the documents table, the
# table drives the status box, and the button sends the question to do_ask().
with gr.Blocks() as demo:
    gr.Markdown("""
# Document Question and Answer adaptado al castellano por Pablo Ascorbe.
Este espacio ha sido clonado y adaptado de: https://huggingface.co/spaces/whitead/paper-qa
La idea es utilizar un modelo preentrenado de HuggingFace como "distilbert-base-cased-distilled-squad"
y responder las preguntas en inglés, para ello, será necesario hacer primero una traducción de los textos en castellano
a inglés y luego volver a traducir en sentido contrario.
## Instrucciones:
Adjunte su documento, ya sea en formato .txt o .pdf, y pregunte lo que desee.
""")
    uploaded_files = gr.File(
        label="Sus documentos subidos (PDF o txt)",
        file_count="multiple",
    )
    dataset = gr.Dataframe(
        headers=["filepath", "citation string"],
        datatype=["str", "str"],
        col_count=(2, "fixed"),
        interactive=True,
        label="Documentos y citas",
    )
    # Read-only status box; do_ask() only runs while it shows the ready text.
    buildb = gr.Textbox(
        "⚠️Esperando documentos...",
        label="Estado",
        interactive=False,
        show_label=True,
    )
    # Any change to the table re-evaluates the status box.
    dataset.change(validate_dataset, inputs=[dataset], outputs=[buildb])
    # Uploading (or clearing) files rewrites the table rows.
    uploaded_files.change(
        request_pathname, inputs=[uploaded_files], outputs=[dataset])
    query = gr.Textbox(
        placeholder="Introduzca su pregunta aquí...", label="Pregunta")
    ask = gr.Button("Preguntar")
    gr.Markdown("## Respuesta")
    answer = gr.Markdown(label="Respuesta")
    with gr.Accordion("Contexto", open=False):
        gr.Markdown(
            "### Contexto\n\nEl siguiente contexto ha sido utilizado para generar la respuesta:")
        context = gr.Markdown(label="Contexto")
    # do_ask() returns only the answer, so the context panel above is not
    # wired as an output.
    ask.click(fn=do_ask, inputs=[query, buildb, dataset], outputs=[answer])

demo.queue(concurrency_count=20)
demo.launch(show_error=True)