# palegre
# Add application file beta.
# b19c8bc
# %%
import os
from time import sleep
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import launch_es
# Bring up Elasticsearch through the Haystack helper, then pause for a fixed
# interval before anything tries to connect to it.
es_startup_wait_seconds = 30
launch_es()
sleep(es_startup_wait_seconds)
# %%
# Set the Haystack telemetry flag to "False" for this process.
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"
# Connection settings for the document store: local host, empty credentials,
# and the "document" index that the rest of the script writes to.
es_connection = {
    "host": "localhost",
    "username": "",
    "password": "",
    "index": "document",
}
document_store = ElasticsearchDocumentStore(**es_connection)
# %%
import pandas as pd
# Load the articles table from disk.
df_document = pd.read_csv("data/articles.csv")
# Preview of the first rows; only visible when this cell is run interactively.
df_document.head()
# %%
# Columns copied verbatim into each document's metadata.
meta_columns = ("chapter_name", "article_page", "article_number", "article_name")
# One document dict per CSV row: the DataFrame index is the id, the "article"
# column is the content, and the remaining columns travel along as metadata.
articles = [
    {
        "id": row_index,
        "content": record["article"],
        "meta": {column: record[column] for column in meta_columns},
    }
    for row_index, record in df_document.iterrows()
]
document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %%
from haystack.nodes import BM25Retriever
# Retriever that searches the Elasticsearch-backed document store.
retriever = BM25Retriever(document_store=document_store)
# %%
from haystack.nodes import FARMReader
# Checkpoint name handed to the reader as ``model_name_or_path``.
model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
# Reader options kept together so they can be reviewed at a glance.
reader_settings = {
    "progress_bar": False,
    "max_seq_len": 384,
    "doc_stride": 128,
    "return_no_answer": False,
    "use_gpu": False,
}
reader = FARMReader(model_name_or_path=model_ckpt, **reader_settings)
# %%
from haystack.pipelines import ExtractiveQAPipeline
# Wire the reader and the retriever into a single extractive QA pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
# %%
from textwrap import fill
def run_qa_pipeline(question, retriever_top_k=10, reader_top_k=5):
    """Run the module-level QA pipeline ``pipe`` for a single question.

    Args:
        question: Query text forwarded to ``pipe.run`` as ``query``.
        retriever_top_k: ``top_k`` value passed to the "Retriever" node.
            Defaults to 10, the value that used to be hard-coded.
        reader_top_k: ``top_k`` value passed to the "Reader" node.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        Whatever ``pipe.run`` returns; other code in this file reads its
        ``"answers"`` entry.
    """
    return pipe.run(
        query=question,
        params={
            "Retriever": {"top_k": retriever_top_k},
            "Reader": {"top_k": reader_top_k},
        },
    )
def results_as_markdown(results):
    """Render the answers of a QA pipeline run as one Markdown string.

    For every entry in ``results["answers"]`` the source article is looked up
    in ``document_store`` by the answer's ``document_id``. Each article is
    rendered as a bold header line (chapter, article number, article name and
    page, all read from the answer's ``meta``) followed by the article text
    wrapped at 80 columns.

    Args:
        results: Mapping with an ``"answers"`` entry holding the answer
            objects to render. Each answer must expose ``document_id`` and
            ``meta``.

    Returns:
        The rendered answers separated by blank lines, or an empty string
        when there is nothing to render.
    """
    top_answers = []
    for result in results["answers"]:
        article = document_store.get_document_by_id(result.document_id)
        if article is None:
            # The store may not find the document; skip the answer instead of
            # failing on ``article.content`` below.
            continue
        meta = result.meta
        formatted_answer = """**Capítulo: {}.\t número: {}.\t nombre: {}.\t página: {}.**
{}
""".format(
            meta["chapter_name"],
            meta["article_number"],
            meta["article_name"],
            meta["article_page"],
            fill(article.content, 80),
        )
        top_answers.append(formatted_answer)
    return "\n\n".join(top_answers)
def query_qa_pipeline(question):
    """Answer ``question`` and return the outcome formatted as Markdown.

    Glue between ``run_qa_pipeline`` (which queries the pipeline) and
    ``results_as_markdown`` (which renders its output); used as the click
    handler of the search button.
    """
    return results_as_markdown(run_qa_pipeline(question))
# %%
import gradio as gr
# Heading shown at the top of the page (Markdown bold).
title = "**CONSOLIDADO NORMAS APROBADAS PARA LA PROPUESTA CONSTITUCIONAL POR EL PLENO DE LA CONVENCIÓN**"
# Example query displayed as the textbox placeholder.
default_question = "educación gratuita"
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost — confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(title)
    with gr.Column():
        # Row 1: question input.
        with gr.Row():
            question = gr.Textbox(
                lines=2,
                max_lines=3,
                label="Pregunta:",
                placeholder=default_question,
            )
        # Row 2: search trigger.
        with gr.Row():
            btn = gr.Button("Buscar")
        # Row 3: area where the rendered answers appear.
        with gr.Row():
            answers = gr.Markdown()
    # Clicking the button sends the question through the QA pipeline and
    # writes the Markdown result into the answers area.
    btn.click(fn=query_qa_pipeline, inputs=question, outputs=answers)
demo.launch(share=True)
# %%