# %%
import os
from time import sleep

# Opt out of telemetry *before* Haystack is imported, so the setting is
# already in place regardless of when the library reads it.
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"

from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import launch_es

launch_es()
# Fixed pause after starting Elasticsearch, before the store connects to it.
sleep(30)

# %%
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="document",
)

# %%
import pandas as pd

df_document = pd.read_csv("data/articles.csv")
df_document.head()

# %%
# Turn every CSV row into a Haystack document dict: the article text is the
# searchable content, the remaining columns travel along as metadata.
articles = []
for idx, row in df_document.iterrows():
    article = {
        "id": idx,
        "content": row["article"],
        "meta": {
            "chapter_name": row["chapter_name"],
            "article_page": row["article_page"],
            "article_number": row["article_number"],
            "article_name": row["article_name"],
        },
    }
    articles.append(article)

document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")

# %%
from haystack.nodes import BM25Retriever

retriever = BM25Retriever(document_store=document_store)

# %%
from haystack.nodes import FARMReader

model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
reader = FARMReader(
    model_name_or_path=model_ckpt,
    progress_bar=False,
    max_seq_len=384,
    doc_stride=128,
    return_no_answer=False,
    use_gpu=False,
)

# %%
from haystack.pipelines import ExtractiveQAPipeline

pipe = ExtractiveQAPipeline(reader, retriever)

# %%
from textwrap import fill


def run_qa_pipeline(question):
    """Run the extractive QA pipeline for *question* and return its raw output.

    The retriever is asked for 10 candidate documents and the reader for its
    5 best answers.
    """
    results = pipe.run(
        query=question,
        params={
            "Retriever": {"top_k": 10},
            "Reader": {"top_k": 5},
        },
    )
    return results


def results_as_markdown(results):
    """Render the answers in *results* as a Markdown string.

    For each answer, the article it was extracted from is fetched from the
    document store and shown in full (wrapped at 80 columns) under a bold
    header built from the answer's metadata. Entries are separated by a
    blank line. Answers whose source document cannot be found are skipped.
    """
    top_answers = []
    for result in results["answers"]:
        article = document_store.get_document_by_id(result.document_id)
        if article is None:
            # The lookup came back empty; skip this answer instead of
            # raising AttributeError inside the UI callback.
            continue
        meta = result.meta
        formatted_answer = """**Capítulo: {}.\t número: {}.\t nombre: {}.\t página: {}.** {} """.format(
            meta["chapter_name"],
            meta["article_number"],
            meta["article_name"],
            meta["article_page"],
            fill(article.content, 80),
        )
        top_answers.append(formatted_answer)

    return "\n\n".join(top_answers)


def query_qa_pipeline(question):
    """Answer *question* and return the result formatted as Markdown.

    A missing or whitespace-only question yields an empty string without
    invoking the retriever or the reader.
    """
    if not question or not question.strip():
        return ""
    results = run_qa_pipeline(question)
    return results_as_markdown(results)


# %%
import gradio as gr

title = "**CONSOLIDADO NORMAS APROBADAS PARA LA PROPUESTA CONSTITUCIONAL POR EL PLENO DE LA CONVENCIÓN**"
default_question = "educación gratuita"

with gr.Blocks() as demo:
    gr.Markdown(title)
    with gr.Column():
        with gr.Row():
            question = gr.Textbox(
                lines=2,
                max_lines=3,
                label="Pregunta:",
                placeholder=default_question,
            )
        with gr.Row():
            btn = gr.Button("Buscar")
        with gr.Row():
            answers = gr.Markdown()

    # Event wiring must happen inside the Blocks context.
    btn.click(
        fn=query_qa_pipeline,
        inputs=question,
        outputs=answers,
    )

demo.launch(share=True)

# %%