Spaces:
Runtime error
Runtime error
# %%
import os
from time import sleep

from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import launch_es

# Ask Haystack to launch Elasticsearch, then pause for a fixed 30 seconds
# before anything below tries to connect to it.
launch_es()
sleep(30)

# %%
# Set the Haystack telemetry flag to "False" for this process.
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"

# Document store on localhost with empty credentials; documents go to the
# "document" index.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="document",
)
# %%
import pandas as pd

# Load the articles table. The bare `.head()` expression only shows a preview
# when the cell is run interactively.
df_document = pd.read_csv("data/articles.csv")
df_document.head()

# %%
# Build one document dict per CSV row: the row index becomes the id, the
# "article" column the content, and four descriptive columns the metadata.
articles = [
    {
        "id": idx,
        "content": row["article"],
        "meta": {
            "chapter_name": row["chapter_name"],
            "article_page": row["article_page"],
            "article_number": row["article_number"],
            "article_name": row["article_name"],
        },
    }
    for idx, row in df_document.iterrows()
]

# Index everything in one call and report the resulting document count.
document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %%
from haystack.nodes import BM25Retriever

# BM25 retriever reading from the document store created above.
retriever = BM25Retriever(document_store=document_store)

# %%
from haystack.nodes import FARMReader

# Reader checkpoint (a Spanish SQuAD2 fine-tune, judging by its name).
model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# CPU-only reader that never returns a "no answer" result.
reader = FARMReader(
    model_name_or_path=model_ckpt,
    use_gpu=False,
    return_no_answer=False,
    max_seq_len=384,
    doc_stride=128,
    progress_bar=False,
)

# %%
from haystack.pipelines import ExtractiveQAPipeline

# Pipeline built from the reader and the retriever.
pipe = ExtractiveQAPipeline(reader, retriever)
| # %% | |
| from textwrap import fill | |
def run_qa_pipeline(question, *, retriever_top_k=10, reader_top_k=5):
    """Run the extractive QA pipeline for a single question.

    Args:
        question: Query text handed to ``pipe.run``.
        retriever_top_k: ``top_k`` passed to the "Retriever" node. Defaults
            to 10, the value that used to be hard-coded.
        reader_top_k: ``top_k`` passed to the "Reader" node. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        The object returned by ``pipe.run``; ``results_as_markdown`` reads
        its ``"answers"`` entry.
    """
    return pipe.run(
        query=question,
        params={
            "Retriever": {"top_k": retriever_top_k},
            "Reader": {"top_k": reader_top_k},
        },
    )
def results_as_markdown(results):
    """Render the answers of a pipeline run as one Markdown string.

    Each entry of ``results["answers"]`` becomes a bold heading built from
    the answer's metadata (chapter name, article number, article name and
    page), followed by the full text of the source article wrapped at 80
    columns.

    Args:
        results: Mapping with an ``"answers"`` entry. Every answer must
            expose ``document_id`` and a ``meta`` mapping holding the keys
            ``chapter_name``, ``article_number``, ``article_name`` and
            ``article_page``.

    Returns:
        The formatted answers separated by blank lines, or an empty string
        when there are no answers.
    """
    # NOTE(review): the template is rebuilt from the visible text with no
    # leading whitespace on its second and third lines — confirm against the
    # original file if the exact spacing matters.
    template = "**Capítulo: {}.\t número: {}.\t nombre: {}.\t página: {}.**\n{}\n"

    top_answers = []
    for answer in results["answers"]:
        article = document_store.get_document_by_id(answer.document_id)
        meta = answer.meta
        # The lookup may return None. Keep the heading and leave the body
        # empty instead of failing the whole request on `.content`.
        body = fill(article.content, 80) if article is not None else ""
        top_answers.append(
            template.format(
                meta["chapter_name"],
                meta["article_number"],
                meta["article_name"],
                meta["article_page"],
                body,
            )
        )
    return "\n\n".join(top_answers)
def query_qa_pipeline(question):
    """Answer *question* and return the answers formatted as Markdown.

    Used as the click handler of the search button in the Gradio UI.
    """
    return results_as_markdown(run_qa_pipeline(question))
# %%
import gradio as gr

title = "**CONSOLIDADO NORMAS APROBADAS PARA LA PROPUESTA CONSTITUCIONAL POR EL PLENO DE LA CONVENCIÓN**"
default_question = "educación gratuita"

# Single-column layout: question box, search button, then the Markdown
# answer area, each in its own row.
with gr.Blocks() as demo:
    gr.Markdown(title)
    with gr.Column():
        with gr.Row():
            question = gr.Textbox(
                label="Pregunta:",
                placeholder=default_question,
                lines=2,
                max_lines=3,
            )
        with gr.Row():
            btn = gr.Button("Buscar")
        with gr.Row():
            answers = gr.Markdown()
    # Clicking the button sends the textbox content through the QA pipeline
    # and renders the returned Markdown.
    btn.click(fn=query_qa_pipeline, inputs=question, outputs=answers)

demo.launch(share=True)
| # %% | |