"""Gradio chatbot demo that answers questions with an extractive QA pipeline.

A PDF is converted to text, split into passages, stored in an in-memory
document store and queried through a TF-IDF retriever plus a FARM reader.
"""

import functools

import gradio as gr
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import FARMReader, PDFToTextConverter, PreProcessor, TfidfRetriever
from haystack.pipelines import ExtractiveQAPipeline
from sentence_transformers import CrossEncoder, SentenceTransformer

# PDF that backs the chatbot's answers.
PDF_PATH = "data.pdf"
# Question used when the user submits an empty message; also the textbox's
# initial value.
DEFAULT_QUESTION = "¿Dónde puedo solicitar asilo?"
# Reply shown when the pipeline yields no usable answer.
NO_ANSWER_MESSAGE = "Lo siento, no he encontrado una respuesta a tu pregunta."

document_store = InMemoryDocumentStore()

model = "Saturdays/mdeberta-v3-base-squad2_refugees_dataset_finetuned"
# model = 'codellama/CodeLlama-13b-Instruct-hf'
reader = FARMReader(model_name_or_path=model)

# Splits documents into passages of 100 words with a 3-word overlap while
# respecting sentence boundaries.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    split_by="word",
    split_length=100,
    split_respect_sentence_boundary=True,
    split_overlap=3,
)


def print_answers(results):
    """Return the answers of a pipeline result as plain dictionaries.

    Despite its name this function prints nothing; the name is kept for
    backward compatibility.

    Args:
        results: Mapping with an ``"answers"`` entry holding answer objects
            that expose ``answer`` and ``score`` attributes.

    Returns:
        A list with one dict per answer containing the ``"answer"`` and
        ``"score"`` fields whose value is not ``None``.
    """
    fields = ("answer", "score")  # "context",
    filtered_answers = []
    for ans in results["answers"]:
        # Read each attribute once, then drop the fields that are unset.
        values = ((field, getattr(ans, field)) for field in fields)
        filtered_answers.append(
            {field: value for field, value in values if value is not None}
        )
    return filtered_answers


def pdf_to_document_store(pdf_file):
    """Replace the contents of the document store with the given PDF.

    Existing documents are deleted, the PDF is converted to text, split by
    the module-level ``preprocessor`` and written to ``document_store``.

    Args:
        pdf_file: Path of the PDF file to index.

    Returns:
        None.
    """
    document_store.delete_documents()
    converter = PDFToTextConverter(
        remove_numeric_tables=True,
        valid_languages=["es"],
    )
    # Only the first document returned by the converter is indexed.
    documents = [converter.convert(file_path=pdf_file, meta=None)[0]]
    preprocessed_docs = preprocessor.process(documents)
    document_store.write_documents(preprocessed_docs)
    return None


@functools.lru_cache(maxsize=1)
def _get_pipeline():
    """Index ``PDF_PATH`` and build the QA pipeline once, on first use.

    Re-parsing the PDF and rebuilding the retriever for every question is
    slow, and wiping the shared document store per request lets concurrent
    requests observe an empty store.
    """
    pdf_to_document_store(PDF_PATH)
    # The retriever is created only after the documents have been written,
    # matching the original ordering.
    retriever = TfidfRetriever(document_store=document_store)
    return ExtractiveQAPipeline(reader, retriever)


def predict(question):
    """Answer a question from the indexed PDF.

    Args:
        question: The user's question.

    Returns:
        A list of dicts as produced by ``print_answers``; it may be empty
        when the pipeline returns no answers.
    """
    result = _get_pipeline().run(
        query=question,
        params={"Retriever": {"top_k": 5}, "Reader": {"top_k": 3}},
    )
    return print_answers(result)


def respond(message, chat_history):
    """Gradio callback: answer ``message`` and append the turn to the chat.

    Args:
        message: Text typed by the user; a blank message is replaced by
            ``DEFAULT_QUESTION``.
        chat_history: List of ``(user, bot)`` tuples shown in the chatbot;
            it is mutated in place.

    Returns:
        A tuple ``("", chat_history)``: the empty string clears the textbox.
    """
    if not message or not message.strip():
        message = DEFAULT_QUESTION
    answers = predict(message)
    # Guard against an empty result or a missing/empty "answer" field
    # instead of raising IndexError/KeyError inside the UI callback.
    bot_message = answers[0].get("answer") if answers else None
    if not bot_message:
        bot_message = NO_ANSWER_MESSAGE
    chat_history.append((message, bot_message))
    return "", chat_history


# Physical line breaks found inside the original literal are kept as explicit
# "\n" escapes.
description = (
    "Our chatbot helps refugees arriving in Spain by providing information on key topics. \n"
    "\n This project is based on the article titled "
    "[Desarrollando un chatbot para refugiados: nuestra experiencia en Saturdays.AI]"
    "(https://medium.com/saturdays-ai/desarrollando-un-chatbot-para-refugiados-nuestra-experiencia-en-saturdays-ai-9bf2551432c9), "
    "which outlines the process of building a chatbot for refugees. "
    "\n You can find the training script in this "
    "[github repo](https://github.com/jsr90/chatbot_refugiados_train)."
)

# NOTE(review): the nesting of the layout below was reconstructed from the
# order of the calls; confirm the placement of the image in particular.
with gr.Blocks(theme="huggingface") as demo:
    # NOTE(review): these two HTML snippets contain only text and line breaks;
    # any heading markup appears to have been lost - restore it if intended.
    gr.HTML("\n\nChatbot Refugiados (spanish)\n\n")
    gr.HTML(
        "\n\nThe demo you're about to see is from a project currently in development.\n\n"
    )

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot()
        with gr.Column(scale=1):
            with gr.Row():
                msg = gr.Textbox(
                    label="Write your question:",
                    value="¿Dónde puedo solicitar asilo?",
                )
            with gr.Row():
                submit = gr.Button("Submit")
                clear = gr.Button("Clear")
            gr.Image("OIG.jpeg")

    # Pressing Enter and clicking "Submit" trigger the same callback.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    # Returning None resets the chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(description)

if __name__ == "__main__":
    demo.launch()