jsr90 committed on
Commit
3a6ebd0
1 Parent(s): df90535

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +65 -0
  2. data.pdf +0 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
3
+ from haystack.document_stores import InMemoryDocumentStore
4
+ from haystack.pipelines import ExtractiveQAPipeline
5
+
6
# Path handed to FARMReader as ``model_name_or_path``.
model = "./artifacts/model-afwukuq2:v0/"

# Shared, module-level components used by the functions below.
document_store = InMemoryDocumentStore()
reader = FARMReader(model_name_or_path=model)

# Splits by word into chunks of length 100 with an overlap of 3, respecting
# sentence boundaries, after cleaning whitespace, empty lines and
# headers/footers.
preprocessor = PreProcessor(
    split_by="word",
    split_length=100,
    split_overlap=3,
    split_respect_sentence_boundary=True,
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
)
18
+
19
+
20
def print_answers(results):
    """Reduce a QA pipeline result to the answer text and score of each hit.

    Despite its name, nothing is printed; the filtered list is returned.

    Args:
        results: Mapping with an "answers" entry holding objects that expose
            ``answer`` and ``score`` attributes.

    Returns:
        A list with one dict per answer, containing only those of the
        "answer" / "score" fields whose value is not None. An empty list is
        returned when "answers" is missing, None, or empty.
    """
    fields = ("answer", "score")  # "context" was disabled in the original list
    filtered_answers = []

    # Tolerate a missing or None "answers" entry instead of raising.
    for ans in results.get("answers") or []:
        filtered_ans = {}
        for field in fields:
            # Look each attribute up once; an absent attribute counts as None.
            value = getattr(ans, field, None)
            if value is not None:
                filtered_ans[field] = value
        filtered_answers.append(filtered_ans)

    return filtered_answers
34
+
35
+
36
def pdf_to_document_store(pdf_file):
    """Replace the contents of the module-level document store with one PDF.

    Deletes the documents currently in ``document_store``, converts the PDF
    at ``pdf_file`` to text, runs the first converted document through the
    module-level ``preprocessor`` and writes the result back to the store.

    Args:
        pdf_file: Path of the PDF file to index.

    Returns:
        None.
    """
    document_store.delete_documents()

    pdf_converter = PDFToTextConverter(
        remove_numeric_tables=True,
        valid_languages=["es"],
    )
    converted = pdf_converter.convert(file_path=pdf_file, meta=None)
    # Only the first converted document is kept.
    first_document = converted[0]

    chunks = preprocessor.process([first_document])
    document_store.write_documents(chunks)
    return None
44
+
45
+
46
# Lazily built QA pipeline shared by all calls; None until the first question.
_qa_pipeline = None


def _get_qa_pipeline():
    """Index ``data.pdf`` and build the extractive QA pipeline only once."""
    global _qa_pipeline
    if _qa_pipeline is None:
        pdf_to_document_store("data.pdf")
        # Create the retriever after indexing, as the original flow did.
        retriever = TfidfRetriever(document_store=document_store)
        _qa_pipeline = ExtractiveQAPipeline(reader, retriever)
    return _qa_pipeline


def predict(question):
    """Answer ``question`` against the indexed ``data.pdf``.

    The PDF is indexed and the retriever/pipeline are built on the first call
    and reused afterwards, instead of wiping and re-indexing the document
    store for every question.

    Args:
        question: The user's query string.

    Returns:
        The list of filtered answer dicts produced by ``print_answers``.
    """
    pipe = _get_qa_pipeline()
    result = pipe.run(
        query=question,
        params={"Retriever": {"top_k": 5}, "Reader": {"top_k": 3}},
    )
    return print_answers(result)
54
+
55
+
56
title = "Chatbot Refugiados"

# Three-line textbox in which the user types the question.
question_box = gr.inputs.Textbox(lines=3, label='Haz una pregunta')
example_questions = ['Dónde pedir ayuda?', 'qué hacer al llegar a España?']

# Wire ``predict`` to a text-in / text-out Gradio interface and start it.
iface = gr.Interface(
    fn=predict,
    inputs=[question_box],
    outputs="text",
    title=title,
    theme="huggingface",
    examples=example_questions,
)
iface.launch()
data.pdf ADDED
Binary file (54.2 kB). View file