hema1 committed on
Commit
7be7fa0
1 Parent(s): 7755f97

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor
2
+ import gradio as gr
3
+ pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
4
+ converted = pdf_converter.convert(file_path="statistics-for-machine-learning.pdf", meta
5
+
6
+ from haystack.nodes import PreProcessor
7
# Split the converted document into 200-word chunks that overlap by 10 words.
preprocessor = PreProcessor(
    split_by="word",
    split_length=200,
    split_overlap=10,
)
preprocessed = preprocessor.process(converted)

from haystack.document_stores.faiss import FAISSDocumentStore

document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)
# Empty the store before writing so a re-run does not add the same chunks twice.
# `delete_documents()` replaces the deprecated `delete_all_documents()`.
document_store.delete_documents()
document_store.write_documents(preprocessed)

from haystack.nodes import DensePassageRetriever
from haystack.nodes import FARMReader

retriever = DensePassageRetriever(document_store=document_store)
reader = FARMReader(model_name_or_path='deepset/roberta-base-squad2-distilled', use_gpu=False)
# Compute embeddings for the chunks just written, using the retriever.
document_store.update_embeddings(retriever)

from haystack.pipelines import ExtractiveQAPipeline

# Retriever selects candidate chunks; reader extracts answer spans from them.
pipeline = ExtractiveQAPipeline(reader, retriever)
28
+
29
# Smoke test: run a few sample questions through the pipeline and print the
# top-ranked answer for each one.
questions = [
    'What is linear regression?',
    'What is machine learning?',
    'What are the steps in machine learning model development and deployment?',
    'What is classification?',
]
answers = [pipeline.run(query=question) for question in questions]

for answer in answers:
    print('Q:', answer['query'])
    candidates = answer['answers']
    if not candidates:
        # The reader may return no spans; avoid an IndexError on [0].
        print('A: (no answer found)')
        print('\n')
        continue
    best = candidates[0]
    print('A:', best.answer)
    print('Context: ', best.context)
    print('score: ', best.score)
    print('\n')
46
+
47
def correct(question):
    """Answer *question* using the extractive QA pipeline.

    Args:
        question: The question text entered by the user.

    Returns:
        A text block with the top answer, its context and its score, or a
        short message when the question is blank or no answer was found.
    """
    if not question or not question.strip():
        return 'Please enter a question.'

    prediction = pipeline.run(query=question)
    candidates = prediction['answers']
    if not candidates:
        return 'No answer found.'

    best = candidates[0]
    # Return the text to display. The previous `answers.append(...)` returned
    # None and grew the global list on every request.
    return f'Answer: {best.answer}\n\nContext: {best.context}\n\nScore: {best.score}'


# NOTE(review): the file-upload component is still created but is not passed
# to the Interface. `correct` takes only the question, and the pipeline is
# built from a fixed PDF at start-up. Wire it in once uploads are indexed.
app_inputs = gr.inputs.File()

interface = gr.Interface(
    fn=correct,
    inputs=gr.inputs.Textbox(lines=10),
    # Use an output component; the original passed an input Textbox here.
    outputs=gr.outputs.Textbox(),
    title='PDF QA system',
)
interface.launch(share=True)