File size: 2,809 Bytes
9a0d2e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be59158
9a0d2e1
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
"""Hackathon_Illuminati.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1B-SaMQ85UdV9DnqZ6_OWqg8oOAlgcZ5V

# LangChain QA Panel App

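This notebook builds a Panel app that answers questions about an uploaded PDF:
the pages are OCR'd with EasyOCR and the extracted text is passed to an
extractive question-answering model.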
"""

# !pip install transformers
# !pip install easyocr
# !pip install pdf2image
# !apt-get install poppler-utils
# 
# ! pip install PyPDF2

import functools
import html
import os
import re
import tempfile

import easyocr
import panel as pn
from pdf2image import convert_from_path
from transformers import pipeline

# Load the 'texteditor' extension (needed by the TextEditor widget below) and
# select the bootstrap template with stretch-width sizing as the default.
pn.extension('texteditor', template="bootstrap", sizing_mode='stretch_width')
# Template styling: cap the main area's width and set the header colour.
pn.state.template.param.update(
    main_max_width="690px",
    header_background="#F08080",
)

# Upload widget for the document; its content is read in qa_result().
file_input = pn.widgets.FileInput(width=300)

# Question editor (toolbar hidden) and the button that triggers qa_result().
prompt = pn.widgets.TextEditor(
    value="", placeholder="Enter your questions here...", height=160, toolbar=False
)
run_button = pn.widgets.Button(name="Run")

# Input area: the question editor stacked above the Run button.
widgets = pn.Row(
    pn.Column(prompt, run_button, margin=5), width = 630
)

@functools.lru_cache(maxsize=1)
def _ocr_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    return easyocr.Reader(['en'])


@functools.lru_cache(maxsize=1)
def _qa_model():
    """Return the shared question-answering pipeline, loading it on first use."""
    return pipeline("question-answering", model='deepset/roberta-base-squad2')


def qa(file, query):
    """Answer ``query`` from the OCR'd text of the PDF at ``file``.

    Each page of the PDF is rendered to a JPEG, read with EasyOCR, and the
    recognised text of all pages is used as the context for an extractive
    question-answering pipeline.

    Args:
        file: Path of the PDF file to read.
        query: The question to answer.

    Returns:
        The pipeline's result mapping; its ``'answer'`` entry holds the
        extracted answer text.
    """
    reader = _ocr_reader()
    page_texts = []
    # Page images go to a private temporary directory so repeated or
    # concurrent calls neither collide on file names nor leave files behind.
    with tempfile.TemporaryDirectory() as workdir:
        for index, image in enumerate(convert_from_path(file)):
            image_path = os.path.join(workdir, f"page{index}.jpg")
            image.save(image_path, 'JPEG')
            page_texts.append(" ".join(reader.readtext(image_path, detail=0)))

    # Separate pages with a space so the last word of one page does not fuse
    # with the first word of the next; pages with no text are skipped.
    context = " ".join(text for text in page_texts if text)

    result = _qa_model()(question=query, context=context)
    print(f"Answer: {result['answer']}")
    return result

convos = []  # list of all panel objects shown so far (questions and answers)


def _plain_text(markup):
    """Return ``markup`` with HTML tags and entities removed and whitespace collapsed."""
    without_tags = re.sub(r"<[^>]+>", " ", markup or "")
    return " ".join(html.unescape(without_tags).split())


def qa_result(_):
    """Answer the current prompt against the uploaded file and render the history.

    When a file has been uploaded and the prompt contains text, the upload is
    written to a temporary PDF, ``qa`` is run on it, and the question/answer
    pair is appended to ``convos``.

    Args:
        _: Value supplied by the binding that triggers this callback; unused.

    Returns:
        A ``pn.Column`` containing every question/answer row collected so far.
    """
    prompt_text = prompt.value
    # The rich-text editor reports its content as HTML; the model should only
    # see the plain question, and markup-only content counts as empty.
    question = _plain_text(prompt_text)

    if file_input.value is not None and question:
        # A private temporary directory replaces the fixed "/content/temp.pdf"
        # path, which only exists on Colab and would be shared by all sessions.
        with tempfile.TemporaryDirectory() as workdir:
            pdf_path = os.path.join(workdir, "temp.pdf")
            file_input.save(pdf_path)
            result = qa(file=pdf_path, query=question)

        convos.extend([
            pn.Row(
                pn.panel("Q: ", width=10),
                prompt_text,
                width=600
            ),
            pn.Row(
                pn.panel("A: ", width=10),
                pn.Column(
                    result["answer"],
                )
            )
        ])
    return pn.Column(*convos, margin=15, width=575, min_height=400)

# Reactive output: qa_result is bound to run_button, so it is re-evaluated
# through that binding; a loading indicator is enabled on the resulting pane.
qa_interactive = pn.panel(
    pn.bind(qa_result, run_button),
    loading_indicator=True,
)

# Scrollable box that holds the placeholder text and the Q/A history.
output = pn.WidgetBox('*Output will show up here:*', qa_interactive, width=630, scroll=True)

# Page layout, top to bottom: instructions, file upload, output box, then the
# question editor with its Run button. .servable() marks it for serving.
pn.Column(
    pn.pane.Markdown("""
    Question Answering with your PDF file!

    1) Upload a PDF. \n
    2) Type a question and click "Run".

    """),
    pn.Row(file_input),
    output,
    widgets

).servable()