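"""Gradio demo app for a PDF question-answering bot.

Pipeline: upload a PDF, split it into chunks, embed the chunks into a Chroma
vector store, and answer questions with a LangChain RetrievalQA chain backed
by a Llama-2 chat model. The Blocks UI at the bottom is still commented out,
so running the script currently opens the Gradio theme builder.
"""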
import random
import shutil  # used by file_upload to copy the uploaded PDF into /tmp

import gradio as gr
import numpy as np
# import torch
# The QA helpers below use these LangChain imports, so they are active;
# alternatives experimented with earlier are left commented out.
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
# from langchain.chains import LLMChain
from langchain.document_loaders import DirectoryLoader, UnstructuredPDFLoader
# from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.embeddings import LlamaCppEmbeddings
# from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
# from PIL import Image


def file_upload(input_file):
    # Persist the uploaded PDF so load_docs() can later pick it up from /tmp.
    # Gradio's UploadButton hands the callback a temp-file object whose .name
    # attribute is its path on disk, so we copy by path rather than calling
    # .read() on a possibly closed file handle. error_box is defined in the
    # Blocks UI at the bottom of this file.
    if input_file is None:
        return {error_box: gr.Textbox(label="Error",
                                      value="No file uploaded.", visible=True)}
    file_path = "/tmp/file.pdf"
    try:
        shutil.copyfile(input_file.name, file_path)
        return {error_box: gr.Textbox(label="Completed",
                                      value=f"File uploaded successfully in {file_path}.", visible=True)}
    except Exception as e:
        return {error_box: gr.Textbox(label="Error",
                                      value=f"Error occurred while writing the file: {e}", visible=True)}

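# Wiring sketch (mirrors the commented-out UI at the bottom of this file):
# the returned {component: update} dict tells Gradio which output to refresh:
#   upload_button.upload(file_upload, upload_button, error_box)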

def respond(message, chat_history):
    # No LLM here yet; reply with a random canned message as a placeholder.
    bot_message = random.choice(["Tell me more about it",
                                 "Cool, but I'm not interested",
                                 "Hmmmm, ok then"])
    chat_history.append((message, bot_message))
    return "", chat_history


# End-to-end QA handler: load and split the documents, build the retrieval
# chain, then run the user's question through it. Note that load_docs()
# expects a directory containing PDFs, not a single file path.
def qa_bot(pdf_file, question):
    texts = load_docs(pdf_file)
    model = setup_dbqa(texts)
    answer = model({'query': question})
    return f"Question: {answer['query']}\nAnswer: {answer['result']}\nSource documents: {answer['source_documents']}"


# Helper function to load documents: scan a directory for *.pdf files with
# DirectoryLoader, parse each with UnstructuredPDFLoader, and split the text
# into 1000-character chunks with no overlap.
def load_docs(file_path):
    loader = DirectoryLoader(file_path,
                             glob="*.pdf",
                             loader_cls=UnstructuredPDFLoader)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                   chunk_overlap=0,
                                                   length_function=len,)
    texts = text_splitter.split_documents(documents)
    return texts
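
# Quick inspection sketch (hypothetical path and output):
#   texts = load_docs("/tmp")
#   print(len(texts), texts[0].page_content[:80])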


# Helper function to set up the question-answering model
def setup_dbqa(texts):
    print("Setting up DBQA ...")
    llm = HuggingFacePipeline.from_model_id(
        model_id="NousResearch/Llama-2-13b-chat-hf",
        task="text-generation",
        # load_in_8bit requires the bitsandbytes package (and a CUDA GPU).
        model_kwargs={"max_length": 1500, "load_in_8bit": True},
    )

    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs={'device': 'cpu'})

    vectorstore = Chroma.from_documents(texts, embeddings, persist_directory="vectorstore")

    prompt = set_qa_prompt()

    return build_retrieval_qa(llm, prompt, vectorstore)


def set_qa_prompt():
    # Set the Llama-2 chat prompt template: the retrieved context is enclosed
    # between $ signs and the question between | bars so the model can tell
    # them apart.
    prompt_template = """<s>[INST] <<SYS>> Use the following pieces of context enclosed between $ to answer the question enclosed between |. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrect. If you don't know the answer to a question, please don't share false information.
    ${context}$ <</SYS>>
    Question: |{question}|
    Answer:[/INST]</s>"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    return prompt
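
# Formatting sketch (hypothetical values):
#   set_qa_prompt().format(context="Gradio is a UI library.",
#                          question="What is Gradio?")
# returns the filled-in [INST] ... [/INST] string that is sent to the model.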


# Build the RetrievalQA object: a 'stuff' chain that packs the top-k
# retrieved chunks into the prompt and also returns the source documents.
def build_retrieval_qa(_llm, _prompt, _vectorstore):
    dbqa = RetrievalQA.from_chain_type(llm=_llm,
                                       chain_type='stuff',
                                       retriever=_vectorstore.as_retriever(search_kwargs={'k': 3}),
                                       return_source_documents=True,
                                       chain_type_kwargs={'prompt': _prompt})
    return dbqa
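
# Usage sketch (same call pattern as qa_bot above):
#   dbqa = build_retrieval_qa(llm, prompt, vectorstore)
#   out = dbqa({'query': 'What is this document about?'})
#   out['result']            # the generated answer
#   out['source_documents']  # the k=3 retrieved chunks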


if __name__ == "__main__":

    # Run with:  gradio gradio_app.py  (or: python gradio_app.py)
    # For now this only opens the Gradio theme builder; the full Blocks UI
    # below is still commented out.
    gr.themes.builder()

    # # Define text and title information 
    # title1 = "## QA App"

    # title2 = " ## Gradio QA Bot"

    # intro = """
    #     Welcome! This is not just any bot, it's a special one equipped with state-of-the-art natural language processing capabilities, and ready to answer your queries.

    #     Ready to explore? Let's get started!

    #     * Step 1: Upload a PDF document.
    #     * Step 2: Type in a question related to your document's content.
    #     * Step 3: Get your answer!

    #     Press "Clear console" before uploading a new document!
    #     """

    # about = """
    #     ## About
    #     This app is an LLM-powered chatbot built using:
    #     - [Gradio](<https://www.gradio.app/>)
    #     - [LangChain](<https://github.com/langchain-ai/langchain>)
    #     - Chat Model = Llama-2-13b-chat-hf
    #     - Retriever model = all-MiniLM-L6-v2

    #     💡 Note: No API key required!
    #     """


    # # Define theme ==> see gr.themes.builder()
    # theme = gr.themes.Soft(
    #     primary_hue="green",
    #     secondary_hue="blue",
    #     neutral_hue="indigo"
    #     ).set(
    #     background_fill_primary='*primary_50',
    #     shadow_drop='*shadow_spread',
    #     button_border_width='*block_border_width',
    #     button_border_width_dark='*block_label_border_width'
    # )


    # with gr.Blocks(theme=theme) as demo:
    #     with gr.Row():
    #         with gr.Column(scale=2, min_width=400):
    #             title1_gr= gr.Markdown(title1)
    #             intro_gr = gr.Markdown(intro)
    #             # Create a Gradio interface with a file upload input
    #             error_box = gr.Textbox(label="Error", visible=False)
    #             # upload_button = gr.Interface(fn=file_upload,
    #             #                 inputs=gr.File(),
    #             #                 outputs=error_box,
    #             #                 description="Drag and drop your document here")
    #             upload_button = gr.UploadButton("Drag and drop your document here", 
    #                                             size="lg", scale=3, min_width=240,
    #                                             file_types=["pdf"])
    #             upload_button.upload(file_upload, upload_button, error_box)

    #         with gr.Column(scale=2, min_width=800):
    #             title2_gr = gr.Markdown(title2)

    #             chatbot = gr.Chatbot(label="Bot", height=500) 
    #             msg = gr.Textbox(label="User", placeholder="Ask a question about the uploaded PDF document.")
    #             chatbot_btn = gr.Button("Submit")
    #             clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")
    #             chatbot_btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])


    #         with gr.Column(scale=3, min_width=600):
    #             with gr.Row():
    #                 about_gr = gr.Markdown(about)  
    #                 logo_gr = gr.Markdown(""" </br> </br>
    #                                     <img src="file/logo_neovision.png" alt="logo" style="width:600px;"/>""")  
    #                 # gr.Image("./logo_neovision.png")


    # gr.close_all()
    # demo.launch(share=True, enable_queue=True)