File size: 4,448 Bytes
b0609c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ff94fb
b0609c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29fa803
deddf21
29fa803
b0609c9
e473bbe
b0609c9
 
 
 
 
 
 
 
 
 
 
 
 
efeaa70
b0609c9
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from pydantic import NoneStr
import os
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from pypdf import PdfReader
import mimetypes
import validators
import requests
import tempfile
import gradio as gr
import openai


def get_empty_state():
    """Return a fresh session-state dict with no knowledge base loaded."""
    # Built on every call so callers never share one mutable dict.
    return dict(knowledge_base=None)


def create_knowledge_base(docs):
    """Split ``docs`` into chunks, embed them and index them with FAISS.

    Args:
        docs: Loaded documents, passed to
            ``CharacterTextSplitter.split_documents``.

    Returns:
        The store produced by ``FAISS.from_documents`` over the chunks.
    """
    # Newline-separated chunks, chunk_size=500 measured with len(), no overlap.
    splitter_options = {
        "separator": "\n",
        "chunk_size": 500,
        "chunk_overlap": 0,
        "length_function": len,
    }
    pieces = CharacterTextSplitter(**splitter_options).split_documents(docs)

    # Embed every chunk with OpenAI embeddings and build the index.
    return FAISS.from_documents(pieces, OpenAIEmbeddings())


def upload_file(file_obj):
    """Index an uploaded file and return it with the new session state.

    Args:
        file_obj: Upload handle whose ``name`` attribute is the path of the
            file on disk.

    Returns:
        A ``(path, state)`` tuple. ``state`` is
        ``{"knowledge_base": <store>}`` on success and
        ``{"knowledge_base": None}`` when the file could not be loaded or
        indexed, so the state always has the same dict shape.
    """
    import logging

    try:
        loader = UnstructuredFileLoader(file_obj.name, strategy="fast")
        knowledge_base = create_knowledge_base(loader.load())
    except Exception:
        # Best effort: keep the UI alive, but leave a trace of what failed
        # and reset the state instead of storing a message string in it.
        logging.getLogger(__name__).exception(
            "Could not index %s; try another file", file_obj.name
        )
        return file_obj.name, {"knowledge_base": None}

    return file_obj.name, {"knowledge_base": knowledge_base}


def upload_via_url(url):
    """Download the document at ``url``, index it and return the new state.

    Args:
        url: Address of the document to fetch.

    Returns:
        A ``(file_path, {"knowledge_base": <store>})`` tuple, where
        ``file_path`` is the temporary file holding the downloaded bytes.

    Raises:
        ValueError: If ``url`` fails validation or the response status code
            is not 200.
    """
    # NOTE(review): the URL is user-supplied and fetched server-side, so this
    # can reach internal addresses (SSRF). Consider restricting hosts.
    if not validators.url(url):
        raise ValueError("Please enter a valid URL")

    # A timeout keeps an unresponsive server from hanging the request forever.
    r = requests.get(url, timeout=30)
    if r.status_code != 200:
        raise ValueError(
            "Check the url of your file; returned status code %s" % r.status_code
        )

    # The header may be absent or carry parameters ("text/html; charset=utf-8");
    # only the bare MIME type can be mapped to an extension.
    content_type = r.headers.get("content-type") or ""
    mime_type = content_type.split(";", 1)[0].strip().lower()
    file_extension = mimetypes.guess_extension(mime_type) if mime_type else None

    # delete=False: the path is returned to the caller, so the file must
    # outlive this function. Leaving the with-block flushes and closes it
    # before the loader reads it back by path.
    with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
        temp_file.write(r.content)
        file_path = temp_file.name

    loader = UnstructuredFileLoader(file_path, strategy="fast")
    knowledge_base = create_knowledge_base(loader.load())
    return file_path, {"knowledge_base": knowledge_base}


def answer_question(question, state):
    """Answer ``question`` from the knowledge base held in ``state``.

    Args:
        question: The user's question.
        state: Session state; expected to be a dict whose
            ``"knowledge_base"`` entry is the vector store, or ``None`` when
            nothing has been indexed yet.

    Returns:
        The answer produced by the QA chain, or the text
        ``"Please upload Proper Document"`` when no knowledge base is
        available or answering fails.
    """
    import logging

    fallback = "Please upload Proper Document"

    # Tolerate a state that is not a dict (for example an error string) or
    # that has no knowledge base, rather than relying on an exception.
    knowledge_base = state.get("knowledge_base") if isinstance(state, dict) else None
    if knowledge_base is None:
        return fallback

    try:
        docs = knowledge_base.similarity_search(question)
        llm = OpenAI(temperature=0.4)
        chain = load_qa_chain(llm, chain_type="stuff")
        return chain.run(input_documents=docs, question=question)
    except Exception:
        # Same user-facing message as before, but the real cause is logged
        # and KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.getLogger(__name__).exception("Failed to answer question")
        return fallback
# HTML banner (logo and heading) shown at the top of the page.
# The wrapper <div> is now closed with </div>; the stray </p> had no opener.
title = """<br><div style="text-align: center;max-width: 700px;">
             <center><img class="image" align="center" src="https://templates.images.credential.net/1612472097627370951721412474196.png" alt="Image" width="230" height="230" ></center>
            <br><br><h1><span style="display:inline-block; margin-left: 1em; text-decoration:none; font-weight:bold;">Hudson & Hayes</span> - Document QA</h1>
            </div>"""
# File extensions offered by the "Browse File" upload button.
accepted_file_types = [".txt", ".pdf", ".doc", ".docx", ".xlsx", ".csv"]

# Page layout: banner, URL box and upload button, uploaded-file preview,
# then the question and answer boxes.
with gr.Blocks(css="style.css", theme="karthikeyan-adople/hudsonhayes-gray") as demo:
    # Per-session state holding the current knowledge base (initially none).
    state = gr.State(get_empty_state())

    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        gr.Markdown("**Upload your file**")

        # Source selection: paste a URL or browse for a local file.
        with gr.Row(elem_id="row-flex"):
            with gr.Column(scale=0.85):
                file_url = gr.Textbox(
                    value="",
                    label="Upload your file",
                    placeholder="Enter a url",
                    show_label=False,
                    visible=True,
                    elem_classes="filenameshow",
                )
            with gr.Column(scale=0.15, min_width=160):
                upload_button = gr.UploadButton(
                    "Browse File",
                    file_types=accepted_file_types,
                    elem_classes="filenameshow",
                )

        file_output = gr.File(elem_classes="filenameshow")

        with gr.Row():
            with gr.Column(scale=1, min_width=0):
                user_question = gr.Textbox(
                    value="",
                    label="Question Box :",
                    show_label=True,
                    placeholder="Ask a question about your file:",
                    elem_classes="spaceH",
                )

        with gr.Row():
            with gr.Column(scale=1, min_width=0):
                answer = gr.Textbox(
                    value="",
                    label="Answer Box :",
                    show_label=True,
                    placeholder="",
                    lines=5,
                )

    # Event wiring: both upload paths refresh the file preview and the
    # session state; submitting a question fills the answer box.
    file_url.submit(upload_via_url, file_url, [file_output, state])
    upload_button.upload(upload_file, upload_button, [file_output, state])
    user_question.submit(answer_question, [user_question, state], [answer])

demo.launch()