File size: 4,601 Bytes
3dff4cb
e9840df
 
 
 
 
3dff4cb
e9840df
 
 
 
 
 
 
 
 
 
 
3dff4cb
 
 
 
 
 
 
 
 
e9840df
 
 
 
 
 
 
 
 
 
 
3dff4cb
e9840df
 
 
 
 
 
 
 
 
 
 
 
 
 
3dff4cb
 
 
 
 
 
e9840df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be312e0
 
 
3dff4cb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os 
import gradio as gr

from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub


# Device string handed to the sentence-transformers embedding model via
# model_kwargs. It must be an exact device name: the previous value 'cpu '
# carried a trailing space, which is not a valid device string.
DEVICE = 'cpu'
# Document kinds offered in the UI dropdown and file picker.
# NOTE(review): 'wav' is offered here but chat_api has no loader branch for
# it — confirm whether audio support is planned.
FILE_EXT = ['pdf','text','csv','word','wav']


def loading_pdf():
    """Return the placeholder status text shown while a file is loading."""
    status_text = "Loading..."
    return status_text


def get_openai_chat_model(API_key):
    """Create a LangChain ``OpenAI`` LLM authenticated with ``API_key``.

    Args:
        API_key: OpenAI API key. When given, it is exported as the
            ``OPENAI_API_KEY`` environment variable before the model is
            constructed. When empty/None the environment is left untouched so
            an already-configured key can still be picked up.

    Returns:
        A ``langchain.llms.OpenAI`` instance built with default settings.

    Raises:
        ImportError: if ``langchain.llms.OpenAI`` cannot be imported.
    """
    try:
        from langchain.llms import OpenAI
    except ImportError as err:
        # Raising a bare string is itself a TypeError in Python 3 and hides
        # the real cause; raise a proper exception chained to the original.
        raise ImportError(
            f"{err}, unable to load openAI. Please install openai and add OPENAIAPI_KEY"
        ) from err
    # os.environ only accepts strings; assigning None would raise TypeError.
    if API_key:
        os.environ["OPENAI_API_KEY"] = API_key
    return OpenAI()

def process_documents(documents,data_chunk=1000,chunk_overlap=50):
    """Split loaded documents into overlapping chunks.

    Args:
        documents: either a single loaded document object or a list/tuple of
            them (the loaders in this file return both shapes).
        data_chunk: forwarded to the splitter as ``chunk_size``.
        chunk_overlap: forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` for
        all supplied documents.

    Raises:
        ValueError: if ``documents`` is None or an empty sequence.
    """
    if documents is None:
        raise ValueError("No documents to process")
    # split_documents iterates over a collection of documents, so a lone
    # document is wrapped rather than indexed (indexing handed the splitter a
    # single object and silently dropped every document after the first).
    if isinstance(documents, (list, tuple)):
        docs = list(documents)
    else:
        docs = [documents]
    if not docs:
        raise ValueError("No documents to process")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(docs)

def get_hugging_face_model(model_id,API_key,temperature=0.1):
    """Build a ``HuggingFaceHub`` LLM for the given repository.

    Args:
        model_id: passed to ``HuggingFaceHub`` as ``repo_id``.
        API_key: passed as ``huggingfacehub_api_token``.
        temperature: sampling temperature placed in ``model_kwargs``.

    Returns:
        The constructed ``HuggingFaceHub`` instance.
    """
    generation_settings = {"temperature": temperature, "max_new_tokens": 2048}
    return HuggingFaceHub(
        huggingfacehub_api_token=API_key,
        repo_id=model_id,
        model_kwargs=generation_settings,
    )

def chat_api(file_data,doc_type='pdf',key=None,llm_model='HuggingFace'):
    """Load an uploaded file, index it in FAISS and instantiate the chosen LLM.

    Args:
        file_data: the uploaded file. Either a plain path, or an object that
            exposes the path as ``.name`` (presumably what the Gradio File
            component passes — TODO confirm for the installed Gradio version).
        doc_type: one of 'pdf', 'text', 'csv' or 'word'; selects the loader.
        key: API key forwarded to the selected LLM backend.
        llm_model: 'HuggingFace' selects the HuggingFaceHub model; any other
            value selects the OpenAI model.

    Returns:
        A short status string for the UI status box.
    """
    # Validate cheap inputs first so the user gets feedback before the
    # embedding model is loaded.
    if file_data is None:
        return "Please upload a file first."
    file_path = getattr(file_data, "name", file_data)

    if doc_type == 'pdf':
        document = process_pdf_document(document_file_name=file_path)
    elif doc_type == 'text':
        document = process_text_document(document_file_name=file_path)
    elif doc_type == 'csv':
        document = process_csv_document(document_file_name=file_path)
    elif doc_type == 'word':
        document = process_word_document(document_file_name=file_path)
    else:
        # Previously an unknown type fell through with document=None and
        # crashed later inside process_documents.
        return f"Unsupported file type: {doc_type!r}"

    texts = process_documents(documents=document)
    embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
    vectordb = FAISS.from_documents(documents=texts, embedding=embedding_model)
    if llm_model == 'HuggingFace':
        llm = get_hugging_face_model(model_id='tiiuae/falcon-7b-instruct',API_key=key)
    else:
        # The original assigned the OpenAI model to `llm_model`, leaving
        # `llm` undefined on this branch.
        llm = get_openai_chat_model(API_key=key)

    # NOTE(review): vectordb and llm are built but neither stored nor
    # returned; a question-answering handler will need access to both.
    return "Ready"



        
def process_text_document(document_file_name):
    """Load a text file with ``TextLoader`` and return the result of ``load()``."""
    return TextLoader(document_file_name).load()


def process_csv_document(document_file_name):
    """Load a CSV file with ``CSVLoader`` and return the result of ``load()``."""
    csv_loader = CSVLoader(file_path=document_file_name)
    return csv_loader.load()


def process_word_document(document_file_name):
    """Load a Word file with ``UnstructuredWordDocumentLoader``.

    Returns whatever the loader's ``load()`` call produces.
    """
    word_loader = UnstructuredWordDocumentLoader(file_path=document_file_name)
    return word_loader.load()


def process_pdf_document(document_file_name):
    """Load a PDF with ``PDFMinerLoader`` and return only its first entry.

    Unlike the other ``process_*_document`` helpers in this file, which
    return the full result of ``load()``, this one returns element ``[0]``.
    """
    loaded_entries = PDFMinerLoader(document_file_name).load()
    first_entry = loaded_entries[0]
    return first_entry





# Page-level CSS: centres the main column and caps its width.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# HTML header rendered at the top of the page. The wording matches the UI
# below: the load button is labelled "Load file to langchain", and both LLM
# choices take an API key from the key textbox.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with Data • OpenAI/HuggingFace</h1>
    <p style="text-align: center;">Upload a file from your computer, click the "Load file to langchain" button, <br />
    when everything is ready, you can start asking questions about the data you uploaded ;) <br />
    This version is just for QA retrieval so it will not use chat history. Select HuggingFace or OpenAI as the LLM
    and enter the matching API key</p>
</div>
"""

# Build the Gradio UI. Components are laid out in creation order inside the
# nested context managers, so statement order here determines the page layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Header HTML defined in `title` above.
        gr.HTML(title)
        
        with gr.Column():
            with gr.Box():
                # LLM backend choice and the API key used for it; both are
                # passed to chat_api by the click handler below.
                LLM_option = gr.Dropdown(['HuggingFace','OpenAI'],label='LLM',info='select the LLM to be used')
                API_key = gr.Textbox(label="You OpenAI/Huggingface API key", type="password")
            with gr.Column():
                # Document kind (passed to chat_api as doc_type) and the file itself.
                file_extension = gr.Dropdown(FILE_EXT, label="File Extensions", info="Select your files extensions!")
                # NOTE(review): FILE_EXT entries have no leading dot — confirm
                # they are valid values for Gradio's `file_types` filter.
                pdf_doc = gr.File(label="Load a File", file_types=FILE_EXT, type="file")
                with gr.Row():
                    # Read-only box that receives the outputs of the load handlers.
                    langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                    load_pdf = gr.Button("Load file to langchain")
        
        # NOTE(review): chatbot, question and submit_button are created but no
        # event handlers are attached to them in this file.
        chatbot = gr.Chatbot()
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
        submit_button = gr.Button("Send Message")
    # Two handlers are registered on the same button: the first writes the
    # "Loading..." placeholder to the status box, the second runs chat_api and
    # writes its return value to the same box.
    load_pdf.click(loading_pdf, None, langchain_status, queue=False)    
    load_pdf.click(chat_api, inputs=[pdf_doc,file_extension,API_key,LLM_option], outputs=[langchain_status], queue=False)