import time

import gradio as gr
import torch
import transformers
from langchain import HuggingFacePipeline, LLMChain, PromptTemplate
from transformers import LlamaForCausalLM, LlamaTokenizer


# Hugging Face model ID
model_id = 'SachinKaushik/docGPT'
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map='auto',
)
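# Note: device_map='auto' requires the `accelerate` package, and float16
# weights are meant to be loaded onto a GPU.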


# set model to eval mode
model.eval()

# Build HF Transformers pipeline
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_length=768,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)
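# Note: max_length counts prompt tokens plus generated tokens; max_new_tokens
# would cap only the generated text. do_sample=True with top_k=10 samples from
# the 10 most likely tokens at each step instead of decoding greedily.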

# Set up a minimal pass-through prompt template
template = PromptTemplate(input_variables=['input'], template='{input}')

# Wrap the Hugging Face pipeline in the LangChain LLM class
llm = HuggingFacePipeline(pipeline=pipeline)

# Build a stacked LLM chain, i.e. prompt formatting + LLM
chain = LLMChain(llm=llm, prompt=template)

# Set up the instruction prompt template for the assistant
prompt = PromptTemplate(
    input_variables=["query"],
    template="""You are a helpful AI assistant. You will answer the user's query
with a short but precise answer. If you are not sure about the answer, you state
"I don't know". This is a conversation, not a webpage, there should be ZERO HTML
in the response.

Remember, Assistant responses are concise. Here is the conversation:

User: {query}
Assistant: """
)

# Build the instruction-following chain: prompt formatting + LLM
llm_chain = LLMChain(llm=llm, prompt=prompt)
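# Note: llm_chain is a standalone instruction-answering chain; the Gradio app
# below answers via the RetrievalQA chain built on the same pipeline instead.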


# Import document loaders (PDF, plain text) and the text splitter
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Import chroma as the vector store
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Import vector store tools
from langchain.agents.agent_toolkits import (
    create_vectorstore_agent,
    VectorStoreToolkit,
    VectorStoreInfo
)

# Embedding class
from langchain.embeddings import HuggingFaceEmbeddings

# Web page loader for non-text sources
from langchain.document_loaders import WebBaseLoader

def load_data_in_VectorDB(data_source, doc_type='text', model_id='intfloat/e5-base-v2',
                          chunk_size=300, chunk_overlap=100):
    """Load a text file or web page, split it into chunks, and build the embedding function."""
    if doc_type == "text":
        loader = TextLoader(data_source, encoding="utf-8")
    else:
        loader = WebBaseLoader(data_source)
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap,
                                          add_start_index=True)
    texts = text_splitter.split_documents(documents)
    embeddings = HuggingFaceEmbeddings(model_name=model_id)
    return texts, embeddings

texts, embeddings = load_data_in_VectorDB(
    data_source='https://en.wikipedia.org/wiki/2022%E2%80%9323_NBA_season', doc_type='web')
db = Chroma.from_documents(texts, embeddings)
retriever = db.as_retriever()
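# Note: no persist_directory is given, so the Chroma index is not saved to disk
# and is rebuilt from the source page on every run.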


# Retrieval QA chain combining the LLM and the retriever
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
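# Note: chain_type="stuff" concatenates ("stuffs") all retrieved chunks into a
# single prompt, and return_source_documents=True keeps the source documents so
# their metadata can be reported alongside the answer.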


def generate_answer(query):
    result = qa({"query": query})
    ans = result['result']
    sources = "\n".join({doc.metadata['source'] for doc in result['source_documents']})
    return f"DocGPT Response: {ans} \nSource: {sources}"

theme = gr.themes.Default(  # color constructors
    primary_hue="red",
    secondary_hue="blue",
    neutral_hue="green",
)

with gr.Blocks(css="""#col_container {margin-left: auto; margin-right: auto;}
                #DocumentGPT {height: 520px; overflow: auto;}""", theme=theme) as demo:
    chatbot = gr.Chatbot(label="DocumentGPTBot")
    msg = gr.Textbox(label = "DocGPT")
    clear = gr.ClearButton([msg, chatbot])

    # Append the user message to the chat history and clear the textbox
    def user(user_message, history):
        return "", history + [[user_message, None]]

    # Stream the answer back character by character for a typing effect
    def bot(history):
        bot_message = generate_answer(history[-1][0])
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            time.sleep(0.05)
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    # Feedback buttons (display only; no callbacks are wired up)
    with gr.Row(visible=True) as button_row:
        upvote_btn = gr.Button(value="πŸ‘  Upvote", interactive=True)
        downvote_btn = gr.Button(value="πŸ‘Ž  Downvote", interactive=True)

demo.queue()
demo.launch(debug=True)