import time

import gradio as gr
import torch
import transformers
from langchain import HuggingFacePipeline, LLMChain, PromptTemplate
from transformers import LlamaForCausalLM, LlamaTokenizer

# Hugging Face model ID
model_id = 'SachinKaushik/docGPT'

tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map='auto',
)

# Set model to eval (inference) mode
model.eval()

# Build HF Transformers text-generation pipeline
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_length=768,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# Pass the Hugging Face pipeline to LangChain's wrapper class
llm = HuggingFacePipeline(pipeline=pipeline)

# Minimal pass-through prompt template and stacked chain (prompt formatting + LLM)
template = PromptTemplate(input_variables=['input'], template='{input}')
chain = LLMChain(llm=llm, prompt=template)

# Prompt template for an instruction with no additional input
prompt = PromptTemplate(
    input_variables=["query"],
    template="""You are a helpful AI assistant; you will answer the user's query with a short but precise answer. If you are not sure about the answer, you state "I don't know". This is a conversation, not a webpage, so there should be ZERO HTML in the response. Remember, Assistant responses are concise.

Here is the conversation:

User: {query}
Assistant: """,
)

# Stacked LLM chain, i.e. prompt formatting + LLM
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Import PDF/text/web document loaders and the text splitter
from langchain.document_loaders import PyPDFLoader, TextLoader, WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter

# Import Chroma as the vector store, plus the retrieval QA chain
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Import vector store agent tools
from langchain.agents.agent_toolkits import (
    create_vectorstore_agent,
    VectorStoreToolkit,
    VectorStoreInfo,
)

# Embedding class
from langchain.embeddings import HuggingFaceEmbeddings


def load_data_in_VectorDB(data_source, doc_type='text',
                          model_id='intfloat/e5-base-v2',
                          chunk_size=300, chunk_overlap=100):
    """Load a text file or web page, split it into chunks, and build embeddings."""
    if doc_type == "text":
        loader = TextLoader(data_source, encoding="utf-8")
    else:
        loader = WebBaseLoader(data_source)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size,
                                          chunk_overlap=chunk_overlap,
                                          add_start_index=True)
    texts = text_splitter.split_documents(documents)
    embeddings = HuggingFaceEmbeddings(model_name=model_id)
    return texts, embeddings


texts, embeddings = load_data_in_VectorDB(
    data_source='https://en.wikipedia.org/wiki/2022%E2%80%9323_NBA_season',
    doc_type='web',
)
db = Chroma.from_documents(texts, embeddings)
retriever = db.as_retriever()

# QA chain combining the LLM and the retriever
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                 retriever=retriever,
                                 return_source_documents=True)


def generate_answer(query):
    """Answer a query with the retrieval QA chain and cite the source documents."""
    result = qa({"query": query})
    answer = result['result']
    sources = "\n".join({doc.metadata['source'] for doc in result['source_documents']})
    return f"DocGPT Response: {answer}\nSource: {sources}"
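# Optional smoke test -- a minimal sketch, not part of the original app flow:
# run one query through the RetrievalQA chain before wiring up the UI.
# SMOKE_TEST is a hypothetical flag and the sample question is illustrative only.
SMOKE_TEST = False
if SMOKE_TEST:
    print(generate_answer("Which team won the 2022-23 NBA season?"))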
theme = gr.themes.Default(  # color constructors
    primary_hue="red",
    secondary_hue="blue",
    neutral_hue="green",
)

with gr.Blocks(
    css="""#col_container {margin-left: auto; margin-right: auto;}
           #DocumentGPT {height: 520px; overflow: auto;}""",
    theme=theme,
) as demo:
    # elem_id ties the chatbot to the #DocumentGPT CSS rule above
    chatbot = gr.Chatbot(label="DocumentGPTBot", elem_id="DocumentGPT")
    msg = gr.Textbox(label="DocGPT")
    clear = gr.ClearButton([msg, chatbot])

    def user(user_message, history):
        # Append the user's message to the chat history and clear the textbox
        return "", history + [[user_message, None]]

    def bot(history):
        # Answer the latest user message and stream it back character by character
        bot_message = generate_answer(history[-1][0])
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            time.sleep(0.05)
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    with gr.Row(visible=True) as button_row:
        upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
        downvote_btn = gr.Button(value="👎 Downvote", interactive=True)

demo.queue()
demo.launch(debug=True)
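# Note on persistence (an assumption, not part of the original script): the
# Chroma.from_documents call above rebuilds the index in memory on every run.
# LangChain's Chroma wrapper can write the index to disk instead, e.g.:
#
#   db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db")
#   db.persist()
#
# and reload it on later runs without re-embedding:
#
#   db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)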