# -*- coding: utf-8 -*-
"""Olive_XA.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1WOPxCenW1U3q9tAz2LBSr7TLZxfi0AXe
"""

# Dependencies (uncomment to install in Colab):
#!pip install transformers torch accelerate bitsandbytes -q
#!pip install einops sentence-transformers -q
#!pip install langchain python-docx unstructured -q
#!pip install faiss-gpu -q
#!pip install gradio -q

import os
import textwrap

import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, UnstructuredWordDocumentLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Set your Hugging Face token here (placeholder -- never commit a real token).
os.environ['HuggingFaceHub_API_Token'] = 'xxxxxxxxxxxxxxxxxxxxxxx'
hf_token = os.environ['HuggingFaceHub_API_Token']

# Load every .docx file under the current directory.
path = "./"
text_loader_kwargs = {'autodetect_encoding': True}
loader = DirectoryLoader(path, glob="**/*.docx",
                         loader_cls=UnstructuredWordDocumentLoader,
                         loader_kwargs=text_loader_kwargs)
documents = loader.load()
print(documents[0])

# Split on blank lines into ~600-character chunks with a small overlap.
text_splitter = CharacterTextSplitter(separator='\n\n', chunk_size=600, chunk_overlap=10)
text_chunks = text_splitter.split_documents(documents)
print(len(text_chunks))

# Embed the chunks on GPU and index them with FAISS.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                   model_kwargs={'device': 'cuda'})
vectorstore = FAISS.from_documents(text_chunks, embeddings)

# Load Llama-2-7B-chat in float16; device_map='auto' spreads it across
# whatever accelerators are available.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=hf_token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             token=hf_token)

# The model object is already placed and in float16, so the pipeline needs no
# device_map or torch_dtype of its own (the original torch.int8 is not a valid
# weight dtype for a text-generation pipeline in any case).
pipe = pipeline(task="text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=512,
                do_sample=True,
                top_k=1,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id)

llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})

# "stuff" simply concatenates the retrieved chunks into a single prompt.
chain = RetrievalQA.from_chain_type(llm=llm,
                                    chain_type="stuff",
                                    retriever=vectorstore.as_retriever())
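# Re-embedding every document on each run is the slowest step. A minimal
# sketch of persisting the index with LangChain's FAISS wrapper -- the
# directory name "faiss_index" is an arbitrary choice, not something from
# the original notebook:
vectorstore.save_local("faiss_index")
# Later (or in a fresh session), reload instead of re-embedding:
# vectorstore = FAISS.load_local("faiss_index", embeddings)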
# Sanity-check the chain with a single query.
query = "what is the name of the software?"
result = chain({"query": query}, return_only_outputs=True)
wrapped_text = textwrap.fill(result['result'], width=200)
context = wrapped_text
print(context)

# Colab workaround: some loaders call locale.getpreferredencoding(), which can
# report ANSI_X3.4-1968 and break UTF-8 file reads.
import locale
locale.getpreferredencoding = lambda: "UTF-8"

import time
import gradio as gr

# Simple chat UI: every submitted message is answered by the RetrievalQA chain.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        result = chain({"query": message}, return_only_outputs=True)
        bot_message = textwrap.fill(result['result'], width=200)
        chat_history.append((message, bot_message))
        time.sleep(2)
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()
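# The notebook's original import list also pulled in ConversationalRetrievalChain
# and ConversationBufferMemory without using them: the RetrievalQA chain above
# answers each message in isolation. A minimal sketch of how they could be wired
# in so the bot remembers earlier turns -- the names chat_memory and chat_chain
# are illustrative, not part of the original notebook:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

chat_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chat_chain = ConversationalRetrievalChain.from_llm(llm=llm,
                                                   retriever=vectorstore.as_retriever(),
                                                   memory=chat_memory)
# Note this chain takes {"question": ...} rather than {"query": ...}:
# answer = chat_chain({"question": "what is the name of the software?"})["answer"]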