Hackoor committed on
Commit
1418f91
·
1 Parent(s): 6fddfa6

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +39 -0
  2. requirements.txt +15 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.vectorstores import FAISS
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.chains.question_answering import load_qa_chain
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain.memory import ConversationBufferMemory
7
+ from langchain.embeddings import HuggingFaceEmbeddings
8
+ from langchain.chains import RetrievalQA
9
+ from langchain.document_loaders import UnstructuredFileLoader
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain.chains import RetrievalQAWithSourcesChain
12
+ from huggingface_hub import notebook_login
13
+ from transformers import pipeline
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM
15
+ from langchain import HuggingFacePipeline
16
+ from langchain.text_splitter import CharacterTextSplitter
17
+ import textwrap
18
+ import sys
19
+ import torch
20
+ os.environ['HuggingFaceHub_API_Token']= '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'
21
+ loader = UnstructuredFileLoader('/content/Highway Traffic Act, R.S.O. 1990, c. H.8.pdf')
22
+ documents = loader.load()
23
+ text_splitter=CharacterTextSplitter(separator='\n',chunk_size=1500,chunk_overlap=300)
24
+ text_chunks=text_splitter.split_documents(documents)
25
+ embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})
26
+ vectorstore=FAISS.from_documents(text_chunks, embeddings)
27
+ notebook_login()
28
+ os.environ['HuggingFaceHub_API_Token']= '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'
29
+ tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf")
30
+ model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-hf", device_map='auto',torch_dtype=torch.float16,load_in_4bit=True, token=True )
31
+ pipe = pipeline("text-generation",model=model,tokenizer= tokenizer,torch_dtype=torch.bfloat16,device_map="auto",max_new_tokens = 1024,do_sample=True,top_k=10,num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
32
+
33
+ llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0.5})
34
+ chain = RetrievalQA.from_chain_type(llm=llm, chain_type = "stuff",return_source_documents=True, retriever=vectorstore.as_retriever())
35
+
36
+ query = "Can goat and paint be transported in same truck ?"
37
+ result=chain({"query": query}, return_only_outputs=True)
38
+ wrapped_text = textwrap.fill(result['result'], width=500)
39
+ wrapped_text
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ torch
3
+ accelerate
4
+ sentence_transformers
5
+ streamlit_chat
6
+ streamlit
7
+ faiss-cpu
8
+ tiktoken
9
+ transformers
10
+ huggingface-hub
11
+ pypdf
12
+ python-dotenv
13
+ replicate
14
+ docx2txt
15
+