mery22 committed on
Commit
6a7d03a
1 Parent(s): 100db0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -32,8 +32,25 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
32
 
33
  tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
34
  model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
35
-
36
- ectorstore=FAISS.load_local(folder_path="index.faiss", embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L12-v2'), allow_dangerous_deserialization=True)# Connect query to FAISS index using a retriever
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  retriever = db.as_retriever(
38
  search_type="mmr",
39
  search_kwargs={'k': 1}
 
32
 
33
  tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
34
  model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
35
+ from langchain_community.document_loaders import TextLoader
36
+ from langchain_text_splitters import CharacterTextSplitter
37
+ from google.colab import drive
38
+ from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
39
+
40
+ # Mount Google Drive
41
+ loader = PyPDFLoader("test-1.pdf")
42
+ data = loader.load()
43
+ # split the documents into chunks
44
+ text_splitter1 = CharacterTextSplitter(chunk_size=512, chunk_overlap=0,separator="\n\n")
45
+ texts = text_splitter1.split_documents(data)
46
+ db = FAISS.from_documents(texts,
47
+ HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L12-v2'))
48
+
49
+
50
+ # Connect query to FAISS index using a retriever
51
+ retriever = db.as_retriever(
52
+ search_type="mmr",
53
+ search_kwargs={'k': 1}
54
  retriever = db.as_retriever(
55
  search_type="mmr",
56
  search_kwargs={'k': 1}