Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -32,8 +32,25 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
32 |
|
33 |
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
34 |
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
retriever = db.as_retriever(
|
38 |
search_type="mmr",
|
39 |
search_kwargs={'k': 1}
|
|
|
32 |
|
33 |
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
34 |
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
35 |
+
from langchain_community.document_loaders import TextLoader
|
36 |
+
from langchain_text_splitters import CharacterTextSplitter
|
37 |
+
from google.colab import drive
|
38 |
+
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
|
39 |
+
|
40 |
+
# Mount Google Drive (NOTE: no mount call is visible here; the PDF below is loaded from a local path)
|
41 |
+
loader = PyPDFLoader("test-1.pdf")
|
42 |
+
data = loader.load()
|
43 |
+
# Split the loaded documents into chunks (chunk_size=512, no overlap, split on blank lines)
|
44 |
+
text_splitter1 = CharacterTextSplitter(chunk_size=512, chunk_overlap=0,separator="\n\n")
|
45 |
+
texts = text_splitter1.split_documents(data)
|
46 |
+
db = FAISS.from_documents(texts,
|
47 |
+
HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L12-v2'))
|
48 |
+
|
49 |
+
|
50 |
+
# Connect queries to the FAISS index through a retriever (MMR search, k=1)
|
51 |
+
retriever = db.as_retriever(
|
52 |
+
search_type="mmr",
|
53 |
+
search_kwargs={'k': 1}
|
54 |
retriever = db.as_retriever(
|
55 |
search_type="mmr",
|
56 |
search_kwargs={'k': 1}
|