JBHF committed on
Commit
1c70265
1 Parent(s): cefc98f

Rename app.py to rag.py

Files changed (2)
  1. app.py +0 -0
  2. rag.py +61 -0
app.py DELETED
File without changes
rag.py ADDED
@@ -0,0 +1,61 @@
+ # rag.py
+
+ from langchain.vectorstores import Chroma
+ from langchain.chat_models import ChatOllama
+ from langchain.embeddings import FastEmbedEmbeddings
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.schema.runnable import RunnablePassthrough
+ from langchain.prompts import PromptTemplate
+ from langchain.vectorstores.utils import filter_complex_metadata
+
+
+ class ChatPDF:
+     vector_store = None
+     retriever = None
+     chain = None
+
+     def __init__(self):
+         self.model = ChatOllama(model="mistral")
+         self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
+         self.prompt = PromptTemplate.from_template(
+             """
+             <s> [INST] You are an assistant for question-answering tasks. Use the following pieces of retrieved context
+             to answer the question. If you don't know the answer, just say that you don't know. Use three sentences
+             maximum and keep the answer concise. [/INST] </s>
+             [INST] Question: {question}
+             Context: {context}
+             Answer: [/INST]
+             """
+         )
+
+     def ingest(self, pdf_file_path: str):
+         docs = PyPDFLoader(file_path=pdf_file_path).load()
+         chunks = self.text_splitter.split_documents(docs)
+         chunks = filter_complex_metadata(chunks)
+
+         self.vector_store = Chroma.from_documents(documents=chunks, embedding=FastEmbedEmbeddings())
+         self.retriever = self.vector_store.as_retriever(
+             search_type="similarity_score_threshold",
+             search_kwargs={
+                 "k": 3,
+                 "score_threshold": 0.5,
+             },
+         )
+
+         self.chain = ({"context": self.retriever, "question": RunnablePassthrough()}
+                       | self.prompt
+                       | self.model
+                       | StrOutputParser())
+
+     def ask(self, query: str):
+         if not self.chain:
+             return "Please, add a PDF document first."
+
+         return self.chain.invoke(query)
+
+     def clear(self):
+         self.vector_store = None
+         self.retriever = None
+         self.chain = None
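For reference, a minimal usage sketch of the ChatPDF class added in this commit, assuming Ollama is running locally with the "mistral" model pulled and the langchain, chromadb, fastembed, and pypdf packages installed. The file name "document.pdf" and the question text are placeholders, not part of the commit.

# usage sketch (hypothetical driver script, not part of this commit)
from rag import ChatPDF

assistant = ChatPDF()
assistant.ingest("document.pdf")   # placeholder path: load, chunk, and index the PDF into Chroma
print(assistant.ask("What is this document about?"))   # run the retrieval -> prompt -> Mistral chain
assistant.clear()                  # drop the vector store, retriever, and chain

Calling ask() before ingest() returns the "Please, add a PDF document first." message, since the chain is only built during ingestion.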