cheeku2 committed
Commit c2883be
1 Parent(s): 41610a4

Update app/rag.py

Files changed (1)
  1. app/rag.py +18 -27
app/rag.py CHANGED
@@ -4,8 +4,7 @@ from llama_index.core import (
     SimpleDirectoryReader,
     VectorStoreIndex,
     StorageContext,
-    Settings,
-    get_response_synthesizer)
+    Settings)
 from llama_index.core.node_parser import SentenceSplitter
 from llama_index.core.schema import TextNode, MetadataMode
 from llama_index.core.vector_stores import VectorStoreQuery
@@ -20,6 +19,8 @@ store_dir = os.path.expanduser("~/wtp_be_store/")
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+model_url = "https://huggingface.co/Qwen/Qwen2-0.5B-Instruct-GGUF/resolve/main/qwen2-0_5b-instruct-q4_k_m.gguf"
+
 class ChatPDF:
     pdf_count = 0
     text_chunks = []
@@ -33,33 +34,26 @@ class ChatPDF:
         self.client = QdrantClient(path=store_dir)
         self.vector_store = QdrantVectorStore(
             client=self.client,
-            collection_name="rag_documents",
-            # enable_hybrid=True
+            collection_name="rag_documents"
         )
 
         logger.info("initializing the FastEmbedEmbedding")
-        self.embed_model = FastEmbedEmbedding(
-            # model_name="BAAI/bge-small-en"
-        )
+        self.embed_model = FastEmbedEmbedding()
 
         llm = LlamaCPP(
-            model_url="https://huggingface.co/Qwen/Qwen2-0.5B-Instruct-GGUF/resolve/main/qwen2-0_5b-instruct-fp16.gguf",
+            model_url=model_url,
             temperature=0.1,
+            model_path=None,
             max_new_tokens=256,
-            generate_kwargs={"max_tokens": 256, "temperature": 0.1, "top_k": 3},
-            # messages_to_prompt=self.messages_to_prompt,
-            # completion_to_prompt=self.completion_to_prompt,
+            context_window=29440,
+            generate_kwargs={},
             verbose=True,
         )
 
-        # tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
-        # tokenizer.save_pretrained("./models/tokenizer/")
-
         logger.info("initializing the global settings")
         Settings.text_splitter = self.text_parser
         Settings.embed_model = self.embed_model
         Settings.llm = llm
-        # Settings.tokenzier = tokenizer
         Settings.transformations = [self.text_parser]
 
     def ingest(self, files_dir: str):
@@ -73,10 +67,8 @@ class ChatPDF:
             self.doc_ids.extend([doc_idx] * len(curr_text_chunks))
 
         logger.info("enumerating text_chunks")
-        for idx, text_chunk in enumerate(self.text_chunks):
+        for text_chunk in self.text_chunks:
             node = TextNode(text=text_chunk)
-            # src_doc = docs[self.doc_ids[idx]]
-            # node.metadata = src_doc.metadata
             if node.get_content(metadata_mode=MetadataMode.EMBED):
                 self.nodes.append(node)
@@ -93,7 +85,7 @@ class ChatPDF:
         index = VectorStoreIndex(
             nodes=self.nodes,
            storage_context=storage_context,
-            transformations=Settings.transformations,
+            transformations=Settings.transformations
         )
 
         self.query_engine = index.as_query_engine(
@@ -103,14 +95,13 @@
 
     def ask(self, query: str):
         logger.info("retrieving the response to the query")
-        streaming_response = self.query_engine.query(query)
+        streaming_response = self.query_engine.query("You are an assistant for question-answering tasks. Use three \
+            sentences only and keep the answer concise.\n\n" + query)
         return streaming_response
 
     def clear(self):
-        # self.vector_store.clear()
-        if self.nodes:
-            self.vector_store.delete_nodes(self.nodes)
-        self.pdf_count = 0
-        self.text_chunks = []
-        self.doc_ids = []
-        self.nodes = []
+        self.vector_store.clear()
+        self.pdf_count = 0
+        self.text_chunks = []
+        self.doc_ids = []
+        self.nodes = []
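
For reviewers who want to exercise the patched class, here is a minimal smoke-test sketch. It is not part of the commit: it assumes the post-commit app/rag.py is importable as app.rag, that ChatPDF() takes no constructor arguments (the full __init__ signature is outside the hunks shown), and that a ./pdfs/ directory of sample PDFs exists; the directory name is illustrative.

# Hypothetical usage sketch against app/rag.py as of c2883be (assumptions noted above).
from app.rag import ChatPDF

chat = ChatPDF()                  # first run downloads the Qwen2-0.5B q4_k_m GGUF via model_url
chat.ingest(files_dir="./pdfs/")  # splits, embeds, and indexes every PDF under ./pdfs/
response = chat.ask("What are these documents about?")  # ask() now prepends the concise-answer instruction
print(response)                   # relies on the response object's string rendering
chat.clear()                      # per this commit, resets the vector store and the cached chunk state

If as_query_engine was configured with streaming=True (its arguments are also outside the hunks shown), the returned object would be a streaming response whose tokens must be consumed, e.g. via print_response_stream(), rather than printed directly.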