Chris4K committed on
Commit 12758a0
1 Parent(s): 17b6044

Update app.py

Files changed (1)
  app.py +22 -19
app.py CHANGED
@@ -46,9 +46,9 @@ login(token=hf_token)
 # Define the model pipeline with additional generation parameters
 model_pipeline = pipeline(
     # model="meta-llama/Llama-3.2-1B",
-    model="meta-llama/Llama-3.2-1B",
+    model="meta-llama/Llama-3.2-3B-Instruct",
     #use_auth_token=hf_token,
-    max_length=1000, # You can increase this if needed
+    #max_length=1000, # You can increase this if needed
     max_new_tokens=500 # Limit how many tokens are generated
 )
 
@@ -177,31 +177,33 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
     return 0
 
 #def optimize_query(query, llm_model):
-def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
-    # Use a HuggingFace model for text generation
-    #model_id = "google/flan-t5-large"
-    #tokenizer = AutoTokenizer.from_pretrained(model_id)
-    #model = AutoModelForCausalLM.from_pretrained(model_id)
-    #pipe = pipeline(
-    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
-    #)
-    #llm = HuggingFacePipeline(pipeline=pipe)
+def optimize_query(
+    query: str,
+    llm_model: str = "meta-llama/Llama-3.2-1B",
+    chunks: List[str] = None,
+    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
+    vector_store_type: str = "faiss",
+    search_type: str = "similarity",
+    top_k: int = 5
+) -> List[str]:
+    # Initialize the language model
+    llm = HuggingFacePipeline(model=llm_model)
 
-    #llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
-
-
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
-
+
     # Create a retriever with the temporary vector store
     temp_retriever = get_retriever(temp_vector_store, search_type, {"k": top_k})
-
+
+    # Initialize MultiQueryRetriever with the temporary retriever and the language model
     multi_query_retriever = MultiQueryRetriever.from_llm(
         retriever=temp_retriever,
         llm=llm
-    )
-    # Use a NoOpRunManager as the run_manager
+    )
+
+    # Use a NoOpRunManager as the run manager
     optimized_queries = multi_query_retriever.invoke(query)
+
     return optimized_queries
 
 
@@ -593,7 +595,8 @@ def compare_embeddings(file, query, embedding_models, custom_embedding_model, sp
 
     if use_query_optimization:
         optimized_queries = optimize_query(query, query_optimization_model, chunks, embedding_model, vector_store_type, search_type, top_k)
-        query = " ".join(optimized_queries)
+        #query = " ".join(optimized_queries)
+        query = " ".join([doc.page_content for doc in optimized_queries]) # Extract text from Document objects
 
     results, search_time, vector_store, results_raw = search_embeddings(
         chunks,
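
For reference, a minimal sketch of the query-optimization flow this commit sets up: an LLM-backed MultiQueryRetriever over a temporary vector store, with the returned Document objects joined via their page_content. It assumes the langchain-huggingface and langchain-community packages, and uses FAISS and HuggingFaceEmbeddings as stand-ins for the app's get_vector_store / get_retriever helpers; the function and variable names below are illustrative, not part of app.py.

# Illustrative sketch only; not the committed app.py code.
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers.multi_query import MultiQueryRetriever

def sketch_optimize_query(query: str, chunks: list, top_k: int = 5) -> str:
    # LangChain's HuggingFacePipeline is typically constructed via from_model_id
    # (or by wrapping a transformers pipeline object).
    llm = HuggingFacePipeline.from_model_id(
        model_id="meta-llama/Llama-3.2-1B",  # assumption: default model from the signature above
        task="text-generation",
        pipeline_kwargs={"max_new_tokens": 500},
    )

    # Temporary vector store over the chunks (stand-in for get_vector_store)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(chunks, embeddings)

    # Stand-in for get_retriever: similarity search returning the top_k chunks
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": top_k})

    # MultiQueryRetriever generates query variants with the LLM and returns Documents,
    # which is why the caller joins doc.page_content rather than the raw list.
    multi_query_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)
    docs = multi_query_retriever.invoke(query)
    return " ".join(doc.page_content for doc in docs)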