Update app.py
app.py CHANGED
```diff
@@ -46,9 +46,9 @@ login(token=hf_token)
 # Define the model pipeline with additional generation parameters
 model_pipeline = pipeline(
     # model="meta-llama/Llama-3.2-1B",
-    model="meta-llama/Llama-3.2-
+    model="meta-llama/Llama-3.2-3B-Instruct",
     #use_auth_token=hf_token,
-    max_length=1000, # You can increase this if needed
+    #max_length=1000, # You can increase this if needed
     max_new_tokens=500 # Limit how many tokens are generated
 )
 
```
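The two real changes in this hunk: the pipeline now loads `meta-llama/Llama-3.2-3B-Instruct`, and `max_length` is commented out in favor of `max_new_tokens`. In `transformers`, `max_length` caps prompt plus generated tokens, while `max_new_tokens` caps only the newly generated ones; when both are set, `max_new_tokens` takes precedence and a warning is emitted, so keeping just one is the cleaner configuration. A minimal sketch of the resulting setup, assuming the task resolves to `text-generation` (the commit omits the task string and relies on auto-detection) and that `login()` has already supplied credentials for the gated weights:

```python
from transformers import pipeline

# Sketch of the pipeline as configured after this commit; assumes a prior
# huggingface_hub.login() call so the gated Llama weights can be fetched.
model_pipeline = pipeline(
    "text-generation",  # assumption: the commit lets pipeline() infer this task
    model="meta-llama/Llama-3.2-3B-Instruct",
    max_new_tokens=500,  # caps generated tokens only; max_length would also count the prompt
)

print(model_pipeline("Briefly explain embeddings:")[0]["generated_text"])
```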
```diff
@@ -177,31 +177,33 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
     return 0
 
 #def optimize_query(query, llm_model):
-def optimize_query(
-
-
-
-
-
-
-
-
+def optimize_query(
+    query: str,
+    llm_model: str = "meta-llama/Llama-3.2-1B",
+    chunks: List[str] = None,
+    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
+    vector_store_type: str = "faiss",
+    search_type: str = "similarity",
+    top_k: int = 5
+) -> List[str]:
+    # Initialize the language model
+    llm = HuggingFacePipeline(model=llm_model)
 
-    #llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
-
-
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
-
+
     # Create a retriever with the temporary vector store
     temp_retriever = get_retriever(temp_vector_store, search_type, {"k": top_k})
-
+
+    # Initialize MultiQueryRetriever with the temporary retriever and the language model
     multi_query_retriever = MultiQueryRetriever.from_llm(
         retriever=temp_retriever,
         llm=llm
-)
-
+    )
+
+    # Use a NoOpRunManager as the run manager
     optimized_queries = multi_query_retriever.invoke(query)
+
     return optimized_queries
 
 
```
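Two details of the new `optimize_query` are worth flagging. The `List[str]` annotations require `from typing import List` (or the built-in `list[str]` on Python 3.9+), and `HuggingFacePipeline(model=llm_model)` is not a documented LangChain constructor: the wrapper is normally built with `HuggingFacePipeline.from_model_id(...)` or by passing an existing `transformers` pipeline. Also, `MultiQueryRetriever.invoke()` returns the union of retrieved `Document` objects, not the rewritten query strings, which is what the third hunk below has to compensate for. A hedged sketch of the conventional wiring, with a small in-memory FAISS store standing in for the app's `get_vector_store`/`get_retriever` helpers and defaults taken from the new signature:

```python
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS

# Documented way to build the LLM wrapper (rather than model=...).
llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-3.2-1B",  # default from the new signature
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 128},
)

# Stand-in for get_vector_store()/get_retriever() in app.py (illustrative data).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = FAISS.from_texts(["chunk one", "chunk two"], embeddings)
retriever = store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# from_llm() prompts the LLM for query variants, runs each through the
# retriever, and returns the deduplicated Document objects they retrieve.
multi_query_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)
docs = multi_query_retriever.invoke("what do the chunks say?")  # -> list of Documents
```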
```diff
@@ -593,7 +595,8 @@ def compare_embeddings(file, query, embedding_models, custom_embedding_model, sp
 
     if use_query_optimization:
         optimized_queries = optimize_query(query, query_optimization_model, chunks, embedding_model, vector_store_type, search_type, top_k)
-        query = " ".join(optimized_queries)
+        #query = " ".join(optimized_queries)
+        query = " ".join([doc.page_content for doc in optimized_queries]) # Extract text from Document objects
 
     results, search_time, vector_store, results_raw = search_embeddings(
         chunks,
```
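This last hunk follows directly from the retriever's return type: `" ".join(optimized_queries)` raises `TypeError: sequence item 0: expected str instance, Document found`, because `invoke()` yields `Document` objects, and the fix pulls the text out of each one via `.page_content`. A tiny sketch of the before/after behaviour (the `Document` contents here are made up for illustration):

```python
from langchain_core.documents import Document

optimized_queries = [
    Document(page_content="What are vector embeddings?"),
    Document(page_content="How do embeddings represent text?"),
]

# Old line: " ".join(optimized_queries)  -> TypeError (items are Documents, not str)
query = " ".join(doc.page_content for doc in optimized_queries)
print(query)  # What are vector embeddings? How do embeddings represent text?
```

Note that because `MultiQueryRetriever` returns the retrieved documents rather than the generated query variants, the joined string is concatenated chunk text; whether that is really the intended "optimized query" is a design question this commit leaves open.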
|