Update app.py
app.py CHANGED
@@ -161,11 +161,17 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
 
 #def optimize_query(query, llm_model):
 def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
-
-
-
-
-
+    # Use a HuggingFace model for text generation
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
+
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
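This hunk drops the commented-out flan-t5 setup from optimize_query and instead wraps a transformers pipeline for HuggingFaceH4/zephyr-7b-beta in LangChain's HuggingFacePipeline. A minimal sketch of the new wiring, assuming transformers and langchain-community are installed; the explicit task string and max_new_tokens below are assumptions, since the diff lets transformers infer the task from the model config and keeps the pipeline's default generation length:

    from transformers import pipeline
    from langchain_community.llms import HuggingFacePipeline

    # Build the generation pipeline once at module level: zephyr-7b-beta is a
    # 7B-parameter model, and constructing it inside optimize_query would
    # reload the weights on every call.
    pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta",
                    max_new_tokens=256)
    llm = HuggingFacePipeline(pipeline=pipe)
    print(llm.invoke("Rewrite this search query to be more specific: jaguar speed"))

One caution: HuggingFacePipeline is a pydantic model, so the pipeline probably has to be passed by keyword (pipeline=...) rather than positionally as the added line does.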
@@ -443,11 +449,17 @@ def optimize_vocabulary(texts, vocab_size=10000, min_frequency=2):
 
 # New preprocessing function
 def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
-
-
-
-
-
+    # Use a HuggingFace model for text generation
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
+
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
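This hunk applies the same replacement to a second, identical definition of optimize_query later in the file. Python binds whichever def statement runs last at import time, so the earlier definition is silently shadowed; collapsing the duplicates into a single function would be a natural follow-up.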
@@ -633,8 +645,8 @@ def automated_testing(file, query, test_params, expected_result=None):
 
     stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
     stats["model"] = f"{params['model_type']} - {params['model_name']}"
-    stats["model_type"] = model_type
-    stats["model_name"] = model_name
+    stats["model_type"] = params['model_type']
+    stats["model_name"] = params['model_name']
     stats.update(params)
 
     all_results.extend(format_results(results_raw, stats))
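This hunk fixes a NameError: model_type and model_name are not defined in this scope, so the values are now read from params, consistent with the f-string one line above. Note that params already carries both keys, so the stats.update(params) call on the following line would set them anyway; a small sketch of why the explicit assignments are redundant but harmless:

    # dict.update copies every key of params into stats,
    # including model_type and model_name
    stats = {"model": "hf - zephyr"}
    params = {"model_type": "hf", "model_name": "zephyr", "top_k": 5}
    stats.update(params)
    assert stats["model_type"] == "hf" and stats["model_name"] == "zephyr"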
@@ -732,15 +744,16 @@ Text chunks:
 Provide your suggestions in a Python dictionary format."""
 
     # Use a HuggingFace model for text generation
-    model_id = "google/flan-t5-large"
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
-    )
-    llm = HuggingFacePipeline(pipeline=pipe)
-
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
 
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     #llm = HuggingFacePipeline.from_model_id(
     #    model_id="google/flan-t5-large",
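A side note on the block being commented out here: it could not have run as written, because google/flan-t5-large is a T5 encoder-decoder model, so AutoModelForCausalLM.from_pretrained raises a ValueError for its config, and the matching pipeline task is "text2text-generation", not "text-generation". If the flan-t5 path is ever revived, a sketch of a working variant (the seq2seq loader and task name are the corrections; the rest mirrors the commented code):

    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
    from langchain_community.llms import HuggingFacePipeline

    model_id = "google/flan-t5-large"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # T5 is a seq2seq architecture, so the causal-LM auto class does not apply
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer,
                    max_new_tokens=512)
    llm = HuggingFacePipeline(pipeline=pipe)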