Chris4K committed
Commit a14da67
1 Parent(s): a5caef8

Update app.py

Files changed (1):
  app.py (+33 -20)
app.py CHANGED
@@ -161,11 +161,17 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=True):
 
 #def optimize_query(query, llm_model):
 def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
-    llm = HuggingFacePipeline.from_model_id(
-        model_id=llm_model,
-        task="text2text-generation",
-        model_kwargs={"do_sample": True, "temperature": 0, "max_new_tokens": 64},
-    )
+    # Use a HuggingFace model for text generation
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
+
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
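
For context, this hunk swaps LangChain's `HuggingFacePipeline.from_model_id` loader (which ignored the `llm_model` argument's original kwargs style) for a wrapper around a prebuilt transformers pipeline. A minimal sketch of both construction paths, assuming the langchain-community and transformers packages; the model IDs and kwargs mirror the hunk and are illustrative. Two caveats worth noting: the removed call paired `do_sample=True` with `temperature=0`, which recent transformers versions reject, and `HuggingFacePipeline` expects the keyword form `pipeline=`, so the positional call on the new line may fail pydantic validation.

    # Sketch only: both ways of building a LangChain HuggingFacePipeline.
    from transformers import pipeline
    from langchain_community.llms import HuggingFacePipeline

    # Path the commit removes: LangChain loads the model itself.
    llm_old = HuggingFacePipeline.from_model_id(
        model_id="google/flan-t5-large",
        task="text2text-generation",
        model_kwargs={"max_new_tokens": 64},
    )

    # Path the commit moves to: wrap an existing transformers pipeline.
    # The task is inferred from the model when only `model=` is given;
    # note the keyword argument, unlike the positional call in the hunk.
    pipe = pipeline(model="HuggingFaceH4/zephyr-7b-beta")
    llm_new = HuggingFacePipeline(pipeline=pipe)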
@@ -443,11 +449,17 @@ def optimize_vocabulary(texts, vocab_size=10000, min_frequency=2):
 
 # New preprocessing function
 def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
-    llm = HuggingFacePipeline.from_model_id(
-        model_id=llm_model,
-        task="text2text-generation",
-        model_kwargs={"do_sample": True, "temperature": 0, "max_new_tokens": 64},
-    )
+    # Use a HuggingFace model for text generation
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
+
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
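
This hunk and the one at line 163 patch two identically named definitions of `optimize_query` in the same file. In Python the later `def` silently rebinds the name at import time, so only this second definition is ever called; a minimal illustration:

    def optimize_query(query):
        return f"first definition: {query}"

    def optimize_query(query):  # rebinds the name; the first is unreachable
        return f"second definition: {query}"

    print(optimize_query("test"))  # -> second definition: test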
@@ -633,8 +645,8 @@ def automated_testing(file, query, test_params, expected_result=None):
 
     stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
     stats["model"] = f"{params['model_type']} - {params['model_name']}"
-    stats["model_type"] = model_type
-    stats["model_name"] = model_name
+    stats["model_type"] = params['model_type']
+    stats["model_name"] = params['model_name']
     stats.update(params)
 
     all_results.extend(format_results(results_raw, stats))
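
The fix in this hunk is a scoping bug: inside the parameter-sweep loop only the `params` dict is in scope, so the bare names `model_type` and `model_name` would raise `NameError`. A minimal sketch of the corrected pattern; the dict contents are illustrative:

    params = {"model_type": "HuggingFace", "model_name": "google/flan-t5-large", "top_k": 5}

    stats = {"model": f"{params['model_type']} - {params['model_name']}"}
    stats["model_type"] = params["model_type"]  # was the undefined name `model_type`
    stats["model_name"] = params["model_name"]  # was the undefined name `model_name`
    stats.update(params)  # copies every swept parameter into the result row

Since `stats.update(params)` runs immediately afterward and would write the same two keys anyway, the explicit assignments are redundant, but they are harmless and keep the keys visible.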
@@ -732,15 +744,16 @@ Text chunks:
 
 Provide your suggestions in a Python dictionary format."""
 
     # Use a HuggingFace model for text generation
-    model_id = "google/flan-t5-large"
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
-    )
-    llm = HuggingFacePipeline(pipeline=pipe)
 
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
+
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     #llm = HuggingFacePipeline.from_model_id(
     #    model_id="google/flan-t5-large",
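
The block commented out in this last hunk had a real bug worth noting: `google/flan-t5-large` is an encoder-decoder model, so loading it through `AutoModelForCausalLM` fails with an unrecognized-configuration error, and its pipeline task is `"text2text-generation"` rather than `"text-generation"`. A sketch of a working load, under the assumption that flan-t5 was the original intent; the example query is illustrative:

    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

    model_id = "google/flan-t5-large"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)  # seq2seq head, not causal LM
    pipe = pipeline(
        "text2text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
    )
    print(pipe("Rewrite this search query to be more precise: cheap laptop")[0]["generated_text"])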
 