Update app.py
app.py
CHANGED
@@ -105,7 +105,7 @@ from huggingface_hub import InferenceClient
 
 # NLTK Resource Download
 def download_nltk_resources():
-    resources = ['punkt', 'stopwords', 'snowball_data']
+    resources = ['punkt', 'stopwords', 'snowball_data', 'wordnet']
     for resource in resources:
        try:
            nltk.download(resource, quiet=False)
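The only change here is adding 'wordnet' to the download list; the expansion hunk further down calls WordNet synsets, which raises a LookupError if the corpus was never fetched. A minimal, hedged sketch of that dependency (standalone illustration, not the app's own code):

import nltk
from nltk.corpus import wordnet

nltk.download('wordnet', quiet=True)  # the resource this commit adds to download_nltk_resources()

def expand_with_wordnet(tokens, per_synset=2):
    # Collect a couple of lemma names per synset, mirroring the
    # expanded_terms.update([lemma.name() for lemma in syn.lemmas()[:2]]) line below.
    expanded = set()
    for token in tokens:
        for syn in wordnet.synsets(token):
            expanded.update(lemma.name() for lemma in syn.lemmas()[:per_synset])
    return expanded

print(expand_with_wordnet(["vector", "store"]))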
@@ -337,7 +337,7 @@ def optimize_query(
     vector_store_type: str,  # Added to match your signature
     search_type: str,  # Added to match your signature
     top_k: int = 3,
-    use_gpu: bool =
+    use_gpu: bool = False
 ) -> str:
     """
     CPU-optimized version of query expansion using a small language model.
@@ -354,7 +354,7 @@ def optimize_query(
 
     Returns:
         Expanded query string
-    """
+    """
     try:
         # Set device
         device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
@@ -372,74 +372,60 @@ def optimize_query(
             expanded_terms.update([lemma.name() for lemma in syn.lemmas()[:2]])
 
         # 3. Use provided model with reduced complexity
-                max_length=64,
-                truncation=True,
-                padding=True
-            )
-
-            # Generate with minimal parameters
-            outputs = model.generate(
-                inputs.input_ids.to(device),
-                max_length=32,
+        try:
+            # Initialize the pipeline with the chosen model
+            llm_pipeline = pipeline(model="meta-llama/Llama-3.2-1B-Instruct", device='cpu')
+
+            # Define prompt for the assistant, making it context-specific
+            prompt = f'''
+            <|start_header_id|>system<|end_header_id|>
+            You are an expert in enhancing user input for vector store retrieval.
+            Enhance the followinf search query with relevant terms.
+
+            show me just the new term. You SHOULD NOT include any other text in the response.
+
+            <|eot_id|><|start_header_id|>user<|end_header_id|>
+            {query}
+            <|eot_id|><|start_header_id|>assistant<|end_header_id|>
+            '''
+
+            # Get suggested settings from the LLM
+            suggested_settings = llm_pipeline(
+                prompt,
+                do_sample=True,
+                top_k=10,
                 num_return_sequences=1,
+                return_full_text=False,
+                max_new_tokens=1900,  # Control the length of the output
+                truncation=True  # Enable truncation
             )
 
+            # Extract the settings from the generated response
+            generated_text = suggested_settings[0].get('generated_text', '')
+            print(generated_text)  # For debugging, ensure text output is as expected
 
-        except Exception as model_error:
-            print(f"Model-based expansion failed: {str(model_error)}")
-            enhanced_query = query
+        except Exception as model_error:
+            print(f"LLM-based expansion failed: {str(model_error)}")
+            generated_text = "Default settings could not be generated."  # Fallback message or settings
 
+        # 4. Combine original and expanded terms
+        final_terms = set(tokens)
+        final_terms.update(expanded_terms)
+        if generated_text != query:
+            final_terms.update(word_tokenize(generated_text.lower()))
 
         # 5. Remove stopwords and select top_k most relevant terms
         stopwords = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to'])
         final_terms = [term for term in final_terms if term not in stopwords]
 
         # Combine with original query
+        generated_text = f"{query} {' '.join(list(final_terms)[:top_k])}"
+        print(generated_text)
         # Clean up
-        del tokenizer
-        if device == "cuda":
-            torch.cuda.empty_cache()
+        # llm_pipeline = None
 
-        return
+        return generated_text.strip()  # [Document(page_content=generated_text.strip())]
 
     except Exception as e:
         print(f"Query optimization failed: {str(e)}")
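This hunk swaps the old tokenizer/model.generate() expansion for a transformers pipeline call with a Llama 3.2 chat-formatted prompt; return_full_text=False means only the assistant completion comes back. A hedged, standalone sketch of that path (the model id is taken from the diff, the checkpoint is gated and any small instruct model can stand in, and the smaller max_new_tokens is my choice for illustration, not the commit's):

from transformers import pipeline

def expand_query_with_llm(query: str, model_id: str = "meta-llama/Llama-3.2-1B-Instruct") -> str:
    llm = pipeline("text-generation", model=model_id, device="cpu")
    prompt = f'''
    <|start_header_id|>system<|end_header_id|>
    You are an expert in enhancing user input for vector store retrieval.
    Enhance the following search query with relevant terms.
    Show me just the new terms. You SHOULD NOT include any other text in the response.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    {query}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    '''
    out = llm(
        prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        return_full_text=False,  # return only the completion, not the prompt
        max_new_tokens=64,       # a few expansion terms is enough; the commit uses 1900
        truncation=True,
    )
    return out[0].get("generated_text", "").strip()

# e.g. print(expand_query_with_llm("how to speed up FAISS search"))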
@@ -1073,6 +1059,7 @@ def analyze_results(stats_df):
     return recommendations
 
 ####
+import ast
 
 def get_llm_suggested_settings(file, num_chunks=1):
     if not file:
@@ -1092,7 +1079,7 @@ def get_llm_suggested_settings(file, num_chunks=1):
     sample_chunks = random.sample(chunks, min(num_chunks, len(chunks)))
 
 
-    llm_pipeline = pipeline(model="meta-llama/Llama-3.2-1B-Instruct", device='
+    llm_pipeline = pipeline(model="meta-llama/Llama-3.2-1B-Instruct", device='cpu')
 
 
     prompt=f'''
@@ -1155,17 +1142,16 @@ def get_llm_suggested_settings(file, num_chunks=1):
         max_new_tokens=1900,  # Control the length of the output,
         truncation=True,  # Enable truncation
     )
-
-    #
-    print("setting suggested")
-    print(suggested_settings)
-    # Parse the generated text to extract the dictionary
+
+    print(suggested_settings[0]['generated_text'])
+    # Safely parse the generated text to extract the dictionary
     try:
+        # Using ast.literal_eval for safe parsing
+        settings_dict = ast.literal_eval(suggested_settings[0]['generated_text'])
+
         # Convert the settings to match the interface inputs
         return {
-            "embedding_models":
+            "embedding_models": settings_dict["embedding_models"],
             "split_strategy": settings_dict["split_strategy"],
             "chunk_size": settings_dict["chunk_size"],
             "overlap_size": settings_dict["overlap_size"],
@@ -1173,13 +1159,15 @@ def get_llm_suggested_settings(file, num_chunks=1):
             "search_type": settings_dict["search_type"],
             "top_k": settings_dict["top_k"],
             "apply_preprocessing": settings_dict["apply_preprocessing"],
-            "optimize_vocab": settings_dict["
-            "apply_phonetic": settings_dict["
-            "phonetic_weight": 0.3  #
+            "optimize_vocab": settings_dict["optimize_vocab"],
+            "apply_phonetic": settings_dict["apply_phonetic"],
+            "phonetic_weight": settings_dict.get("phonetic_weight", 0.3)  # Set default if not provided
         }
-    except:
+    except Exception as e:
+        print(f"Error parsing LLM suggestions: {e}")
         return {"error": "Failed to parse LLM suggestions"}
 
+
 def update_inputs_with_llm_suggestions(suggestions):
     if suggestions is None or "error" in suggestions:
         return [gr.update() for _ in range(11)]  # Return no updates if there's an error or None
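The parsing fix pairs the import ast added earlier with ast.literal_eval and a named exception instead of the old bare except:. A hedged sketch of why that is the safer way to turn the model's dict-like reply into real settings (helper name is mine, not the app's):

import ast

def parse_llm_settings(generated_text: str) -> dict:
    # literal_eval only evaluates Python literals (dicts, lists, strings, numbers),
    # so a malformed or malicious reply raises instead of executing code.
    try:
        settings = ast.literal_eval(generated_text)
        if not isinstance(settings, dict):
            raise ValueError("expected a dict of settings")
        return settings
    except (ValueError, SyntaxError) as e:
        print(f"Error parsing LLM suggestions: {e}")
        return {"error": "Failed to parse LLM suggestions"}

# e.g. parse_llm_settings('{"chunk_size": 512, "overlap_size": 64, "top_k": 5}')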
|