alx-d committed
Commit f840733 · verified · 1 parent: 1548715

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+__pycache__/advanced_rag.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
__pycache__/advanced_rag.cpython-311.pyc CHANGED
Binary files a/__pycache__/advanced_rag.cpython-311.pyc and b/__pycache__/advanced_rag.cpython-311.pyc differ
 
__pycache__/test_filename_generation.cpython-311.pyc ADDED
Binary file (5.14 kB).
 
advanced_rag.py CHANGED
@@ -105,8 +105,13 @@ def process_batch_query(query, model_choice, max_tokens, param_configs, slider_values, job_id, use_history=True):
                             "Progress": f"Query {current}/{total_combinations}"
                         })
 
-    # Format results with CSV file links
-    formatted_results, csv_path = format_batch_result_files(results, job_id)
+    # Format results with CSV file links - UPDATED TO PASS ADDITIONAL PARAMETERS
+    formatted_results, csv_path = format_batch_result_files(
+        results, job_id,
+        embedding_model=getattr(rag_chain, 'embedding_model', 'unknown'),
+        llm_model=model_choice,
+        param_variations=param_configs
+    )
 
     return (
         formatted_results,
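
For orientation: the param_configs mapping forwarded here as param_variations uses the string convention exercised by process_batch_query and test_filename_generation.py later in this commit. A minimal sketch of the expected shapes (illustrative values only):

    # Shapes inferred from process_batch_query and the test script; the values are made up.
    param_configs = {
        "temperature": "Whole range 3 values",  # swept over 3 evenly spaced values
        "top_p": "Constant",                    # held at the slider value
        "top_k": "Constant",
        "bm25": "Constant",
    }
    slider_values = {"temperature": 0.7, "top_p": 0.9, "top_k": 40, "bm25": 0.5}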
@@ -1388,10 +1393,64 @@ def format_response(response: str) -> str:
 
 def reset_app_updated():
     global rag_chain
+
+    # Properly clean up the existing vector database components
+    if hasattr(rag_chain, 'vector_store'):
+        try:
+            del rag_chain.vector_store
+        except:
+            pass
+
+    if hasattr(rag_chain, 'faiss_retriever'):
+        try:
+            del rag_chain.faiss_retriever
+        except:
+            pass
+
+    if hasattr(rag_chain, 'bm25_retriever'):
+        try:
+            del rag_chain.bm25_retriever
+        except:
+            pass
+
+    if hasattr(rag_chain, 'ensemble_retriever'):
+        try:
+            del rag_chain.ensemble_retriever
+        except:
+            pass
+
+    # Clear data references
+    if hasattr(rag_chain, 'raw_data'):
+        rag_chain.raw_data = None
+    if hasattr(rag_chain, 'split_data'):
+        rag_chain.split_data = None
+    if hasattr(rag_chain, 'context'):
+        rag_chain.context = ""
+    if hasattr(rag_chain, 'conversation_history'):
+        rag_chain.conversation_history = []
+
+    # Clear other components
+    if hasattr(rag_chain, 'text_splitter'):
+        try:
+            del rag_chain.text_splitter
+        except:
+            pass
+
+    if hasattr(rag_chain, 'elevated_rag_chain'):
+        try:
+            del rag_chain.elevated_rag_chain
+        except:
+            pass
+
+    # Create a new instance
     rag_chain = ElevatedRagChain()
-    debug_print("App reset successfully.")
+
+    # Force garbage collection to free memory
+    gc.collect()
+
+    debug_print("App reset successfully. Vector database and all components cleaned up.")
     return (
-        "App reset successfully. You can now load new files",
+        "App reset successfully. Vector database and all components cleaned up. You can now load new files",
         "",
         "Model used: Not selected"
     )
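
The repeated hasattr/try/del blocks could be collapsed into a single loop; a sketch of an equivalent cleanup (not part of the commit, attribute names taken from the diff above):

    # delattr raises AttributeError when the attribute is absent, so no hasattr check is needed
    for attr in ("vector_store", "faiss_retriever", "bm25_retriever",
                 "ensemble_retriever", "text_splitter", "elevated_rag_chain"):
        try:
            delattr(rag_chain, attr)
        except AttributeError:
            pass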
@@ -2172,12 +2231,51 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         outputs=[csv_download_html_batch, csv_download_file_batch, csv_file_info_df_batch]
     )
 
-    def create_csv_from_batch_results(results: List[Dict], job_id: str) -> str:
+    def create_csv_from_batch_results(results: List[Dict], job_id: str,
+                                      embedding_model: str = None, llm_model: str = None,
+                                      param_variations: Dict = None) -> str:
         """Create a CSV file from batch query results and return the file path"""
         # Save CSV files in the current directory for HuggingFace Spaces compatibility
-        # Create a unique filename using job_id and timestamp
+
+        # Create a descriptive filename
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        csv_filename = f"batch_results_{job_id}_{timestamp}.csv"
+
+        # Extract short names for filename
+        def get_short_name(full_name, prefix_length=2):
+            """Extract short name from full model name"""
+            if not full_name:
+                return "unknown"
+            # Remove emojis and get the actual model name
+            clean_name = full_name.split(" ", 1)[-1] if " " in full_name else full_name
+            # Get first few characters and last few characters
+            if len(clean_name) > 8:
+                return clean_name[:4] + clean_name[-4:]
+            return clean_name
+
+        def get_param_variation_name(param_configs):
+            """Get the parameter that was varied"""
+            if not param_configs:
+                return "const"
+
+            varied_params = []
+            for param, config in param_configs.items():
+                if config != "Constant":
+                    # Extract the number from "Whole range X values"
+                    if "values" in config:
+                        num_values = config.split()[2] if len(config.split()) > 2 else "X"
+                        varied_params.append(f"{param}_{num_values}")
+
+            if not varied_params:
+                return "const"
+            return "_".join(varied_params)
+
+        # Build filename components
+        embedding_short = get_short_name(embedding_model) if embedding_model else "emb"
+        llm_short = get_short_name(llm_model) if llm_model else "llm"
+        param_short = get_param_variation_name(param_variations) if param_variations else "const"
+
+        # Create filename: batch_embedding_llm_params_timestamp.csv
+        csv_filename = f"batch_{embedding_short}_{llm_short}_{param_short}_{timestamp}.csv"
         csv_path = os.path.abspath(csv_filename)
 
         # Extract parameters and responses
@@ -2233,10 +2331,12 @@ def create_csv_from_batch_results(results: List[Dict], job_id: str) -> str:
 
         return csv_path
 
-    def format_batch_result_files(results: List[Dict], job_id: str) -> Tuple[str, str]:
+    def format_batch_result_files(results: List[Dict], job_id: str,
+                                  embedding_model: str = None, llm_model: str = None,
+                                  param_variations: Dict = None) -> Tuple[str, str]:
         """Format batch results with links to CSV files"""
-        # Create CSV file
-        csv_path = create_csv_from_batch_results(results, job_id)
+        # Create CSV file with improved filename
+        csv_path = create_csv_from_batch_results(results, job_id, embedding_model, llm_model, param_variations)
 
         # Format the results
         formatted_results = "### Batch Query Results\n\n"
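
As a worked example of the new naming scheme (traced by hand through get_short_name and get_param_variation_name above; the Mistral label is one of the model strings used in test_filename_generation.py later in this commit):

    get_short_name("🇪🇺 Mistral-API")   # -> "Mist-API": drops the emoji prefix, keeps first 4 + last 4 chars
    get_short_name("unknown")            # -> "unknown": names of 8 chars or fewer are kept whole
    get_param_variation_name({"temperature": "Whole range 3 values",
                              "top_p": "Constant", "top_k": "Constant",
                              "bm25": "Constant"})  # -> "temperature_3"
    # Resulting file: batch_unknown_Mist-API_temperature_3_<timestamp>.csv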
 
advanced_rag_updated.py ADDED
@@ -0,0 +1,245 @@
+import os
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+import datetime
+import functools
+import traceback
+from typing import List, Optional, Any, Dict, Tuple
+import csv
+import pandas as pd
+import tempfile
+import shutil
+import glob
+
+import torch
+import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from langchain_community.llms import HuggingFacePipeline
+
+# Other LangChain and community imports
+from langchain_community.document_loaders import OnlinePDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.retrievers import BM25Retriever
+from langchain.embeddings.base import Embeddings
+from langchain.retrievers import EnsembleRetriever
+from langchain.prompts import ChatPromptTemplate
+from langchain.schema import StrOutputParser, Document
+from langchain_core.runnables import RunnableParallel, RunnableLambda
+from transformers.quantizers.auto import AutoQuantizationConfig
+import gradio as gr
+from pydantic import PrivateAttr
+import pydantic
+
+from langchain.llms.base import LLM
+from typing import Any, Optional, List
+import typing
+import time
+import re
+import requests
+from langchain.schema import Document
+from langchain_community.document_loaders import PyMuPDFLoader  # Updated loader
+import tempfile
+import mimetypes
+import gc
+
+# Add batch processing helper functions
+def generate_parameter_values(min_val, max_val, num_values):
+    """Generate evenly spaced values between min and max"""
+    if num_values == 1:
+        return [min_val]
+    step = (max_val - min_val) / (num_values - 1)
+    return [min_val + (step * i) for i in range(num_values)]
+
+def process_batch_query(query, model_choice, max_tokens, param_configs, slider_values, job_id, use_history=True):
+    """Process a batch of queries with different parameter combinations"""
+    results = []
+
+    # Generate all parameter combinations
+    temp_values = [slider_values['temperature']] if param_configs['temperature'] == "Constant" else generate_parameter_values(0.1, 1.0, int(param_configs['temperature'].split()[2]))
+    top_p_values = [slider_values['top_p']] if param_configs['top_p'] == "Constant" else generate_parameter_values(0.1, 0.99, int(param_configs['top_p'].split()[2]))
+    top_k_values = [slider_values['top_k']] if param_configs['top_k'] == "Constant" else generate_parameter_values(1, 100, int(param_configs['top_k'].split()[2]))
+    bm25_values = [slider_values['bm25']] if param_configs['bm25'] == "Constant" else generate_parameter_values(0.0, 1.0, int(param_configs['bm25'].split()[2]))
+
+    total_combinations = len(temp_values) * len(top_p_values) * len(top_k_values) * len(bm25_values)
+    current = 0
+
+    for temp in temp_values:
+        for top_p in top_p_values:
+            for top_k in top_k_values:
+                for bm25 in bm25_values:
+                    current += 1
+                    try:
+                        # Update parameters
+                        rag_chain.temperature = temp
+                        rag_chain.top_p = top_p
+                        rag_chain.top_k = top_k
+                        rag_chain.bm25_weight = bm25
+                        rag_chain.faiss_weight = 1.0 - bm25
+
+                        # Update ensemble retriever
+                        rag_chain.ensemble_retriever = EnsembleRetriever(
+                            retrievers=[rag_chain.bm25_retriever, rag_chain.faiss_retriever],
+                            weights=[rag_chain.bm25_weight, rag_chain.faiss_weight]
+                        )
+
+                        # Process query
+                        response = rag_chain.elevated_rag_chain.invoke({"question": query})
+
+                        # Store response in history if enabled
+                        if use_history:
+                            trimmed_response = response[:1000] + ("..." if len(response) > 1000 else "")
+                            rag_chain.conversation_history.append({"query": query, "response": trimmed_response})
+
+                        # Format result
+                        result = {
+                            "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
+                            "Response": response,
+                            "Progress": f"Query {current}/{total_combinations}"
+                        }
+                        results.append(result)
+
+                    except Exception as e:
+                        results.append({
+                            "Parameters": f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}",
+                            "Response": f"Error: {str(e)}",
+                            "Progress": f"Query {current}/{total_combinations}"
+                        })
+
+    # Format results with CSV file links - UPDATED TO PASS ADDITIONAL PARAMETERS
+    formatted_results, csv_path = format_batch_result_files(
+        results, job_id,
+        embedding_model=getattr(rag_chain, 'embedding_model', 'unknown'),
+        llm_model=model_choice,
+        param_variations=param_configs
+    )
+
+    return (
+        formatted_results,
+        csv_path,
+        f"Job ID: {job_id}",
+        f"Input tokens: {count_tokens(query)}",
+        f"Output tokens: {sum(count_tokens(r['Response']) for r in results)}"
+    )
+
+# ... (rest of the file content would go here, but I'll focus on the specific functions that need updating)
+
+def create_csv_from_batch_results(results: List[Dict], job_id: str,
+                                  embedding_model: str = None, llm_model: str = None,
+                                  param_variations: Dict = None) -> str:
+    """Create a CSV file from batch query results and return the file path"""
+    # Save CSV files in the current directory for HuggingFace Spaces compatibility
+
+    # Create a descriptive filename
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+
+    # Extract short names for filename
+    def get_short_name(full_name, prefix_length=2):
+        """Extract short name from full model name"""
+        if not full_name:
+            return "unknown"
+        # Remove emojis and get the actual model name
+        clean_name = full_name.split(" ", 1)[-1] if " " in full_name else full_name
+        # Get first few characters and last few characters
+        if len(clean_name) > 8:
+            return clean_name[:4] + clean_name[-4:]
+        return clean_name
+
+    def get_param_variation_name(param_configs):
+        """Get the parameter that was varied"""
+        if not param_configs:
+            return "const"
+
+        varied_params = []
+        for param, config in param_configs.items():
+            if config != "Constant":
+                # Extract the number from "Whole range X values"
+                if "values" in config:
+                    num_values = config.split()[2] if len(config.split()) > 2 else "X"
+                    varied_params.append(f"{param}_{num_values}")
+
+        if not varied_params:
+            return "const"
+        return "_".join(varied_params)
+
+    # Build filename components
+    embedding_short = get_short_name(embedding_model) if embedding_model else "emb"
+    llm_short = get_short_name(llm_model) if llm_model else "llm"
+    param_short = get_param_variation_name(param_variations) if param_variations else "const"
+
+    # Create filename: batch_embedding_llm_params_timestamp.csv
+    csv_filename = f"batch_{embedding_short}_{llm_short}_{param_short}_{timestamp}.csv"
+    csv_path = os.path.abspath(csv_filename)
+
+    # Extract parameters and responses
+    data = []
+    start_time = time.time()
+    for result in results:
+        params = result["Parameters"]
+        response = result["Response"]
+        progress = result["Progress"]
+
+        # Calculate elapsed time for this query
+        current_time = time.time()
+        elapsed_time = current_time - start_time
+
+        # Extract individual parameter values
+        temp = float(re.search(r"Temp: ([\d.]+)", params).group(1))
+        top_p = float(re.search(r"Top-p: ([\d.]+)", params).group(1))
+        top_k = int(re.search(r"Top-k: (\d+)", params).group(1))
+        bm25 = float(re.search(r"BM25: ([\d.]+)", params).group(1))
+
+        # Extract response components
+        model_info = re.search(r"Model: (.*?)\n", response)
+        model = model_info.group(1) if model_info else "Unknown"
+
+        # Extract main answer (everything between the parameters and the token counts)
+        answer_match = re.search(r"Model Parameters:.*?\n\n(.*?)\n\n---", response, re.DOTALL)
+        main_answer = answer_match.group(1).strip() if answer_match else response
+
+        # Extract token counts
+        input_tokens = re.search(r"Input tokens: (\d+)", response)
+        output_tokens = re.search(r"Output tokens: (\d+)", response)
+
+        # Extract conversation history count
+        conv_history = re.search(r"Conversation History: (\d+) conversation", response)
+
+        data.append({
+            "Temperature": temp,
+            "Top-p": top_p,
+            "Top-k": top_k,
+            "BM25 Weight": bm25,
+            "Model": model,
+            "Main Answer": main_answer,
+            "Input Tokens": input_tokens.group(1) if input_tokens else "N/A",
+            "Output Tokens": output_tokens.group(1) if output_tokens else "N/A",
+            "Conversation History": conv_history.group(1) if conv_history else "0",
+            "Progress": progress,
+            "Elapsed Time (s)": f"{elapsed_time:.2f}"
+        })
+
+    # Create DataFrame and save to CSV
+    df = pd.DataFrame(data)
+    df.to_csv(csv_path, index=False)
+
+    return csv_path
+
+def format_batch_result_files(results: List[Dict], job_id: str,
+                              embedding_model: str = None, llm_model: str = None,
+                              param_variations: Dict = None) -> Tuple[str, str]:
+    """Format batch results with links to CSV files"""
+    # Create CSV file with improved filename
+    csv_path = create_csv_from_batch_results(results, job_id, embedding_model, llm_model, param_variations)
+
+    # Format the results
+    formatted_results = "### Batch Query Results\n\n"
+
+    # Add the actual results
+    for result in results:
+        formatted_results += f"#### {result['Parameters']}\n"
+        formatted_results += f"**Progress:** {result['Progress']}\n\n"
+        formatted_results += f"{result['Response']}\n\n"
+        formatted_results += "---\n\n"
+
+    return formatted_results, csv_path
+
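
The nested loops in process_batch_query enumerate the Cartesian product of the four parameter grids, so the total run count is the product of the grid sizes. A sketch of the same enumeration with itertools.product (equivalent iteration order; not taken from the file):

    import itertools

    def generate_parameter_values(min_val, max_val, num_values):
        if num_values == 1:
            return [min_val]
        step = (max_val - min_val) / (num_values - 1)
        return [min_val + (step * i) for i in range(num_values)]

    temps = generate_parameter_values(0.1, 1.0, 3)   # [0.1, 0.55, 1.0], up to float rounding
    bm25s = generate_parameter_values(0.0, 1.0, 2)   # [0.0, 1.0]
    # 3 temperatures x 1 top_p x 1 top_k x 2 BM25 weights = 6 combinations
    for temp, top_p, top_k, bm25 in itertools.product(temps, [0.9], [40], bm25s):
        print(f"Temp: {temp:.2f}, Top-p: {top_p:.2f}, Top-k: {top_k}, BM25: {bm25:.2f}")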
temp_import.py ADDED
@@ -0,0 +1,2 @@
+import gc
+
test_filename_generation.py ADDED
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""
+Test script to verify the new CSV filename generation functionality
+"""
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from advanced_rag import get_short_embedding_name, get_short_llm_name, get_varied_parameter
+
+def test_embedding_names():
+    """Test embedding model name generation"""
+    test_cases = [
+        ("🤗 sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast)", "MiniLM"),
+        ("🤗 BAAI/bge-base-en-v1.5 (768 dim, excellent)", "BGE-Base"),
+        ("🟦 Qwen/Qwen3-Embedding-8B (1024 dim, advanced)", "Qwen3-8B"),
+        ("sentence-transformers/all-mpnet-base-v2", "MPNet"),
+        ("unknown-model", "unknown")
+    ]
+
+    print("Testing embedding name generation:")
+    for input_name, expected in test_cases:
+        result = get_short_embedding_name(input_name)
+        status = "✓" if result == expected else "✗"
+        print(f"  {status} {input_name} -> {result} (expected: {expected})")
+
+def test_llm_names():
+    """Test LLM model name generation"""
+    test_cases = [
+        ("🇪🇺 Mistral-API", "Mistral"),
+        ("🇺🇸 Remote Meta-Llama-3", "Llama3"),
+        ("🇺🇸 GPT-4o", "GPT4o"),
+        ("mistral-small-latest", "Mistral"),
+        ("meta-llama/Meta-Llama-3-8B-Instruct", "Llama3"),
+        ("unknown-model", "unknown")
+    ]
+
+    print("\nTesting LLM name generation:")
+    for input_name, expected in test_cases:
+        result = get_short_llm_name(input_name)
+        status = "✓" if result == expected else "✗"
+        print(f"  {status} {input_name} -> {result} (expected: {expected})")
+
+def test_varied_parameter():
+    """Test varied parameter detection"""
+    test_cases = [
+        ({"temperature": "Constant", "top_p": "Constant", "top_k": "Constant", "bm25": "Constant"}, "None"),
+        ({"temperature": "Whole range 3 values", "top_p": "Constant", "top_k": "Constant", "bm25": "Constant"}, "temperature"),
+        ({"temperature": "Constant", "top_p": "Whole range 5 values", "top_k": "Constant", "bm25": "Constant"}, "top_p"),
+        ({"temperature": "Whole range 3 values", "top_p": "Whole range 5 values", "top_k": "Constant", "bm25": "Constant"}, "Multi"),
+        ({"temperature": "Constant", "top_p": "Constant", "top_k": "Constant", "bm25": "Whole range 7 values"}, "bm25")
+    ]
+
+    print("\nTesting varied parameter detection:")
+    for param_configs, expected in test_cases:
+        result = get_varied_parameter(param_configs)
+        status = "✓" if result == expected else "✗"
+        print(f"  {status} {param_configs} -> {result} (expected: {expected})")
+
+def test_filename_generation():
+    """Test complete filename generation"""
+    from datetime import datetime
+
+    # Mock timestamp for consistent testing
+    timestamp = "20241201_120000"
+
+    test_cases = [
+        ("🤗 sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast)", "🇪🇺 Mistral-API", "temperature", "batch_MiniLM_Mistral_temperature_20241201_120000.csv"),
+        ("🤗 BAAI/bge-base-en-v1.5 (768 dim, excellent)", "🇺🇸 Remote Meta-Llama-3", "top_p", "batch_BGE-Base_Llama3_top_p_20241201_120000.csv"),
+        ("🟦 Qwen/Qwen3-Embedding-8B (1024 dim, advanced)", "🇺🇸 GPT-4o", "Multi", "batch_Qwen3-8B_GPT4o_Multi_20241201_120000.csv"),
+        ("", "", "None", "batch_Unknown_Unknown_None_20241201_120000.csv")
+    ]
+
+    print("\nTesting complete filename generation:")
+    for embedding, llm, param, expected in test_cases:
+        short_embedding = get_short_embedding_name(embedding) if embedding else "Unknown"
+        short_llm = get_short_llm_name(llm) if llm else "Unknown"
+        short_param = param if param else "None"
+
+        filename = f"batch_{short_embedding}_{short_llm}_{short_param}_{timestamp}.csv"
+        status = "✓" if filename == expected else "✗"
+        print(f"  {status} Generated: {filename}")
+        print(f"     Expected: {expected}")
+
+if __name__ == "__main__":
+    print("Testing CSV filename generation functionality\n")
+    test_embedding_names()
+    test_llm_names()
+    test_varied_parameter()
+    test_filename_generation()
+    print("\nTest completed!")