Update app.py
app.py (changed)
@@ -354,7 +354,7 @@ def optimize_query(
 
     Returns:
         Expanded query string
-    """
+    """
     try:
         # Set device
         device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
@@ -383,7 +383,7 @@ def optimize_query(
     Enhance the following search query with relevant terms.
 
     Show me just the new terms. You SHOULD NOT include any other text in the response.
-
+
     <|eot_id|><|start_header_id|>user<|end_header_id|>
     {query}
     <|eot_id|><|start_header_id|>assistant<|end_header_id|>
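For orientation, the prompt above is a Llama-3-style chat template. Below is a minimal sketch of how such a prompt is typically run through a Hugging Face text-generation pipeline; the model name, device handling, and prompt-stripping step are illustrative assumptions, not code taken from app.py.

# Hypothetical sketch; model name and post-processing are assumptions.
from transformers import pipeline
import torch

def expand_query(query: str, use_gpu: bool = False) -> str:
    device = 0 if use_gpu and torch.cuda.is_available() else -1
    generator = pipeline(
        "text-generation",
        model="meta-llama/Meta-Llama-3-8B-Instruct",  # assumed model
        device=device,
    )
    prompt = (
        "<|start_header_id|>system<|end_header_id|>\n"
        "Enhance the following search query with relevant terms.\n"
        "Show me just the new terms. You SHOULD NOT include any other text in the response.\n"
        "<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
        f"{query}\n"
        "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
    )
    result = generator(prompt, max_new_tokens=64, truncation=True)
    # The pipeline echoes the prompt by default, so keep only the generated tail.
    return result[0]["generated_text"][len(prompt):].strip()

Slicing off len(prompt) works because the pipeline returns the prompt and the completion as one string by default.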
@@ -1084,10 +1084,10 @@ def get_llm_suggested_settings(file, num_chunks=1):
 
     prompt = f'''
     <|start_header_id|>system<|end_header_id|>
-    You are an expert in information retrieval.
+    You are an expert in information retrieval.
     You know the strengths and weaknesses of all models.
 
-    Given the following text chunks from a document,
+    Given the following text chunks from a document,
     suggest optimal settings for an embedding-based search system. The settings should include:
 
     1. Embedding model type and name
@@ -1113,13 +1113,13 @@ def get_llm_suggested_settings(file, num_chunks=1):
         "apply_preprocessing": True,
         "optimize_vocab": True,
         "apply_phonetic": False,
-        "phonetic_weight": 0.3 #
+        "phonetic_weight": 0.3
     }}
 
     Provide your suggestions in a Python dictionary format.
 
     Show me only the settings. You SHOULD NOT include any other text in the response.
-    Fill out the seeting and chose usefull values.
+    Fill out the settings and choose useful values.
     Respect the user's use case and content snippet. Choose the settings based on the chunks.
 
     <|eot_id|><|start_header_id|>user<|end_header_id|>
@@ -1142,13 +1142,13 @@ def get_llm_suggested_settings(file, num_chunks=1):
             max_new_tokens=1900,  # Control the length of the output
             truncation=True,  # Enable truncation
         )
-
+
         print(suggested_settings[0]['generated_text'])
         # Safely parse the generated text to extract the dictionary
         try:
             # Using ast.literal_eval for safe parsing
             settings_dict = ast.literal_eval(suggested_settings[0]['generated_text'])
-
+
             # Convert the settings to match the interface inputs
             return {
                 "embedding_models": settings_dict["embedding_models"],
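ast.literal_eval raises ValueError or SyntaxError as soon as the model wraps its dictionary in any extra prose, so parsing code like the above usually benefits from trimming the response to the outermost braces and falling back to defaults. A hardened sketch under that assumption follows; the fallback values are illustrative, not app.py's actual defaults.

import ast

# Hypothetical hardened version of the parsing step; the fallback
# defaults below are assumptions, not values from app.py.
def parse_suggested_settings(generated_text: str) -> dict:
    defaults = {
        "apply_preprocessing": True,
        "optimize_vocab": True,
        "apply_phonetic": False,
        "phonetic_weight": 0.3,
    }
    try:
        # Models often wrap the dict in extra prose; keep only the
        # outermost {...} span before evaluating it.
        start = generated_text.index("{")
        end = generated_text.rindex("}") + 1
        settings = ast.literal_eval(generated_text[start:end])
        return settings if isinstance(settings, dict) else defaults
    except (ValueError, SyntaxError):
        return defaults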
@@ -1388,7 +1388,11 @@ def launch_interface(debug=True):
         )
         ###
 
-        with gr.Tab("
+        with gr.Tab("Chat"):
+            with gr.Row():
+                chat_output =
+                chat_input =
+
         with gr.Row():
             results_output = gr.DataFrame(label="Results")
             stats_output = gr.DataFrame(label="Statistics")
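The two assignments added in the new Chat tab (chat_output and chat_input) are left unfinished by this commit. One plausible completion with standard Gradio components is sketched below; the component choices and the respond() handler are assumptions, not part of the commit.

import gradio as gr

with gr.Blocks() as demo:
    with gr.Tab("Chat"):
        with gr.Row():
            chat_output = gr.Chatbot(label="Chat")          # assumed component
            chat_input = gr.Textbox(label="Your question")  # assumed component

        def respond(message, history):
            # Placeholder echo handler; the real app would route the message
            # through its retrieval pipeline instead.
            history = (history or []) + [(message, f"You said: {message}")]
            return history, ""

        chat_input.submit(respond, [chat_input, chat_output], [chat_output, chat_input])

demo.launch()

Passing the Chatbot as both input and output lets the handler read the running history and append to it, while returning an empty string clears the textbox after each submit.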