seanpedrickcase committed on
Commit
bd1a015
·
1 Parent(s): bd19985

Improved inference for low-VRAM systems, refined Unsloth usage, updated packages, and switched the default local model to Qwen 3 4B

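The headline changes are driven by new environment variables read in tools/config.py. As a rough, hedged sketch of trying them out (variable names are taken from this commit's tools/config.py; the values shown are illustrative, and it assumes the module is imported from the repo root so that get_or_create_env_var picks the variables up at import time):

import os

# Names below come from tools/config.py in this commit; values are illustrative.
os.environ["CHOSEN_LOCAL_MODEL_TYPE"] = "Qwen 3 4B"  # new default local model
os.environ["LOW_VRAM_SYSTEM"] = "True"               # shrinks context/batch and quantises the KV cache
os.environ["USE_LLAMA_CPP"] = "True"                 # llama.cpp backend rather than transformers/Unsloth

import tools.config as config  # get_or_create_env_var reads the variables set above

print(config.LLM_CONTEXT_LENGTH, config.LLM_BATCH_SIZE, config.KV_QUANT_LEVEL)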
README.md CHANGED
@@ -97,7 +97,7 @@ The repo provides several requirements files that are relevant for different sit
97
 
98
  - **requirements_no_local**: Can be used to install the app without local model inference for a more lightweight installation.
99
  - **requirements_gpu.txt**: Used for Python 3.11 GPU-enabled environments. Uncomment the requirements under 'Windows' for Windows compatibility (CUDA 12.4).
100
- - **requirements_cpu.txt**: Used for Python 3.11 CPU-only environments. Uncomment the requirements under 'Windows' for Windows compatibility.
101
  - **requirements.txt**: Used for the Python 3.10 GPU-enabled environment on Hugging Face spaces (CUDA 12.4).
102
 
103
  2. **Install packages from the requirements file:**
 
97
 
98
  - **requirements_no_local**: Can be used to install the app without local model inference for a more lightweight installation.
99
  - **requirements_gpu.txt**: Used for Python 3.11 GPU-enabled environments. Uncomment the requirements under 'Windows' for Windows compatibility (CUDA 12.4).
100
+ - **requirements_cpu.txt**: Used for Python 3.11 CPU-only environments. Uncomment the requirements under 'Windows' for Windows compatibility. Make sure you have [OpenBLAS](https://github.com/OpenMathLib/OpenBLAS) installed!
101
  - **requirements.txt**: Used for the Python 3.10 GPU-enabled environment on Hugging Face spaces (CUDA 12.4).
102
 
103
  2. **Install packages from the requirements file:**
app.py CHANGED
@@ -51,12 +51,9 @@ if DYNAMODB_USAGE_LOG_HEADERS: DYNAMODB_USAGE_LOG_HEADERS = _get_env_list(DYNAMO
51
 
52
  today_rev = datetime.now().strftime("%Y%m%d")
53
 
54
- if RUN_LOCAL_MODEL == "1":
55
- default_model_choice = CHOSEN_LOCAL_MODEL_TYPE
56
- elif RUN_AWS_FUNCTIONS == "1":
57
- default_model_choice = "anthropic.claude-3-haiku-20240307-v1:0"
58
- else:
59
- default_model_choice = "gemini-2.5-flash"
60
 
61
  # Create the gradio interface
62
  app = gr.Blocks(theme = gr.themes.Default(primary_hue="blue"), fill_width=True)
@@ -119,6 +116,7 @@ with app:
119
  summarised_references_markdown = gr.Markdown("", visible=False)
120
  summarised_outputs_list = gr.Dropdown(value= list(), choices= list(), visible=False, label="List of summarised outputs", allow_custom_value=True)
121
  latest_summary_completed_num = gr.Number(0, visible=False)
 
122
 
123
  summary_xlsx_output_files_list = gr.Dropdown(value= list(), choices= list(), visible=False, label="List of xlsx summary output files", allow_custom_value=True)
124
 
@@ -192,7 +190,7 @@ with app:
192
  extract_topics_btn = gr.Button("1. Extract topics", variant="secondary")
193
 
194
  with gr.Row(equal_height=True):
195
- output_messages_textbox = gr.Textbox(value="", label="Output messages", scale=1, interactive=False)
196
  topic_extraction_output_files_xlsx = gr.File(label="Overall summary xlsx file", scale=1, interactive=False)
197
  topic_extraction_output_files = gr.File(label="Extract topics output files", scale=1, interactive=False)
198
 
@@ -410,7 +408,8 @@ with app:
410
  hf_api_key_textbox,
411
  azure_api_key_textbox,
412
  output_folder_state,
413
- logged_content_df],
 
414
  outputs=[display_topic_table_markdown,
415
  master_topic_df_state,
416
  master_unique_topics_df_state,
@@ -432,7 +431,8 @@ with app:
432
  output_tokens_num,
433
  number_of_calls_num,
434
  output_messages_textbox,
435
- logged_content_df],
 
436
  api_name="extract_topics", show_progress_on=output_messages_textbox).\
437
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False, api_name="usage_logs").\
438
  then(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state], outputs=[topic_extraction_output_files_xlsx, summary_xlsx_output_files_list])
@@ -518,7 +518,8 @@ with app:
518
  log_files_output_list_state,
519
  model_name_map_state,
520
  usage_logs_state,
521
- logged_content_df
 
522
  ],
523
  outputs=[
524
  display_topic_table_markdown,
@@ -603,7 +604,7 @@ with app:
603
  success(fn=join_cols_onto_reference_df, inputs=[master_reference_df_state, file_data_state, join_colnames, reference_df_data_file_name_textbox], outputs=[master_reference_df_state_joined, out_join_files])
604
 
605
  # Export to xlsx file
606
- export_xlsx_btn.click(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state], outputs=[out_xlsx_files], api_name="export_xlsx")
607
 
608
  # If relevant environment variable is set, load in the default cost code file from S3 or locally
609
  if GET_COST_CODES == "True" and (COST_CODES_PATH or S3_COST_CODES_PATH):
 
51
 
52
  today_rev = datetime.now().strftime("%Y%m%d")
53
 
54
+ if RUN_LOCAL_MODEL == "1": default_model_choice = CHOSEN_LOCAL_MODEL_TYPE
55
+ elif RUN_AWS_FUNCTIONS == "1": default_model_choice = "anthropic.claude-3-haiku-20240307-v1:0"
56
+ else: default_model_choice = "gemini-2.5-flash"
 
 
 
57
 
58
  # Create the gradio interface
59
  app = gr.Blocks(theme = gr.themes.Default(primary_hue="blue"), fill_width=True)
 
116
  summarised_references_markdown = gr.Markdown("", visible=False)
117
  summarised_outputs_list = gr.Dropdown(value= list(), choices= list(), visible=False, label="List of summarised outputs", allow_custom_value=True)
118
  latest_summary_completed_num = gr.Number(0, visible=False)
119
+ add_existing_topics_summary_format_textbox = gr.Textbox(value="", visible=False, label="Add existing topics summary format")
120
 
121
  summary_xlsx_output_files_list = gr.Dropdown(value= list(), choices= list(), visible=False, label="List of xlsx summary output files", allow_custom_value=True)
122
 
 
190
  extract_topics_btn = gr.Button("1. Extract topics", variant="secondary")
191
 
192
  with gr.Row(equal_height=True):
193
+ output_messages_textbox = gr.Textbox(value="", label="Output messages", scale=1, interactive=False, lines=4)
194
  topic_extraction_output_files_xlsx = gr.File(label="Overall summary xlsx file", scale=1, interactive=False)
195
  topic_extraction_output_files = gr.File(label="Extract topics output files", scale=1, interactive=False)
196
 
 
408
  hf_api_key_textbox,
409
  azure_api_key_textbox,
410
  output_folder_state,
411
+ logged_content_df,
412
+ add_existing_topics_summary_format_textbox],
413
  outputs=[display_topic_table_markdown,
414
  master_topic_df_state,
415
  master_unique_topics_df_state,
 
431
  output_tokens_num,
432
  number_of_calls_num,
433
  output_messages_textbox,
434
+ logged_content_df,
435
+ add_existing_topics_summary_format_textbox],
436
  api_name="extract_topics", show_progress_on=output_messages_textbox).\
437
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False, api_name="usage_logs").\
438
  then(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state], outputs=[topic_extraction_output_files_xlsx, summary_xlsx_output_files_list])
 
518
  log_files_output_list_state,
519
  model_name_map_state,
520
  usage_logs_state,
521
+ logged_content_df,
522
+ add_existing_topics_summary_format_textbox
523
  ],
524
  outputs=[
525
  display_topic_table_markdown,
 
604
  success(fn=join_cols_onto_reference_df, inputs=[master_reference_df_state, file_data_state, join_colnames, reference_df_data_file_name_textbox], outputs=[master_reference_df_state_joined, out_join_files])
605
 
606
  # Export to xlsx file
607
+ export_xlsx_btn.click(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state], outputs=[out_xlsx_files, summary_xlsx_output_files_list], api_name="export_xlsx")
608
 
609
  # If relevant environment variable is set, load in the default cost code file from S3 or locally
610
  if GET_COST_CODES == "True" and (COST_CODES_PATH or S3_COST_CODES_PATH):
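For orientation, the app.py changes above mostly thread the new hidden add_existing_topics_summary_format_textbox component into the existing extract-topics event chain. The toy snippet below is not the app's real handler; it only shows the same Gradio pattern of passing a hidden Textbox as an extra input to a click event:

import gradio as gr

with gr.Blocks() as demo:
    # Hidden component carrying extra instructions, mirroring
    # add_existing_topics_summary_format_textbox above
    hidden_format = gr.Textbox(value="", visible=False, label="Summary format")
    result = gr.Textbox(label="Result", interactive=False)
    run_btn = gr.Button("Run")
    # The hidden textbox is simply appended to the inputs list of the handler
    run_btn.click(lambda fmt: f"summary format received: {fmt!r}",
                  inputs=[hidden_format], outputs=[result])

# demo.launch()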
tools/aws_functions.py CHANGED
@@ -15,9 +15,6 @@ def connect_to_bedrock_runtime(model_name_map:dict, model_choice:str, aws_access
15
  if RUN_AWS_FUNCTIONS == "1" and PRIORITISE_SSO_OVER_AWS_ENV_ACCESS_KEYS == "1":
16
  print("Connecting to Bedrock via existing SSO connection")
17
  bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_REGION)
18
- elif RUN_AWS_FUNCTIONS == "1" and PRIORITISE_SSO_OVER_AWS_ENV_ACCESS_KEYS == "1":
19
- print("Connecting to Bedrock via existing SSO connection")
20
- bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_REGION)
21
  elif aws_access_key_textbox and aws_secret_key_textbox:
22
  print("Connecting to Bedrock using AWS access key and secret keys from user input.")
23
  bedrock_runtime = boto3.client('bedrock-runtime',
 
15
  if RUN_AWS_FUNCTIONS == "1" and PRIORITISE_SSO_OVER_AWS_ENV_ACCESS_KEYS == "1":
16
  print("Connecting to Bedrock via existing SSO connection")
17
  bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_REGION)
 
 
 
18
  elif aws_access_key_textbox and aws_secret_key_textbox:
19
  print("Connecting to Bedrock using AWS access key and secret keys from user input.")
20
  bedrock_runtime = boto3.client('bedrock-runtime',
tools/combine_sheets_into_xlsx.py CHANGED
@@ -380,8 +380,7 @@ def collect_output_csvs_and_create_excel_output(in_data_files:List, chosen_cols:
380
  xlsx_output_filenames = [xlsx_output_filename]
381
 
382
  # Delete intermediate csv files
383
- for csv_file in new_csv_files:
384
- os.remove(csv_file)
385
 
386
  return xlsx_output_filenames, xlsx_output_filenames
387
 
 
380
  xlsx_output_filenames = [xlsx_output_filename]
381
 
382
  # Delete intermediate csv files
383
+ for csv_file in new_csv_files: os.remove(csv_file)
 
384
 
385
  return xlsx_output_filenames, xlsx_output_filenames
386
 
tools/config.py CHANGED
@@ -190,7 +190,7 @@ if LOGGING == 'True':
190
  ###
191
  # App run variables
192
  ###
193
- OUTPUT_DEBUG_FILES = get_or_create_env_var('OUTPUT_DEBUG_FILES', 'False') # Whether to output debug files
194
 
195
  TIMEOUT_WAIT = int(get_or_create_env_var('TIMEOUT_WAIT', '30')) # Maximum number of seconds to wait for a response from the LLM
196
  NUMBER_OF_RETRY_ATTEMPTS = int(get_or_create_env_var('NUMBER_OF_RETRY_ATTEMPTS', '5')) # Maximum number of times to retry a request to the LLM
@@ -229,7 +229,7 @@ model_full_names = list()
229
  model_short_names = list()
230
  model_source = list()
231
 
232
- CHOSEN_LOCAL_MODEL_TYPE = get_or_create_env_var("CHOSEN_LOCAL_MODEL_TYPE", "Gemma 3 4B") # Gemma 3 1B # "Gemma 2b" # "Gemma 3 4B"
233
 
234
  if RUN_LOCAL_MODEL == "1" and CHOSEN_LOCAL_MODEL_TYPE:
235
  model_full_names.append(CHOSEN_LOCAL_MODEL_TYPE)
@@ -264,8 +264,21 @@ model_name_map = {
264
  HF_TOKEN = get_or_create_env_var('HF_TOKEN', '')
265
 
266
  LOAD_LOCAL_MODEL_AT_START = get_or_create_env_var('LOAD_LOCAL_MODEL_AT_START', 'True')
267
- USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True') # Llama.cpp or transformers
268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
  GEMMA2_REPO_ID = get_or_create_env_var("GEMMA2_2B_REPO_ID", "unsloth/gemma-2-it-GGUF")
271
  GEMMA2_REPO_TRANSFORMERS_ID = get_or_create_env_var("GEMMA2_2B_REPO_TRANSFORMERS_ID", "unsloth/gemma-2-2b-it-bnb-4bit")
@@ -293,18 +306,31 @@ GEMMA3_4B_MODEL_FOLDER = get_or_create_env_var("GEMMA3_4B_MODEL_FOLDER", "model/
293
 
294
  GPT_OSS_REPO_ID = get_or_create_env_var("GPT_OSS_REPO_ID", "unsloth/gpt-oss-20b-GGUF")
295
  GPT_OSS_REPO_TRANSFORMERS_ID = get_or_create_env_var("GPT_OSS_REPO_TRANSFORMERS_ID", "unsloth/gpt-oss-20b-unsloth-bnb-4bit")
296
- if USE_LLAMA_CPP == "False":
297
- GPT_OSS_REPO_ID = GPT_OSS_REPO_TRANSFORMERS_ID
298
 
299
  GPT_OSS_MODEL_FILE = get_or_create_env_var("GPT_OSS_MODEL_FILE", "gpt-oss-20b-F16.gguf")
300
  GPT_OSS_MODEL_FOLDER = get_or_create_env_var("GPT_OSS_MODEL_FOLDER", "model/gpt_oss")
301
 
302
  USE_SPECULATIVE_DECODING = get_or_create_env_var("USE_SPECULATIVE_DECODING", "False")
303
- ASSISTANT_MODEL = get_or_create_env_var("ASSISTANT_MODEL", "unsloth/gemma-3-270m-it")
304
 
305
- GEMMA3_DRAFT_MODEL_LOC = get_or_create_env_var("GEMMA3_DRAFT_MODEL_LOC", ".cache/llama.cpp/unsloth_gemma-3-270m-it-qat-GGUF_gemma-3-270m-it-qat-F16.gguf")
 
 
 
 
 
 
 
306
 
307
- GEMMA3_4B_DRAFT_MODEL_LOC = get_or_create_env_var("GEMMA3_4B_DRAFT_MODEL_LOC", ".cache/llama.cpp/unsloth_gemma-3-4b-it-qat-GGUF_gemma-3-4b-it-qat-Q4_K_M.gguf")
 
 
 
 
 
 
 
 
308
 
309
  if CHOSEN_LOCAL_MODEL_TYPE == "Gemma 2b":
310
  LOCAL_REPO_ID = GEMMA2_REPO_ID
@@ -322,34 +348,45 @@ elif CHOSEN_LOCAL_MODEL_TYPE == "Gemma 3 4B":
322
  LOCAL_MODEL_FILE = GEMMA3_4B_MODEL_FILE
323
  LOCAL_MODEL_FOLDER = GEMMA3_4B_MODEL_FOLDER
324
 
 
 
 
 
 
325
  elif CHOSEN_LOCAL_MODEL_TYPE == "gpt-oss-20b":
326
  LOCAL_REPO_ID = GPT_OSS_REPO_ID
327
  LOCAL_MODEL_FILE = GPT_OSS_MODEL_FILE
328
  LOCAL_MODEL_FOLDER = GPT_OSS_MODEL_FOLDER
329
 
330
  LLM_MAX_GPU_LAYERS = int(get_or_create_env_var('LLM_MAX_GPU_LAYERS','-1')) # Maximum possible
331
- LLM_TEMPERATURE = float(get_or_create_env_var('LLM_TEMPERATURE', '0.1'))
332
  LLM_TOP_K = int(get_or_create_env_var('LLM_TOP_K','64')) # https://docs.unsloth.ai/basics/gemma-3-how-to-run-and-fine-tune
333
  LLM_MIN_P = float(get_or_create_env_var('LLM_MIN_P', '0'))
334
  LLM_TOP_P = float(get_or_create_env_var('LLM_TOP_P', '0.95'))
335
  LLM_REPETITION_PENALTY = float(get_or_create_env_var('LLM_REPETITION_PENALTY', '1.0'))
336
 
337
  LLM_LAST_N_TOKENS = int(get_or_create_env_var('LLM_LAST_N_TOKENS', '512'))
338
- LLM_MAX_NEW_TOKENS = int(get_or_create_env_var('LLM_MAX_NEW_TOKENS', '4096'))
339
  LLM_SEED = int(get_or_create_env_var('LLM_SEED', '42'))
340
  LLM_RESET = get_or_create_env_var('LLM_RESET', 'True')
341
  LLM_STREAM = get_or_create_env_var('LLM_STREAM', 'True')
342
  LLM_THREADS = int(get_or_create_env_var('LLM_THREADS', '-1'))
343
- LLM_BATCH_SIZE = int(get_or_create_env_var('LLM_BATCH_SIZE', '128'))
344
- LLM_CONTEXT_LENGTH = int(get_or_create_env_var('LLM_CONTEXT_LENGTH', '16384'))
345
  LLM_SAMPLE = get_or_create_env_var('LLM_SAMPLE', 'True')
346
- LLM_STOP_STRINGS = get_or_create_env_var('LLM_STOP_STRINGS', r"['\n\n\n\n']")
 
347
  SPECULATIVE_DECODING = get_or_create_env_var('SPECULATIVE_DECODING', 'False')
348
  NUM_PRED_TOKENS = int(get_or_create_env_var('NUM_PRED_TOKENS', '2'))
349
- if CHOSEN_LOCAL_MODEL_TYPE == "gpt-oss-20b":
350
- REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', 'Reasoning: low')
351
- else:
352
- REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', '') # If you are using e.g. gpt-oss, you can add a reasoning suffix to set reasoning level
 
 
 
 
 
353
 
354
  # Transformers variables
355
  COMPILE_TRANSFORMERS = get_or_create_env_var('COMPILE_TRANSFORMERS', 'False') # Whether to compile transformers models
 
190
  ###
191
  # App run variables
192
  ###
193
+ OUTPUT_DEBUG_FILES = get_or_create_env_var('OUTPUT_DEBUG_FILES', 'True') # Whether to output debug files
194
 
195
  TIMEOUT_WAIT = int(get_or_create_env_var('TIMEOUT_WAIT', '30')) # Maximum number of seconds to wait for a response from the LLM
196
  NUMBER_OF_RETRY_ATTEMPTS = int(get_or_create_env_var('NUMBER_OF_RETRY_ATTEMPTS', '5')) # Maximum number of times to retry a request to the LLM
 
229
  model_short_names = list()
230
  model_source = list()
231
 
232
+ CHOSEN_LOCAL_MODEL_TYPE = get_or_create_env_var("CHOSEN_LOCAL_MODEL_TYPE", "Qwen 3 4B") # Gemma 3 1B # "Gemma 2b" # "Gemma 3 4B"
233
 
234
  if RUN_LOCAL_MODEL == "1" and CHOSEN_LOCAL_MODEL_TYPE:
235
  model_full_names.append(CHOSEN_LOCAL_MODEL_TYPE)
 
264
  HF_TOKEN = get_or_create_env_var('HF_TOKEN', '')
265
 
266
  LOAD_LOCAL_MODEL_AT_START = get_or_create_env_var('LOAD_LOCAL_MODEL_AT_START', 'True')
 
267
 
268
+ # If you are using a system with low VRAM, you can set this to True to reduce the memory requirements
269
+ LOW_VRAM_SYSTEM = get_or_create_env_var('LOW_VRAM_SYSTEM', 'False')
270
+
271
+ if LOW_VRAM_SYSTEM == 'True':
272
+ print("Changing settings for low VRAM system")
273
+ USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True')
274
+ LLM_MAX_NEW_TOKENS = int(get_or_create_env_var('LLM_MAX_NEW_TOKENS', '4096'))
275
+ LLM_CONTEXT_LENGTH = int(get_or_create_env_var('LLM_CONTEXT_LENGTH', '8192'))
276
+ LLM_BATCH_SIZE = int(get_or_create_env_var('LLM_BATCH_SIZE', '512'))
277
+ KV_QUANT_LEVEL = int(get_or_create_env_var('KV_QUANT_LEVEL', '2')) # 2 is equivalent to q4_0, 8 is q8_0
278
+
279
+
280
+
281
+ USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True') # Llama.cpp or transformers with unsloth
282
 
283
  GEMMA2_REPO_ID = get_or_create_env_var("GEMMA2_2B_REPO_ID", "unsloth/gemma-2-it-GGUF")
284
  GEMMA2_REPO_TRANSFORMERS_ID = get_or_create_env_var("GEMMA2_2B_REPO_TRANSFORMERS_ID", "unsloth/gemma-2-2b-it-bnb-4bit")
 
306
 
307
  GPT_OSS_REPO_ID = get_or_create_env_var("GPT_OSS_REPO_ID", "unsloth/gpt-oss-20b-GGUF")
308
  GPT_OSS_REPO_TRANSFORMERS_ID = get_or_create_env_var("GPT_OSS_REPO_TRANSFORMERS_ID", "unsloth/gpt-oss-20b-unsloth-bnb-4bit")
309
+ if USE_LLAMA_CPP == "False": GPT_OSS_REPO_ID = GPT_OSS_REPO_TRANSFORMERS_ID
 
310
 
311
  GPT_OSS_MODEL_FILE = get_or_create_env_var("GPT_OSS_MODEL_FILE", "gpt-oss-20b-F16.gguf")
312
  GPT_OSS_MODEL_FOLDER = get_or_create_env_var("GPT_OSS_MODEL_FOLDER", "model/gpt_oss")
313
 
314
  USE_SPECULATIVE_DECODING = get_or_create_env_var("USE_SPECULATIVE_DECODING", "False")
 
315
 
316
+ if CHOSEN_LOCAL_MODEL_TYPE == "Gemma 3 4B": ASSISTANT_MODEL = get_or_create_env_var("ASSISTANT_MODEL", "unsloth/gemma-3-270m-it")
317
+ elif CHOSEN_LOCAL_MODEL_TYPE == "Qwen 3 4B": ASSISTANT_MODEL = get_or_create_env_var("ASSISTANT_MODEL", "unsloth/Qwen3-0.6B")
318
+
319
+ DRAFT_MODEL_LOC = get_or_create_env_var("DRAFT_MODEL_LOC", ".cache/llama.cpp/")
320
+
321
+ GEMMA3_DRAFT_MODEL_LOC = get_or_create_env_var("GEMMA3_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "unsloth_gemma-3-270m-it-qat-GGUF_gemma-3-270m-it-qat-F16.gguf")
322
+
323
+ GEMMA3_4B_DRAFT_MODEL_LOC = get_or_create_env_var("GEMMA3_4B_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "unsloth_gemma-3-4b-it-qat-GGUF_gemma-3-4b-it-qat-Q4_K_M.gguf")
324
 
325
+ QWEN3_4B_REPO_ID = get_or_create_env_var("QWEN3_4B_REPO_ID", "unsloth/Qwen3-4B-Instruct-2507-GGUF")
326
+ QWEN3_4B_REPO_TRANSFORMERS_ID = get_or_create_env_var("QWEN3_4B_REPO_TRANSFORMERS_ID", "unsloth/Qwen3-4B-unsloth-bnb-4bit")
327
+ if USE_LLAMA_CPP == "False": QWEN3_4B_REPO_ID = QWEN3_4B_REPO_TRANSFORMERS_ID
328
+
329
+ QWEN3_4B_MODEL_FILE = get_or_create_env_var("QWEN3_4B_MODEL_FILE", "Qwen3-4B-Instruct-2507-Q4_K_M.gguf")
330
+ QWEN3_4B_MODEL_FOLDER = get_or_create_env_var("QWEN3_4B_MODEL_FOLDER", "model/qwen")
331
+
332
+ QWEN3_DRAFT_MODEL_LOC = get_or_create_env_var("QWEN3_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "Qwen3-0.6B-Q8_0.gguf")
333
+ QWEN3_4B_DRAFT_MODEL_LOC = get_or_create_env_var("QWEN3_4B_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "Qwen3-4B-Instruct-2507-Q4_K_M.gguf")
334
 
335
  if CHOSEN_LOCAL_MODEL_TYPE == "Gemma 2b":
336
  LOCAL_REPO_ID = GEMMA2_REPO_ID
 
348
  LOCAL_MODEL_FILE = GEMMA3_4B_MODEL_FILE
349
  LOCAL_MODEL_FOLDER = GEMMA3_4B_MODEL_FOLDER
350
 
351
+ elif CHOSEN_LOCAL_MODEL_TYPE == "Qwen 3 4B":
352
+ LOCAL_REPO_ID = QWEN3_4B_REPO_ID
353
+ LOCAL_MODEL_FILE = QWEN3_4B_MODEL_FILE
354
+ LOCAL_MODEL_FOLDER = QWEN3_4B_MODEL_FOLDER
355
+
356
  elif CHOSEN_LOCAL_MODEL_TYPE == "gpt-oss-20b":
357
  LOCAL_REPO_ID = GPT_OSS_REPO_ID
358
  LOCAL_MODEL_FILE = GPT_OSS_MODEL_FILE
359
  LOCAL_MODEL_FOLDER = GPT_OSS_MODEL_FOLDER
360
 
361
  LLM_MAX_GPU_LAYERS = int(get_or_create_env_var('LLM_MAX_GPU_LAYERS','-1')) # Maximum possible
362
+ LLM_TEMPERATURE = float(get_or_create_env_var('LLM_TEMPERATURE', '0.6'))
363
  LLM_TOP_K = int(get_or_create_env_var('LLM_TOP_K','64')) # https://docs.unsloth.ai/basics/gemma-3-how-to-run-and-fine-tune
364
  LLM_MIN_P = float(get_or_create_env_var('LLM_MIN_P', '0'))
365
  LLM_TOP_P = float(get_or_create_env_var('LLM_TOP_P', '0.95'))
366
  LLM_REPETITION_PENALTY = float(get_or_create_env_var('LLM_REPETITION_PENALTY', '1.0'))
367
 
368
  LLM_LAST_N_TOKENS = int(get_or_create_env_var('LLM_LAST_N_TOKENS', '512'))
369
+ LLM_MAX_NEW_TOKENS = int(get_or_create_env_var('LLM_MAX_NEW_TOKENS', '8192'))
370
  LLM_SEED = int(get_or_create_env_var('LLM_SEED', '42'))
371
  LLM_RESET = get_or_create_env_var('LLM_RESET', 'True')
372
  LLM_STREAM = get_or_create_env_var('LLM_STREAM', 'True')
373
  LLM_THREADS = int(get_or_create_env_var('LLM_THREADS', '-1'))
374
+ LLM_BATCH_SIZE = int(get_or_create_env_var('LLM_BATCH_SIZE', '512'))
375
+ LLM_CONTEXT_LENGTH = int(get_or_create_env_var('LLM_CONTEXT_LENGTH', '32768'))
376
  LLM_SAMPLE = get_or_create_env_var('LLM_SAMPLE', 'True')
377
+ LLM_STOP_STRINGS = get_or_create_env_var('LLM_STOP_STRINGS', r"[' ','\n\n\n\n','---------------------------------------------]")
378
+ MULTIMODAL_PROMPT_FORMAT = get_or_create_env_var('MULTIMODAL_PROMPT_FORMAT', 'False')
379
  SPECULATIVE_DECODING = get_or_create_env_var('SPECULATIVE_DECODING', 'False')
380
  NUM_PRED_TOKENS = int(get_or_create_env_var('NUM_PRED_TOKENS', '2'))
381
+ KV_QUANT_LEVEL = int(get_or_create_env_var('KV_QUANT_LEVEL', '16'))
382
+
383
+
384
+
385
+
386
+ # If you are using e.g. gpt-oss, you can add a reasoning suffix to set reasoning level, or turn it off in the case of Qwen 3 4B
387
+ if CHOSEN_LOCAL_MODEL_TYPE == "gpt-oss-20b": REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', 'Reasoning: low')
388
+ elif CHOSEN_LOCAL_MODEL_TYPE == "Qwen 3 4B" and USE_LLAMA_CPP == "False": REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', '/nothink')
389
+ else: REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', '')
390
 
391
  # Transformers variables
392
  COMPILE_TRANSFORMERS = get_or_create_env_var('COMPILE_TRANSFORMERS', 'False') # Whether to compile transformers models
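The low-VRAM and speculative-decoding settings above only define configuration values; the model loader itself lives in tools/llm_funcs.py, which is not shown in this diff. As a hedged sketch of how these values could map onto llama-cpp-python (an assumption about the backend, not the repo's actual get_model implementation; LlamaPromptLookupDecoding stands in for a draft model, whereas the repo also defines GGUF draft-model paths such as QWEN3_DRAFT_MODEL_LOC that the real loader may use instead):

from llama_cpp import Llama
from llama_cpp.llama_speculative import LlamaPromptLookupDecoding

from tools.config import (LOCAL_MODEL_FOLDER, LOCAL_MODEL_FILE, LLM_MAX_GPU_LAYERS,
                          LLM_CONTEXT_LENGTH, LLM_BATCH_SIZE, LLM_THREADS, LLM_SEED,
                          KV_QUANT_LEVEL, SPECULATIVE_DECODING, NUM_PRED_TOKENS)

llm = Llama(
    model_path=f"{LOCAL_MODEL_FOLDER}/{LOCAL_MODEL_FILE}",
    n_gpu_layers=LLM_MAX_GPU_LAYERS,          # -1 offloads as many layers as fit
    n_ctx=LLM_CONTEXT_LENGTH,                 # 8192 when LOW_VRAM_SYSTEM is True, else 32768
    n_batch=LLM_BATCH_SIZE,
    n_threads=None if LLM_THREADS == -1 else LLM_THREADS,
    seed=LLM_SEED,
    flash_attn=True,                          # a quantised KV cache generally needs flash attention
    # Per the config comment, 2 ~ q4_0 and 8 ~ q8_0; other values fall back to the default cache type
    type_k=KV_QUANT_LEVEL if KV_QUANT_LEVEL in (2, 8) else None,
    type_v=KV_QUANT_LEVEL if KV_QUANT_LEVEL in (2, 8) else None,
    draft_model=LlamaPromptLookupDecoding(num_pred_tokens=NUM_PRED_TOKENS)
    if SPECULATIVE_DECODING == "True" else None,
)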
tools/custom_csvlogger.py CHANGED
@@ -14,8 +14,7 @@ from multiprocessing import Lock
14
  from pathlib import Path
15
  from typing import TYPE_CHECKING, Any
16
  from gradio_client import utils as client_utils
17
- import gradio as gr
18
- from gradio import utils, wasm_utils
19
  from tools.config import AWS_REGION, AWS_ACCESS_KEY, AWS_SECRET_KEY, RUN_AWS_FUNCTIONS
20
 
21
 
@@ -56,9 +55,7 @@ class CSVLogger_custom(FlaggingCallback):
56
  self.simplify_file_data = simplify_file_data
57
  self.verbose = verbose
58
  self.dataset_file_name = dataset_file_name
59
- self.lock = (
60
- Lock() if not wasm_utils.IS_WASM else contextlib.nullcontext()
61
- ) # The multiprocessing module doesn't work on Lite.
62
 
63
  def setup(
64
  self,
 
14
  from pathlib import Path
15
  from typing import TYPE_CHECKING, Any
16
  from gradio_client import utils as client_utils
17
+ from gradio import utils
 
18
  from tools.config import AWS_REGION, AWS_ACCESS_KEY, AWS_SECRET_KEY, RUN_AWS_FUNCTIONS
19
 
20
 
 
55
  self.simplify_file_data = simplify_file_data
56
  self.verbose = verbose
57
  self.dataset_file_name = dataset_file_name
58
+ self.lock = Lock()
 
 
59
 
60
  def setup(
61
  self,
tools/dedup_summaries.py CHANGED
@@ -161,8 +161,6 @@ def deduplicate_topics(reference_df:pd.DataFrame,
161
 
162
  reference_file_out_path = output_folder + reference_table_file_name
163
  unique_topics_file_out_path = output_folder + unique_topics_table_file_name
164
- #reference_df.to_csv(reference_file_out_path, index = None, encoding='utf-8-sig')
165
- #topic_summary_df.to_csv(unique_topics_file_out_path, index=None, encoding='utf-8-sig')
166
 
167
  output_files.append(reference_file_out_path)
168
  output_files.append(unique_topics_file_out_path)
@@ -195,13 +193,17 @@ def deduplicate_topics(reference_df:pd.DataFrame,
195
  if "Group" not in reference_df.columns:
196
  reference_df["Group"] = "All"
197
  for i in range(0, 8):
198
- if merge_sentiment == "No":
199
  if merge_general_topics == "No":
200
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
201
  reference_df_unique = reference_df.drop_duplicates("old_category")
202
 
203
- deduplicated_topic_map_df = reference_df_unique.groupby(["General topic", "Sentiment", "Group"]).apply(
204
- lambda group: deduplicate_categories(
 
 
 
 
205
  group["Subtopic"],
206
  group["Sentiment"],
207
  reference_df,
@@ -209,30 +211,38 @@ def deduplicate_topics(reference_df:pd.DataFrame,
209
  merge_general_topics="No",
210
  threshold=score_threshold
211
  )
212
- ).reset_index(drop=True)
 
 
 
 
 
213
  else:
214
  # This case should allow cross-topic matching but is still grouping by Sentiment
215
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
216
  reference_df_unique = reference_df.drop_duplicates("old_category")
217
-
218
- deduplicated_topic_map_df = reference_df_unique.groupby("Sentiment").apply(
219
- lambda group: deduplicate_categories(
 
220
  group["Subtopic"],
221
  group["Sentiment"],
222
  reference_df,
223
- general_topic_series=None, # Set to None to allow cross-topic matching
224
  merge_general_topics="Yes",
225
  threshold=score_threshold
226
  )
227
- ).reset_index(drop=True)
 
 
228
  else:
229
  if merge_general_topics == "No":
230
- # Update this case to maintain general topic boundaries
231
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
232
  reference_df_unique = reference_df.drop_duplicates("old_category")
233
-
234
- deduplicated_topic_map_df = reference_df_unique.groupby("General topic").apply(
235
- lambda group: deduplicate_categories(
 
236
  group["Subtopic"],
237
  group["Sentiment"],
238
  reference_df,
@@ -241,9 +251,10 @@ def deduplicate_topics(reference_df:pd.DataFrame,
241
  merge_sentiment=merge_sentiment,
242
  threshold=score_threshold
243
  )
244
- ).reset_index(drop=True)
245
- else:
246
- # For complete merging across all categories
 
247
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
248
  reference_df_unique = reference_df.drop_duplicates("old_category")
249
 
@@ -251,14 +262,13 @@ def deduplicate_topics(reference_df:pd.DataFrame,
251
  reference_df_unique["Subtopic"],
252
  reference_df_unique["Sentiment"],
253
  reference_df,
254
- general_topic_series=None, # Set to None to allow cross-topic matching
255
  merge_general_topics="Yes",
256
  merge_sentiment=merge_sentiment,
257
  threshold=score_threshold
258
  ).reset_index(drop=True)
259
-
260
  if deduplicated_topic_map_df['deduplicated_category'].isnull().all():
261
- # Check if 'deduplicated_category' contains any values
262
  print("No deduplicated categories found, skipping the following code.")
263
 
264
  else:
@@ -785,6 +795,9 @@ def summarise_output_topics(sampled_reference_table_df:pd.DataFrame,
785
  for prompt, summary, metadata, batch, model_choice, validated, group, task_type, file_name in zip(all_prompts_content, all_summaries_content, all_metadata_content, all_batches_content, all_model_choice_content, all_validated_content, all_groups_content, all_task_type_content, all_file_names_content)
786
  ]
787
 
 
 
 
788
  out_logged_content = existing_logged_content + all_logged_content
789
 
790
  ### Save output files
@@ -1004,7 +1017,7 @@ def overall_summary(topic_summary_df:pd.DataFrame,
1004
  # Write overall outputs to csv
1005
  overall_summary_output_csv_path = output_folder + batch_file_path_details + "_overall_summary_" + model_choice_clean_short + ".csv"
1006
  summarised_outputs_df = pd.DataFrame(data={"Group":unique_groups, "Summary":summarised_outputs_for_df})
1007
- summarised_outputs_df.to_csv(overall_summary_output_csv_path, index=None)
1008
  output_files.append(overall_summary_output_csv_path)
1009
 
1010
  summarised_outputs_df_for_display = pd.DataFrame(data={"Group":unique_groups, "Summary":summarised_outputs})
@@ -1031,6 +1044,9 @@ def overall_summary(topic_summary_df:pd.DataFrame,
1031
  for prompt, summary, metadata, batch, model_choice, validated, group, task_type, file_name in zip(all_prompts_content, all_summaries_content, all_metadata_content, all_batches_content, all_model_choice_content, all_validated_content, all_groups_content, all_task_type_content, all_file_names_content)
1032
  ]
1033
 
 
 
 
1034
  out_logged_content = existing_logged_content + all_logged_content
1035
 
1036
  return output_files, html_output_table, summarised_outputs_df, out_metadata_str, input_tokens_num, output_tokens_num, number_of_calls_num, time_taken, out_message, out_logged_content
 
161
 
162
  reference_file_out_path = output_folder + reference_table_file_name
163
  unique_topics_file_out_path = output_folder + unique_topics_table_file_name
 
 
164
 
165
  output_files.append(reference_file_out_path)
166
  output_files.append(unique_topics_file_out_path)
 
193
  if "Group" not in reference_df.columns:
194
  reference_df["Group"] = "All"
195
  for i in range(0, 8):
196
+ if merge_sentiment == "No":
197
  if merge_general_topics == "No":
198
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
199
  reference_df_unique = reference_df.drop_duplicates("old_category")
200
 
201
+ # Create an empty list to store results from each group
202
+ results = []
203
+ # Iterate over each group instead of using .apply()
204
+ for name, group in reference_df_unique.groupby(["General topic", "Sentiment", "Group"]):
205
+ # Run your function on the 'group' DataFrame
206
+ result = deduplicate_categories(
207
  group["Subtopic"],
208
  group["Sentiment"],
209
  reference_df,
 
211
  merge_general_topics="No",
212
  threshold=score_threshold
213
  )
214
+ results.append(result)
215
+
216
+ # Concatenate all the results into a single DataFrame
217
+ deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
218
+ # --- MODIFIED SECTION END ---
219
+
220
  else:
221
  # This case should allow cross-topic matching but is still grouping by Sentiment
222
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
223
  reference_df_unique = reference_df.drop_duplicates("old_category")
224
+
225
+ results = []
226
+ for name, group in reference_df_unique.groupby("Sentiment"):
227
+ result = deduplicate_categories(
228
  group["Subtopic"],
229
  group["Sentiment"],
230
  reference_df,
231
+ general_topic_series=None,
232
  merge_general_topics="Yes",
233
  threshold=score_threshold
234
  )
235
+ results.append(result)
236
+ deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
237
+
238
  else:
239
  if merge_general_topics == "No":
 
240
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
241
  reference_df_unique = reference_df.drop_duplicates("old_category")
242
+
243
+ results = []
244
+ for name, group in reference_df_unique.groupby("General topic"):
245
+ result = deduplicate_categories(
246
  group["Subtopic"],
247
  group["Sentiment"],
248
  reference_df,
 
251
  merge_sentiment=merge_sentiment,
252
  threshold=score_threshold
253
  )
254
+ results.append(result)
255
+ deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
256
+
257
+ else:
258
  reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
259
  reference_df_unique = reference_df.drop_duplicates("old_category")
260
 
 
262
  reference_df_unique["Subtopic"],
263
  reference_df_unique["Sentiment"],
264
  reference_df,
265
+ general_topic_series=None,
266
  merge_general_topics="Yes",
267
  merge_sentiment=merge_sentiment,
268
  threshold=score_threshold
269
  ).reset_index(drop=True)
270
+
271
  if deduplicated_topic_map_df['deduplicated_category'].isnull().all():
 
272
  print("No deduplicated categories found, skipping the following code.")
273
 
274
  else:
 
795
  for prompt, summary, metadata, batch, model_choice, validated, group, task_type, file_name in zip(all_prompts_content, all_summaries_content, all_metadata_content, all_batches_content, all_model_choice_content, all_validated_content, all_groups_content, all_task_type_content, all_file_names_content)
796
  ]
797
 
798
+ if isinstance(existing_logged_content, pd.DataFrame):
799
+ existing_logged_content = existing_logged_content.to_dict(orient="records")
800
+
801
  out_logged_content = existing_logged_content + all_logged_content
802
 
803
  ### Save output files
 
1017
  # Write overall outputs to csv
1018
  overall_summary_output_csv_path = output_folder + batch_file_path_details + "_overall_summary_" + model_choice_clean_short + ".csv"
1019
  summarised_outputs_df = pd.DataFrame(data={"Group":unique_groups, "Summary":summarised_outputs_for_df})
1020
+ summarised_outputs_df.to_csv(overall_summary_output_csv_path, index=None, encoding='utf-8-sig')
1021
  output_files.append(overall_summary_output_csv_path)
1022
 
1023
  summarised_outputs_df_for_display = pd.DataFrame(data={"Group":unique_groups, "Summary":summarised_outputs})
 
1044
  for prompt, summary, metadata, batch, model_choice, validated, group, task_type, file_name in zip(all_prompts_content, all_summaries_content, all_metadata_content, all_batches_content, all_model_choice_content, all_validated_content, all_groups_content, all_task_type_content, all_file_names_content)
1045
  ]
1046
 
1047
+ if isinstance(existing_logged_content, pd.DataFrame):
1048
+ existing_logged_content = existing_logged_content.to_dict(orient="records")
1049
+
1050
  out_logged_content = existing_logged_content + all_logged_content
1051
 
1052
  return output_files, html_output_table, summarised_outputs_df, out_metadata_str, input_tokens_num, output_tokens_num, number_of_calls_num, time_taken, out_message, out_logged_content
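A side note on the dedup_summaries.py changes above: the .groupby().apply() calls are replaced with explicit loops over the groups followed by pd.concat. The toy example below shows that pattern in isolation; deduplicate_categories is stood in for by a trivial per-group transform:

import pandas as pd

df = pd.DataFrame({
    "Sentiment": ["Positive", "Positive", "Negative"],
    "Subtopic": ["Parking", "Parking fees", "Noise"],
})

results = []
for name, group in df.groupby("Sentiment"):
    # Stand-in for deduplicate_categories(): any per-group transform works here
    results.append(group.assign(deduplicated_category=group["Subtopic"].str.lower()))

# Concatenate the per-group results, as the refactored code does
deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
print(deduplicated_topic_map_df)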
tools/llm_api_call.py CHANGED
@@ -15,7 +15,7 @@ from typing import List, Tuple, Any
15
  from io import StringIO
16
  GradioFileData = gr.FileData
17
 
18
- from tools.prompts import initial_table_prompt, prompt2, prompt3, initial_table_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, force_existing_topics_prompt, allow_new_topics_prompt, force_single_topic_prompt, add_existing_topics_assistant_prefill, initial_table_assistant_prefill, structured_summary_prompt
19
  from tools.helper_functions import read_file, put_columns_in_df, wrap_text, initial_clean, load_in_data_file, load_in_file, create_topic_summary_df_from_reference_table, convert_reference_table_to_pivot_table, get_basic_response_data, clean_column_name, load_in_previous_data_files, create_batch_file_path_details, move_overall_summary_output_files_to_front_page
20
  from tools.llm_funcs import ResponseObject, construct_gemini_generative_model, call_llm_with_markdown_table_checks, create_missing_references_df, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model
21
  from tools.config import RUN_LOCAL_MODEL, AWS_REGION, MAX_COMMENT_CHARS, MAX_OUTPUT_VALIDATION_ATTEMPTS, LLM_MAX_NEW_TOKENS, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, OUTPUT_FOLDER, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, LLM_SEED, MAX_GROUPS, REASONING_SUFFIX, AZURE_INFERENCE_ENDPOINT, MAX_ROWS, MAXIMUM_ZERO_SHOT_TOPICS, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
@@ -352,9 +352,9 @@ def write_llm_output_and_logs(response_text: str,
352
  topic_table_out_path = "topic_table_error.csv"
353
  reference_table_out_path = "reference_table_error.csv"
354
  topic_summary_df_out_path = "unique_topic_table_error.csv"
355
- topic_with_response_df = pd.DataFrame()
356
- out_reference_df = pd.DataFrame()
357
- out_topic_summary_df = pd.DataFrame()
358
  is_error = False # If there was an error in parsing, return boolean saying error
359
  # Convert conversation to string and add to log outputs
360
  whole_conversation_str = '\n'.join(whole_conversation)
@@ -385,6 +385,7 @@ def write_llm_output_and_logs(response_text: str,
385
  topic_with_response_df, is_error = convert_response_text_to_dataframe(response_text)
386
  except Exception as e:
387
  print("Error in parsing markdown table from response text:", e)
 
388
  return topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, topic_with_response_df, out_reference_df, out_topic_summary_df, batch_file_path_details, is_error
389
 
390
  # Rename columns to ensure consistent use of data frames later in code
@@ -420,8 +421,11 @@ def write_llm_output_and_logs(response_text: str,
420
  for index, row in topic_with_response_df.iterrows():
421
  references = re.findall(r'\d+', str(row.iloc[3])) if pd.notna(row.iloc[3]) else []
422
  # If no numbers found in the Response References column, check the Summary column in case reference numbers were put there by mistake
423
- if not references:
424
- references = re.findall(r'\d+', str(row.iloc[4])) if pd.notna(row.iloc[4]) else []
 
 
 
425
 
426
  # Filter out references that are outside the valid range
427
  if references:
@@ -695,6 +699,7 @@ def extract_topics(in_data_file: GradioFileData,
695
  assistant_model:object=list(),
696
  max_rows:int=max_rows,
697
  original_full_file_name:str="",
 
698
  progress=Progress(track_tqdm=False)):
699
 
700
  '''
@@ -749,6 +754,7 @@ def extract_topics(in_data_file: GradioFileData,
749
  - assistant_model: Assistant model object for local inference.
750
  - max_rows: The maximum number of rows to process.
751
  - original_full_file_name: The original full file name.
 
752
  - progress (Progress): A progress tracker.
753
 
754
  '''
@@ -863,6 +869,9 @@ def extract_topics(in_data_file: GradioFileData,
863
  # Call the function to prepare the input table
864
  simplified_csv_table_path, normalised_simple_markdown_table, start_row, end_row, batch_basic_response_df = data_file_to_markdown_table(file_data, file_name, chosen_cols, latest_batch_completed, batch_size)
865
 
 
 
 
866
  # Conversation history
867
  conversation_history = list()
868
 
@@ -951,11 +960,15 @@ def extract_topics(in_data_file: GradioFileData,
951
  # Format the summary prompt with the response table and topics
952
  if produce_structures_summary_radio != "Yes":
953
  formatted_summary_prompt = add_existing_topics_prompt.format(response_table=normalised_simple_markdown_table,
954
- topics=unique_topics_markdown,
955
- topic_assignment=topic_assignment_prompt, force_single_topic=force_single_topic_prompt, sentiment_choices=sentiment_prompt)
 
 
 
 
956
  else:
957
  formatted_summary_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table,
958
- topics=unique_topics_markdown)
959
 
960
  full_prompt = formatted_system_prompt + "\n" + formatted_summary_prompt
961
 
@@ -997,7 +1010,7 @@ def extract_topics(in_data_file: GradioFileData,
997
 
998
  ## Reference table mapping response numbers to topics
999
  if output_debug_files == "True":
1000
- new_reference_df.to_csv(reference_table_out_path, index=None)
1001
  out_file_paths.append(reference_table_out_path)
1002
 
1003
  ## Unique topic list
@@ -1006,7 +1019,7 @@ def extract_topics(in_data_file: GradioFileData,
1006
  new_topic_summary_df["Group"] = group_name
1007
 
1008
  if output_debug_files == "True":
1009
- new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None)
1010
  out_file_paths.append(topic_summary_df_out_path)
1011
 
1012
  # Outputs for markdown table output
@@ -1039,7 +1052,8 @@ def extract_topics(in_data_file: GradioFileData,
1039
 
1040
  # Format the summary prompt with the response table and topics
1041
  if produce_structures_summary_radio != "Yes":
1042
- formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table, sentiment_choices=sentiment_prompt)
 
1043
  else:
1044
  unique_topics_markdown="No suggested headings for this summary"
1045
  formatted_initial_table_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table, topics=unique_topics_markdown)
@@ -1076,7 +1090,7 @@ def extract_topics(in_data_file: GradioFileData,
1076
  if output_debug_files == "True":
1077
 
1078
  # Output reference table
1079
- reference_df.to_csv(reference_table_out_path, index=None)
1080
  out_file_paths.append(reference_table_out_path)
1081
 
1082
  ## Unique topic list
@@ -1086,7 +1100,7 @@ def extract_topics(in_data_file: GradioFileData,
1086
  new_topic_summary_df["Group"] = group_name
1087
 
1088
  if output_debug_files == "True":
1089
- new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None)
1090
  out_file_paths.append(topic_summary_df_out_path)
1091
 
1092
  whole_conversation_metadata.append(whole_conversation_metadata_str)
@@ -1160,7 +1174,7 @@ def extract_topics(in_data_file: GradioFileData,
1160
  basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean_short + "_temp_" + str(temperature) + ".csv"
1161
 
1162
  ## Reference table mapping response numbers to topics
1163
- existing_reference_df.to_csv(reference_table_out_path, index=None)
1164
  out_file_paths.append(reference_table_out_path)
1165
  join_file_paths.append(reference_table_out_path)
1166
 
@@ -1250,6 +1264,7 @@ def wrapper_extract_topics_per_column_value(
1250
  azure_api_key_textbox:str="",
1251
  output_folder: str = OUTPUT_FOLDER,
1252
  existing_logged_content:list=list(),
 
1253
  force_single_topic_prompt: str = force_single_topic_prompt,
1254
  max_tokens: int = max_tokens,
1255
  model_name_map: dict = model_name_map,
@@ -1304,6 +1319,7 @@ def wrapper_extract_topics_per_column_value(
1304
  :param output_folder: The folder where output files will be saved.
1305
  :param existing_logged_content: A list of existing logged content.
1306
  :param force_single_topic_prompt: Prompt for forcing a single topic.
 
1307
  :param max_tokens: Maximum tokens for LLM generation.
1308
  :param model_name_map: Dictionary mapping model names to their properties.
1309
  :param max_time_for_loop: Maximum time allowed for the processing loop.
@@ -1312,7 +1328,7 @@ def wrapper_extract_topics_per_column_value(
1312
  :param model: Model object for local inference.
1313
  :param tokenizer: Tokenizer object for local inference.
1314
  :param assistant_model: Assistant model object for local inference.
1315
- :param max_rows: The maximum number of rows to process.
1316
  :param progress: Gradio Progress object for tracking progress.
1317
  :return: A tuple containing consolidated results, mimicking the return structure of `extract_topics`.
1318
  """
@@ -1488,6 +1504,7 @@ def wrapper_extract_topics_per_column_value(
1488
  max_rows=max_rows,
1489
  existing_logged_content=all_logged_content,
1490
  original_full_file_name=original_file_name,
 
1491
  progress=progress
1492
  )
1493
 
@@ -1521,21 +1538,23 @@ def wrapper_extract_topics_per_column_value(
1521
  # For now, it will continue
1522
  continue
1523
 
 
 
 
 
 
1524
  if "Group" in acc_reference_df.columns:
1525
- model_choice_clean = model_name_map[model_choice]["short_name"]
1526
- model_choice_clean_short = clean_column_name(model_choice_clean, max_length=20, front_characters=False)
1527
- overall_file_name = clean_column_name(original_file_name, max_length=20)
1528
- column_clean = clean_column_name(chosen_cols, max_length=20)
1529
 
1530
  acc_reference_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_table_" + model_choice_clean_short + ".csv"
1531
  acc_topic_summary_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_unique_topics_" + model_choice_clean_short + ".csv"
1532
  acc_reference_df_pivot_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_pivot_" + model_choice_clean_short + ".csv"
1533
  acc_missing_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_missing_df_" + model_choice_clean_short + ".csv"
1534
 
1535
- acc_reference_df.to_csv(acc_reference_df_path, index=None)
1536
- acc_topic_summary_df.to_csv(acc_topic_summary_df_path, index=None)
1537
- acc_reference_df_pivot.to_csv(acc_reference_df_pivot_path, index=None)
1538
- acc_missing_df.to_csv(acc_missing_df_path, index=None)
1539
 
1540
  acc_log_files_output_paths.append(acc_missing_df_path)
1541
 
@@ -1740,6 +1759,7 @@ def all_in_one_pipeline(
1740
  model_name_map_state: dict = model_name_map,
1741
  usage_logs_location: str = "",
1742
  existing_logged_content:list=list(),
 
1743
  model: object = None,
1744
  tokenizer: object = None,
1745
  assistant_model: object = None,
@@ -1749,7 +1769,60 @@ def all_in_one_pipeline(
1749
  """
1750
  Orchestrates the full All-in-one flow: extract → deduplicate → summarise → overall summary → Excel export.
1751
 
1752
- Returns a large tuple matching the UI components updated during the original chained flow.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1753
  """
1754
 
1755
  # Load local model if it's not already loaded
@@ -1830,7 +1903,8 @@ def all_in_one_pipeline(
1830
  model=model,
1831
  tokenizer=tokenizer,
1832
  assistant_model=assistant_model,
1833
- max_rows=max_rows
 
1834
  )
1835
 
1836
  total_input_tokens += out_input_tokens
 
15
  from io import StringIO
16
  GradioFileData = gr.FileData
17
 
18
+ from tools.prompts import initial_table_prompt, prompt2, prompt3, initial_table_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, force_existing_topics_prompt, allow_new_topics_prompt, force_single_topic_prompt, add_existing_topics_assistant_prefill, initial_table_assistant_prefill, structured_summary_prompt, default_response_reference_format, single_response_reference_format
19
  from tools.helper_functions import read_file, put_columns_in_df, wrap_text, initial_clean, load_in_data_file, load_in_file, create_topic_summary_df_from_reference_table, convert_reference_table_to_pivot_table, get_basic_response_data, clean_column_name, load_in_previous_data_files, create_batch_file_path_details, move_overall_summary_output_files_to_front_page
20
  from tools.llm_funcs import ResponseObject, construct_gemini_generative_model, call_llm_with_markdown_table_checks, create_missing_references_df, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model
21
  from tools.config import RUN_LOCAL_MODEL, AWS_REGION, MAX_COMMENT_CHARS, MAX_OUTPUT_VALIDATION_ATTEMPTS, LLM_MAX_NEW_TOKENS, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, OUTPUT_FOLDER, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, LLM_SEED, MAX_GROUPS, REASONING_SUFFIX, AZURE_INFERENCE_ENDPOINT, MAX_ROWS, MAXIMUM_ZERO_SHOT_TOPICS, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
 
352
  topic_table_out_path = "topic_table_error.csv"
353
  reference_table_out_path = "reference_table_error.csv"
354
  topic_summary_df_out_path = "unique_topic_table_error.csv"
355
+ topic_with_response_df = pd.DataFrame(columns=["General topic", "Subtopic", "Sentiment", "Response References", "Summary"])
356
+ out_reference_df = pd.DataFrame(columns=["Response References", "General topic", "Subtopic", "Sentiment", "Summary", "Start row of group"])
357
+ out_topic_summary_df = pd.DataFrame(columns=["General topic", "Subtopic", "Sentiment"])
358
  is_error = False # If there was an error in parsing, return boolean saying error
359
  # Convert conversation to string and add to log outputs
360
  whole_conversation_str = '\n'.join(whole_conversation)
 
385
  topic_with_response_df, is_error = convert_response_text_to_dataframe(response_text)
386
  except Exception as e:
387
  print("Error in parsing markdown table from response text:", e)
388
+
389
  return topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, topic_with_response_df, out_reference_df, out_topic_summary_df, batch_file_path_details, is_error
390
 
391
  # Rename columns to ensure consistent use of data frames later in code
 
421
  for index, row in topic_with_response_df.iterrows():
422
  references = re.findall(r'\d+', str(row.iloc[3])) if pd.notna(row.iloc[3]) else []
423
  # If no numbers found in the Response References column, check the Summary column in case reference numbers were put there by mistake
424
+ ##if not references:
425
+ # references = re.findall(r'\d+', str(row.iloc[4])) if pd.notna(row.iloc[4]) else []
426
+ # If batch size is 1, references will always be 1
427
+ if batch_size_number == 1:
428
+ references = "1"
429
 
430
  # Filter out references that are outside the valid range
431
  if references:
 
699
  assistant_model:object=list(),
700
  max_rows:int=max_rows,
701
  original_full_file_name:str="",
702
+ add_existing_topics_summary_format:str="",
703
  progress=Progress(track_tqdm=False)):
704
 
705
  '''
 
754
  - assistant_model: Assistant model object for local inference.
755
  - max_rows: The maximum number of rows to process.
756
  - original_full_file_name: The original full file name.
757
+ - add_existing_topics_summary_format: Initial instructions to guide the format for the initial summary of the topics.
758
  - progress (Progress): A progress tracker.
759
 
760
  '''
 
869
  # Call the function to prepare the input table
870
  simplified_csv_table_path, normalised_simple_markdown_table, start_row, end_row, batch_basic_response_df = data_file_to_markdown_table(file_data, file_name, chosen_cols, latest_batch_completed, batch_size)
871
 
872
+ if batch_basic_response_df.shape[0] == 1: response_reference_format = single_response_reference_format
873
+ else: response_reference_format = default_response_reference_format
874
+
875
  # Conversation history
876
  conversation_history = list()
877
 
 
960
  # Format the summary prompt with the response table and topics
961
  if produce_structures_summary_radio != "Yes":
962
  formatted_summary_prompt = add_existing_topics_prompt.format(response_table=normalised_simple_markdown_table,
963
+ topics=unique_topics_markdown,
964
+ topic_assignment=topic_assignment_prompt,
965
+ force_single_topic=force_single_topic_prompt,
966
+ sentiment_choices=sentiment_prompt,
967
+ response_reference_format=response_reference_format,
968
+ add_existing_topics_summary_format=add_existing_topics_summary_format)
969
  else:
970
  formatted_summary_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table,
971
+ topics=unique_topics_markdown)
972
 
973
  full_prompt = formatted_system_prompt + "\n" + formatted_summary_prompt
974
 
 
1010
 
1011
  ## Reference table mapping response numbers to topics
1012
  if output_debug_files == "True":
1013
+ new_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
1014
  out_file_paths.append(reference_table_out_path)
1015
 
1016
  ## Unique topic list
 
1019
  new_topic_summary_df["Group"] = group_name
1020
 
1021
  if output_debug_files == "True":
1022
+ new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None, encoding='utf-8-sig')
1023
  out_file_paths.append(topic_summary_df_out_path)
1024
 
1025
  # Outputs for markdown table output
 
1052
 
1053
  # Format the summary prompt with the response table and topics
1054
  if produce_structures_summary_radio != "Yes":
1055
+ formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table, sentiment_choices=sentiment_prompt,
1056
+ response_reference_format=response_reference_format, add_existing_topics_summary_format=add_existing_topics_summary_format)
1057
  else:
1058
  unique_topics_markdown="No suggested headings for this summary"
1059
  formatted_initial_table_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table, topics=unique_topics_markdown)
 
1090
  if output_debug_files == "True":
1091
 
1092
  # Output reference table
1093
+ reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
1094
  out_file_paths.append(reference_table_out_path)
1095
 
1096
  ## Unique topic list
 
1100
  new_topic_summary_df["Group"] = group_name
1101
 
1102
  if output_debug_files == "True":
1103
+ new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None, encoding='utf-8-sig')
1104
  out_file_paths.append(topic_summary_df_out_path)
1105
 
1106
  whole_conversation_metadata.append(whole_conversation_metadata_str)
 
1174
  basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean_short + "_temp_" + str(temperature) + ".csv"
1175
 
1176
  ## Reference table mapping response numbers to topics
1177
+ existing_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
1178
  out_file_paths.append(reference_table_out_path)
1179
  join_file_paths.append(reference_table_out_path)
1180
 
 
1264
  azure_api_key_textbox:str="",
1265
  output_folder: str = OUTPUT_FOLDER,
1266
  existing_logged_content:list=list(),
1267
+ add_existing_topics_summary_format:str="",
1268
  force_single_topic_prompt: str = force_single_topic_prompt,
1269
  max_tokens: int = max_tokens,
1270
  model_name_map: dict = model_name_map,
 
1319
  :param output_folder: The folder where output files will be saved.
1320
  :param existing_logged_content: A list of existing logged content.
1321
  :param force_single_topic_prompt: Prompt for forcing a single topic.
1322
+ :param add_existing_topics_summary_format: Initial instructions to guide the format for the initial summary of the topics.
1323
  :param max_tokens: Maximum tokens for LLM generation.
1324
  :param model_name_map: Dictionary mapping model names to their properties.
1325
  :param max_time_for_loop: Maximum time allowed for the processing loop.
 
1328
  :param model: Model object for local inference.
1329
  :param tokenizer: Tokenizer object for local inference.
1330
  :param assistant_model: Assistant model object for local inference.
1331
+ :param max_rows: The maximum number of rows to process.
1332
  :param progress: Gradio Progress object for tracking progress.
1333
  :return: A tuple containing consolidated results, mimicking the return structure of `extract_topics`.
1334
  """
 
1504
  max_rows=max_rows,
1505
  existing_logged_content=all_logged_content,
1506
  original_full_file_name=original_file_name,
1507
+ add_existing_topics_summary_format=add_existing_topics_summary_format,
1508
  progress=progress
1509
  )
1510
 
 
1538
  # For now, it will continue
1539
  continue
1540
 
1541
+ overall_file_name = clean_column_name(original_file_name, max_length=20)
1542
+ model_choice_clean = model_name_map[model_choice]["short_name"]
1543
+ model_choice_clean_short = clean_column_name(model_choice_clean, max_length=20, front_characters=False)
1544
+ column_clean = clean_column_name(chosen_cols, max_length=20)
1545
+
1546
  if "Group" in acc_reference_df.columns:
1547
+
 
 
 
1548
 
1549
  acc_reference_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_table_" + model_choice_clean_short + ".csv"
1550
  acc_topic_summary_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_unique_topics_" + model_choice_clean_short + ".csv"
1551
  acc_reference_df_pivot_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_pivot_" + model_choice_clean_short + ".csv"
1552
  acc_missing_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_missing_df_" + model_choice_clean_short + ".csv"
1553
 
1554
+ acc_reference_df.to_csv(acc_reference_df_path, index=None, encoding='utf-8-sig')
1555
+ acc_topic_summary_df.to_csv(acc_topic_summary_df_path, index=None, encoding='utf-8-sig')
1556
+ acc_reference_df_pivot.to_csv(acc_reference_df_pivot_path, index=None, encoding='utf-8-sig')
1557
+ acc_missing_df.to_csv(acc_missing_df_path, index=None, encoding='utf-8-sig')
1558
 
1559
  acc_log_files_output_paths.append(acc_missing_df_path)
1560
 
 
1759
  model_name_map_state: dict = model_name_map,
1760
  usage_logs_location: str = "",
1761
  existing_logged_content:list=list(),
1762
+ add_existing_topics_summary_format:str="",
1763
  model: object = None,
1764
  tokenizer: object = None,
1765
  assistant_model: object = None,
 
1769
  """
1770
  Orchestrates the full All-in-one flow: extract → deduplicate → summarise → overall summary → Excel export.
1771
 
1772
+ Args:
1773
+ grouping_col (str): The column used for grouping data.
1774
+ in_data_files (List[str]): List of input data file paths.
1775
+ file_data (pd.DataFrame): The input data as a pandas DataFrame.
1776
+ existing_topics_table (pd.DataFrame): DataFrame of existing topics.
1777
+ existing_reference_df (pd.DataFrame): DataFrame of existing reference data.
1778
+ existing_topic_summary_df (pd.DataFrame): DataFrame of existing topic summaries.
1779
+ unique_table_df_display_table_markdown (str): Markdown string for displaying unique topics.
1780
+ original_file_name (str): The original name of the input file.
1781
+ total_number_of_batches (int): Total number of batches for processing.
1782
+ in_api_key (str): API key for the LLM.
1783
+ temperature (float): Temperature setting for the LLM.
1784
+ chosen_cols (List[str]): List of columns chosen for analysis.
1785
+ model_choice (str): The chosen LLM model.
1786
+ candidate_topics (GradioFileData): Gradio file data for candidate topics.
1787
+ first_loop_state (bool): State indicating if it's the first loop.
1788
+ conversation_metadata_text (str): Text containing conversation metadata.
1789
+ latest_batch_completed (int): The latest batch number completed.
1790
+ time_taken_so_far (float): Cumulative time taken so far.
1791
+ initial_table_prompt_text (str): Initial prompt text for table generation.
1792
+ initial_table_system_prompt_text (str): Initial system prompt text for table generation.
1793
+ add_existing_topics_system_prompt_text (str): System prompt for adding existing topics.
1794
+ add_existing_topics_prompt_text (str): Prompt for adding existing topics.
1795
+ number_of_prompts_used (int): Number of prompts used in sequence.
1796
+ batch_size (int): Size of each processing batch.
1797
+ context_text (str): Additional context for the LLM.
1798
+ sentiment_choice (str): Choice for sentiment analysis (e.g., "Yes", "No").
1799
+ force_zero_shot_choice (str): Choice to force zero-shot prompting.
1800
+ in_excel_sheets (List[str]): List of sheet names in the input Excel file.
1801
+ force_single_topic_choice (str): Choice to force single topic extraction.
1802
+ produce_structures_summary_choice (str): Choice to produce structured summaries.
1803
+ aws_access_key_text (str): AWS access key.
1804
+ aws_secret_key_text (str): AWS secret key.
1805
+ hf_api_key_text (str): Hugging Face API key.
1806
+ azure_api_key_text (str): Azure API key.
1807
+ output_folder (str, optional): Folder to save output files. Defaults to OUTPUT_FOLDER.
1808
+ merge_sentiment (str, optional): Whether to merge sentiment. Defaults to "No".
1809
+ merge_general_topics (str, optional): Whether to merge general topics. Defaults to "Yes".
1810
+ score_threshold (int, optional): Score threshold for topic matching. Defaults to 90.
1811
+ summarise_format (str, optional): Format for summarization. Defaults to "".
1812
+ random_seed (int, optional): Random seed for reproducibility. Defaults to 42.
1813
+ log_files_output_list_state (List[str], optional): List of log file paths. Defaults to list().
1814
+ model_name_map_state (dict, optional): Mapping of model names. Defaults to model_name_map.
1815
+ usage_logs_location (str, optional): Location for usage logs. Defaults to "".
1816
+ existing_logged_content (list, optional): Existing logged content. Defaults to list().
1817
+ add_existing_topics_summary_format (str, optional): Summary format for adding existing topics. Defaults to "".
1818
+ model (object, optional): Loaded local model object. Defaults to None.
1819
+ tokenizer (object, optional): Loaded local tokenizer object. Defaults to None.
1820
+ assistant_model (object, optional): Loaded local assistant model object. Defaults to None.
1821
+ max_rows (int, optional): Maximum number of rows to process. Defaults to max_rows.
1822
+ progress (Progress, optional): Gradio Progress object for tracking. Defaults to Progress(track_tqdm=True).
1823
+
1824
+ Returns:
1825
+ A tuple matching the UI components updated during the original chained flow.
1826
  """
1827
 
1828
  # Load local model if it's not already loaded
 
1903
  model=model,
1904
  tokenizer=tokenizer,
1905
  assistant_model=assistant_model,
1906
+ max_rows=max_rows,
1907
+ add_existing_topics_summary_format=add_existing_topics_summary_format
1908
  )
1909
 
1910
  total_input_tokens += out_input_tokens
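
Note on the CSV writes above: passing encoding='utf-8-sig' prepends a UTF-8 byte-order mark, so Excel detects the encoding correctly when the exported tables are opened directly. A minimal, self-contained sketch of the pattern (the DataFrame contents and file name below are illustrative, not objects from the app):

```python
import pandas as pd

# Illustrative only: utf-8-sig writes a BOM so Excel auto-detects UTF-8,
# keeping accented characters (e.g. "Café") intact when the CSV is opened directly.
reference_df = pd.DataFrame({"Subtopic": ["Café accessibility"], "Response References": ["1, 2"]})
reference_df.to_csv("example_reference_table.csv", index=None, encoding="utf-8-sig")
```
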
tools/llm_funcs.py CHANGED
@@ -4,14 +4,11 @@ import re
4
  import time
5
  import boto3
6
  import pandas as pd
7
- import json
8
- import spaces
9
  from tqdm import tqdm
10
  from huggingface_hub import hf_hub_download
11
  from typing import List, Tuple, TypeVar
12
  from google import genai as ai
13
  from google.genai import types
14
- import gradio as gr
15
  from gradio import Progress
16
 
17
  from azure.ai.inference import ChatCompletionsClient
@@ -26,15 +23,12 @@ _model = None
26
  _tokenizer = None
27
  _assistant_model = None
28
 
29
- from tools.config import AWS_REGION, LLM_TEMPERATURE, LLM_TOP_K, LLM_MIN_P, LLM_TOP_P, LLM_REPETITION_PENALTY, LLM_LAST_N_TOKENS, LLM_MAX_NEW_TOKENS, LLM_SEED, LLM_RESET, LLM_STREAM, LLM_THREADS, LLM_BATCH_SIZE, LLM_CONTEXT_LENGTH, LLM_SAMPLE, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, MAX_COMMENT_CHARS, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, HF_TOKEN, LLM_SEED, LLM_MAX_GPU_LAYERS, SPECULATIVE_DECODING, NUM_PRED_TOKENS, USE_LLAMA_CPP, COMPILE_MODE, MODEL_DTYPE, USE_BITSANDBYTES, COMPILE_TRANSFORMERS, INT8_WITH_OFFLOAD_TO_CPU, AZURE_INFERENCE_ENDPOINT, LOAD_LOCAL_MODEL_AT_START, USE_SPECULATIVE_DECODING, ASSISTANT_MODEL, LLM_STOP_STRINGS, LLM_MAX_NEW_TOKENS
30
- from tools.prompts import initial_table_assistant_prefill
31
  from tools.helper_functions import _get_env_list
32
 
33
  if SPECULATIVE_DECODING == "True": SPECULATIVE_DECODING = True
34
  else: SPECULATIVE_DECODING = False
35
 
36
- if USE_SPECULATIVE_DECODING == "True": USE_SPECULATIVE_DECODING = True
37
- else: USE_SPECULATIVE_DECODING = False
38
 
39
  if isinstance(NUM_PRED_TOKENS, str): NUM_PRED_TOKENS = int(NUM_PRED_TOKENS)
40
  if isinstance(LLM_MAX_GPU_LAYERS, str): LLM_MAX_GPU_LAYERS = int(LLM_MAX_GPU_LAYERS)
@@ -186,6 +180,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
186
  compile_mode=COMPILE_MODE,
187
  model_dtype=MODEL_DTYPE,
188
  hf_token=HF_TOKEN,
 
189
  model=None,
190
  tokenizer=None,
191
  assistant_model=None):
@@ -205,6 +200,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
205
  compile_mode (str): The compilation mode to use for the model.
206
  model_dtype (str): The data type to use for the model.
207
  hf_token (str): The Hugging Face token to use for the model.
 
208
  model (Llama/transformers model): The model to load.
209
  tokenizer (list/transformers tokenizer): The tokenizer to load.
210
  assistant_model (transformers model): The assistant model for speculative decoding.
@@ -212,7 +208,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
212
  tuple: A tuple containing:
213
  - model (Llama/transformers model): The loaded Llama.cpp/transformers model instance.
214
  - tokenizer (list/transformers tokenizer): An empty list (tokenizer is not used with Llama.cpp directly in this setup), or a transformers tokenizer.
215
- - assistant_model (transformers model): The assistant model for speculative decoding (if USE_SPECULATIVE_DECODING is True).
216
  '''
217
 
218
  if model:
@@ -263,9 +259,9 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
263
  try:
264
  print("GPU load variables:" , vars(gpu_config))
265
  if speculative_decoding:
266
- model = Llama(model_path=model_path, type_k=8, type_v=8, flash_attn=True, draft_model=LlamaPromptLookupDecoding(num_pred_tokens=NUM_PRED_TOKENS), **vars(gpu_config))
267
  else:
268
- model = Llama(model_path=model_path, type_k=8, type_v=8, flash_attn=True, **vars(gpu_config))
269
 
270
  except Exception as e:
271
  print("GPU load failed due to:", e, "Loading model in CPU mode")
@@ -397,7 +393,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
397
  print("GPU layers assigned to cuda:", gpu_layers)
398
 
399
  # Load assistant model for speculative decoding if enabled
400
- if USE_SPECULATIVE_DECODING and USE_LLAMA_CPP == "False" and torch_device == "cuda":
401
  print("Loading assistant model for speculative decoding:", ASSISTANT_MODEL)
402
  try:
403
  from transformers import AutoModelForCausalLM
@@ -764,7 +760,7 @@ def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tok
764
 
765
  return response
766
 
767
- def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCPPGenerationConfig, model=None, tokenizer=None, assistant_model=None, progress=Progress(track_tqdm=False)):
768
  """
769
  This function sends a request to a transformers model (through Unsloth) with the given prompt, system prompt, and generation configuration.
770
  """
@@ -774,7 +770,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
774
  model = get_model()
775
  if tokenizer is None:
776
  tokenizer = get_tokenizer()
777
- if assistant_model is None and USE_SPECULATIVE_DECODING:
778
  assistant_model = get_assistant_model()
779
 
780
  if model is None or tokenizer is None:
@@ -784,10 +780,17 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
784
  def wrap_text_message(text):
785
  return [{"type": "text", "text": text}]
786
 
787
- conversation = [
788
- {"role": "system", "content": wrap_text_message(system_prompt)},
789
- {"role": "user", "content": wrap_text_message(prompt)}
790
- ]
 
 
 
 
 
 
 
791
  #print("Conversation:", conversation)
792
  #import pprint
793
  #pprint.pprint(conversation)
@@ -812,7 +815,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
812
 
813
  # Map LlamaCPP parameters to transformers parameters
814
  generation_kwargs = {
815
- 'LLM_MAX_NEW_TOKENS': gen_config.max_tokens,
816
  'temperature': gen_config.temperature,
817
  'top_p': gen_config.top_p,
818
  'top_k': gen_config.top_k,
@@ -834,7 +837,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
834
  start_time = time.time()
835
 
836
  # Use speculative decoding if assistant model is available
837
- if USE_SPECULATIVE_DECODING and assistant_model is not None:
838
  print("Using speculative decoding with assistant model")
839
  outputs = model.generate(
840
  input_ids,
@@ -853,7 +856,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
853
  end_time = time.time()
854
 
855
  # --- Decode and Display Results ---
856
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
857
  # To get only the model's reply, we can decode just the newly generated tokens
858
  new_tokens = outputs[0][input_ids.shape[-1]:]
859
  assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
@@ -883,6 +886,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
883
  full_prompt = "Conversation history:\n"
884
  num_transformer_input_tokens = 0
885
  num_transformer_generated_tokens = 0
 
886
 
887
  for entry in conversation_history:
888
  role = entry['role'].capitalize() # Assuming the history is stored with 'role' and 'parts'
@@ -915,7 +919,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
915
  time.sleep(timeout_wait)
916
 
917
  if i == number_of_api_retry_attempts:
918
- return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
919
 
920
  elif "AWS" in model_source:
921
  for i in progress_bar:
@@ -931,7 +935,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
931
  time.sleep(timeout_wait)
932
 
933
  if i == number_of_api_retry_attempts:
934
- return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
935
  elif "Azure" in model_source:
936
  for i in progress_bar:
937
  try:
@@ -960,7 +964,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
960
  print("Call to Azure model failed:", e, " Waiting for ", str(timeout_wait), "seconds and trying again.")
961
  time.sleep(timeout_wait)
962
  if i == number_of_api_retry_attempts:
963
- return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
964
  elif "Local" in model_source:
965
  # This is the local model
966
  for i in progress_bar:
@@ -986,10 +990,10 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
986
  time.sleep(timeout_wait)
987
 
988
  if i == number_of_api_retry_attempts:
989
- return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
990
  else:
991
  print("Model source not recognised")
992
- return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
993
 
994
  # Update the conversation history with the new prompt and response
995
  conversation_history.append({'role': 'user', 'parts': [prompt]})
@@ -998,19 +1002,17 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
998
  if isinstance(response, ResponseObject):
999
  response_text = response.text
1000
  elif 'choices' in response: # LLama.cpp model response
1001
- if "gpt-oss" in model_choice:
1002
- response_text = response['choices'][0]['message']['content'].split('<|start|>assistant<|channel|>final<|message|>')[1]
1003
- else:
1004
- response_text = response['choices'][0]['message']['content']
1005
- response_text = response_text.strip()
1006
  elif model_source == "Gemini":
1007
  response_text = response.text
1008
- response_text = response_text.strip()
1009
  else: # Assume transformers model response
1010
- if "gpt-oss" in model_choice:
1011
- response_text = response.split('<|start|>assistant<|channel|>final<|message|>')[1]
1012
- else:
1013
- response_text = response
 
 
1014
 
1015
  conversation_history.append({'role': 'assistant', 'parts': [response_text]})
1016
 
 
4
  import time
5
  import boto3
6
  import pandas as pd
 
 
7
  from tqdm import tqdm
8
  from huggingface_hub import hf_hub_download
9
  from typing import List, Tuple, TypeVar
10
  from google import genai as ai
11
  from google.genai import types
 
12
  from gradio import Progress
13
 
14
  from azure.ai.inference import ChatCompletionsClient
 
23
  _tokenizer = None
24
  _assistant_model = None
25
 
26
+ from tools.config import LLM_TEMPERATURE, LLM_TOP_K, LLM_MIN_P, LLM_TOP_P, LLM_REPETITION_PENALTY, LLM_LAST_N_TOKENS, LLM_MAX_NEW_TOKENS, LLM_SEED, LLM_RESET, LLM_STREAM, LLM_THREADS, LLM_BATCH_SIZE, LLM_CONTEXT_LENGTH, LLM_SAMPLE, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, MAX_COMMENT_CHARS, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, HF_TOKEN, LLM_SEED, LLM_MAX_GPU_LAYERS, SPECULATIVE_DECODING, NUM_PRED_TOKENS, USE_LLAMA_CPP, COMPILE_MODE, MODEL_DTYPE, USE_BITSANDBYTES, COMPILE_TRANSFORMERS, INT8_WITH_OFFLOAD_TO_CPU, LOAD_LOCAL_MODEL_AT_START, ASSISTANT_MODEL, LLM_STOP_STRINGS, MULTIMODAL_PROMPT_FORMAT, KV_QUANT_LEVEL
 
27
  from tools.helper_functions import _get_env_list
28
 
29
  if SPECULATIVE_DECODING == "True": SPECULATIVE_DECODING = True
30
  else: SPECULATIVE_DECODING = False
31
 
 
 
32
 
33
  if isinstance(NUM_PRED_TOKENS, str): NUM_PRED_TOKENS = int(NUM_PRED_TOKENS)
34
  if isinstance(LLM_MAX_GPU_LAYERS, str): LLM_MAX_GPU_LAYERS = int(LLM_MAX_GPU_LAYERS)
 
180
  compile_mode=COMPILE_MODE,
181
  model_dtype=MODEL_DTYPE,
182
  hf_token=HF_TOKEN,
183
+ speculative_decoding=speculative_decoding,
184
  model=None,
185
  tokenizer=None,
186
  assistant_model=None):
 
200
  compile_mode (str): The compilation mode to use for the model.
201
  model_dtype (str): The data type to use for the model.
202
  hf_token (str): The Hugging Face token to use for the model.
203
+ speculative_decoding (bool): Whether to use speculative decoding.
204
  model (Llama/transformers model): The model to load.
205
  tokenizer (list/transformers tokenizer): The tokenizer to load.
206
  assistant_model (transformers model): The assistant model for speculative decoding.
 
208
  tuple: A tuple containing:
209
  - model (Llama/transformers model): The loaded Llama.cpp/transformers model instance.
210
  - tokenizer (list/transformers tokenizer): An empty list (tokenizer is not used with Llama.cpp directly in this setup), or a transformers tokenizer.
211
+ - assistant_model (transformers model): The assistant model for speculative decoding (if speculative_decoding is True).
212
  '''
213
 
214
  if model:
 
259
  try:
260
  print("GPU load variables:" , vars(gpu_config))
261
  if speculative_decoding:
262
+ model = Llama(model_path=model_path, type_k=KV_QUANT_LEVEL, type_v=KV_QUANT_LEVEL, flash_attn=True, draft_model=LlamaPromptLookupDecoding(num_pred_tokens=NUM_PRED_TOKENS), **vars(gpu_config))
263
  else:
264
+ model = Llama(model_path=model_path, type_k=KV_QUANT_LEVEL, type_v=KV_QUANT_LEVEL, flash_attn=True, **vars(gpu_config))
265
 
266
  except Exception as e:
267
  print("GPU load failed due to:", e, "Loading model in CPU mode")
 
393
  print("GPU layers assigned to cuda:", gpu_layers)
394
 
395
  # Load assistant model for speculative decoding if enabled
396
+ if speculative_decoding and USE_LLAMA_CPP == "False" and torch_device == "cuda":
397
  print("Loading assistant model for speculative decoding:", ASSISTANT_MODEL)
398
  try:
399
  from transformers import AutoModelForCausalLM
 
760
 
761
  return response
762
 
763
+ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCPPGenerationConfig, model=None, tokenizer=None, assistant_model=None, speculative_decoding=speculative_decoding, progress=Progress(track_tqdm=False)):
764
  """
765
  This function sends a request to a transformers model (through Unsloth) with the given prompt, system prompt, and generation configuration.
766
  """
 
770
  model = get_model()
771
  if tokenizer is None:
772
  tokenizer = get_tokenizer()
773
+ if assistant_model is None and speculative_decoding:
774
  assistant_model = get_assistant_model()
775
 
776
  if model is None or tokenizer is None:
 
780
  def wrap_text_message(text):
781
  return [{"type": "text", "text": text}]
782
 
783
+ if MULTIMODAL_PROMPT_FORMAT == "True":
784
+ conversation = [
785
+ {"role": "system", "content": wrap_text_message(system_prompt)},
786
+ {"role": "user", "content": wrap_text_message(prompt)}
787
+ ]
788
+
789
+ else:
790
+ conversation = [
791
+ {"role": "system", "content": system_prompt},
792
+ {"role": "user", "content": prompt}
793
+ ]
794
  #print("Conversation:", conversation)
795
  #import pprint
796
  #pprint.pprint(conversation)
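
The new MULTIMODAL_PROMPT_FORMAT branch above switches between typed-content messages and plain-string messages. A minimal sketch of the two shapes, assuming a transformers tokenizer with a chat template (the prompt text is made up for illustration; which shape a given model expects depends on its template):

```python
system_prompt = "You are a consultation analyst."   # illustrative text
prompt = "Summarise the responses in a table."      # illustrative text

multimodal_conversation = [
    {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
    {"role": "user", "content": [{"type": "text", "text": prompt}]},
]
plain_conversation = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": prompt},
]
# Either shape is then rendered to model inputs, e.g.:
# input_ids = tokenizer.apply_chat_template(plain_conversation,
#                                           add_generation_prompt=True,
#                                           return_tensors="pt")
```
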
 
815
 
816
  # Map LlamaCPP parameters to transformers parameters
817
  generation_kwargs = {
818
+ 'max_new_tokens': gen_config.max_tokens,
819
  'temperature': gen_config.temperature,
820
  'top_p': gen_config.top_p,
821
  'top_k': gen_config.top_k,
 
837
  start_time = time.time()
838
 
839
  # Use speculative decoding if assistant model is available
840
+ if speculative_decoding and assistant_model is not None:
841
  print("Using speculative decoding with assistant model")
842
  outputs = model.generate(
843
  input_ids,
 
856
  end_time = time.time()
857
 
858
  # --- Decode and Display Results ---
859
+ #generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
860
  # To get only the model's reply, we can decode just the newly generated tokens
861
  new_tokens = outputs[0][input_ids.shape[-1]:]
862
  assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
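
For reference, a minimal sketch of the assisted-generation call the speculative-decoding branch above relies on (it assumes model, assistant_model, tokenizer and input_ids are already-loaded transformers objects; the token budget is illustrative):

```python
# assistant_model= enables assisted (speculative) decoding in transformers:
# the smaller draft model proposes tokens and the main model verifies them.
outputs = model.generate(
    input_ids,
    max_new_tokens=512,          # illustrative budget
    assistant_model=assistant_model,
)
# Decode only the newly generated tokens, as in the code above.
new_tokens = outputs[0][input_ids.shape[-1]:]
assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
```
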
 
886
  full_prompt = "Conversation history:\n"
887
  num_transformer_input_tokens = 0
888
  num_transformer_generated_tokens = 0
889
+ response_text = ""
890
 
891
  for entry in conversation_history:
892
  role = entry['role'].capitalize() # Assuming the history is stored with 'role' and 'parts'
 
919
  time.sleep(timeout_wait)
920
 
921
  if i == number_of_api_retry_attempts:
922
+ return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
923
 
924
  elif "AWS" in model_source:
925
  for i in progress_bar:
 
935
  time.sleep(timeout_wait)
936
 
937
  if i == number_of_api_retry_attempts:
938
+ return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
939
  elif "Azure" in model_source:
940
  for i in progress_bar:
941
  try:
 
964
  print("Call to Azure model failed:", e, " Waiting for ", str(timeout_wait), "seconds and trying again.")
965
  time.sleep(timeout_wait)
966
  if i == number_of_api_retry_attempts:
967
+ return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
968
  elif "Local" in model_source:
969
  # This is the local model
970
  for i in progress_bar:
 
990
  time.sleep(timeout_wait)
991
 
992
  if i == number_of_api_retry_attempts:
993
+ return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
994
  else:
995
  print("Model source not recognised")
996
+ return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
997
 
998
  # Update the conversation history with the new prompt and response
999
  conversation_history.append({'role': 'user', 'parts': [prompt]})
 
1002
  if isinstance(response, ResponseObject):
1003
  response_text = response.text
1004
  elif 'choices' in response: # LLama.cpp model response
1005
+ if "gpt-oss" in model_choice: response_text = response['choices'][0]['message']['content'].split('<|start|>assistant<|channel|>final<|message|>')[1]
1006
+ else: response_text = response['choices'][0]['message']['content']
 
 
 
1007
  elif model_source == "Gemini":
1008
  response_text = response.text
 
1009
  else: # Assume transformers model response
1010
+ if "gpt-oss" in model_choice: response_text = response.split('<|start|>assistant<|channel|>final<|message|>')[1]
1011
+ else: response_text = response
1012
+
1013
+ # Replace multiple spaces with single space
1014
+ response_text = re.sub(r' {2,}', ' ', response_text)
1015
+ response_text = response_text.strip()
1016
 
1017
  conversation_history.append({'role': 'assistant', 'parts': [response_text]})
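
A minimal sketch of the post-processing added above: gpt-oss style outputs are split on the final-channel marker, then runs of spaces are collapsed and the text is stripped (the raw string below is a fabricated example, for illustration only):

```python
import re

raw = ("<|start|>assistant<|channel|>analysis<|message|>working notes..."
       "<|start|>assistant<|channel|>final<|message|>| Topic | Summary  |")

response_text = raw.split('<|start|>assistant<|channel|>final<|message|>')[1]
response_text = re.sub(r' {2,}', ' ', response_text)  # collapse repeated spaces
response_text = response_text.strip()
print(response_text)  # -> "| Topic | Summary |"
```
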
1018
 
tools/prompts.py CHANGED
@@ -8,12 +8,16 @@ initial_table_system_prompt = system_prompt + markdown_additional_prompt
8
 
9
  initial_table_assistant_prefill = "|"
10
 
 
 
 
 
11
  initial_table_prompt = """Your task is to create one new markdown table based on open text responses in the reponse table below with the headings 'General topic', 'Subtopic', 'Sentiment', 'Response References', and 'Summary'.
12
  In the first column identify general topics relevant to responses. Create as many general topics as you can.
13
  In the second column list subtopics relevant to responses. Make the subtopics as specific as possible and make sure they cover every issue mentioned. The subtopic should never be blank or empty.
14
  {sentiment_choices}.
15
- In the fourth column list each specific Response reference number that is relevant to the Subtopic, separated by commas. Do no write any other text in this column.
16
- In the fifth column, write a short summary of the subtopic based on relevant responses - highlight specific issues that appear.
17
  Do not add any other columns. Do not add any other text to your response.
18
 
19
  Response table:
@@ -46,8 +50,8 @@ force_single_topic_prompt = """ Assign each response to one single topic only.""
46
  add_existing_topics_prompt = """Your task is to create one new markdown table, assigning responses from the Response table below to topics.
47
  {topic_assignment}{force_single_topic}
48
  {sentiment_choices}.
49
- In the fourth column list each specific Response reference number that is relevant to the Subtopic, separated by commas. Do no write any other text in this column.
50
- In the fifth column, write a short summary of the Subtopic based on relevant responses - highlight specific issues that appear.
51
  Do not add any other columns. Do not add any other text to your response.
52
 
53
  Responses are shown in the following Response table:
 
8
 
9
  initial_table_assistant_prefill = "|"
10
 
11
+ default_response_reference_format = "list each specific Response reference number that is relevant to the Subtopic, separated by commas. Do not write any other text in this column."
12
+
13
+ single_response_reference_format = "'Response References', write the number 1 alongside each subtopic and no other text."
14
+
15
  initial_table_prompt = """Your task is to create one new markdown table based on open text responses in the reponse table below with the headings 'General topic', 'Subtopic', 'Sentiment', 'Response References', and 'Summary'.
16
  In the first column identify general topics relevant to responses. Create as many general topics as you can.
17
  In the second column list subtopics relevant to responses. Make the subtopics as specific as possible and make sure they cover every issue mentioned. The subtopic should never be blank or empty.
18
  {sentiment_choices}.
19
+ In the fourth column {response_reference_format}
20
+ In the fifth column, write a summary of the subtopic based on relevant responses - highlight specific issues that appear. {add_existing_topics_summary_format}
21
  Do not add any other columns. Do not add any other text to your response.
22
 
23
  Response table:
 
50
  add_existing_topics_prompt = """Your task is to create one new markdown table, assigning responses from the Response table below to topics.
51
  {topic_assignment}{force_single_topic}
52
  {sentiment_choices}.
53
+ In the fourth column {response_reference_format}
54
+ In the fifth column, write a summary of the Subtopic based on relevant responses - highlight specific issues that appear. {add_existing_topics_summary_format}
55
  Do not add any other columns. Do not add any other text to your response.
56
 
57
  Responses are shown in the following Response table:
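
The new {response_reference_format} and {add_existing_topics_summary_format} placeholders are filled in at runtime before the prompt is sent. A minimal sketch of that substitution (the template fragment and the summary-format instruction below are abbreviated, illustrative stand-ins for the full prompts):

```python
default_response_reference_format = (
    "list each specific Response reference number that is relevant to the Subtopic, "
    "separated by commas. Do not write any other text in this column."
)

prompt_fragment = (
    "In the fourth column {response_reference_format}\n"
    "In the fifth column, write a summary of the subtopic based on relevant responses "
    "- highlight specific issues that appear. {add_existing_topics_summary_format}"
)

formatted = prompt_fragment.format(
    response_reference_format=default_response_reference_format,
    add_existing_topics_summary_format="Keep each summary to a single sentence.",  # assumed example
)
print(formatted)
```
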
tools/verify_titles.py CHANGED
@@ -492,17 +492,17 @@ def verify_titles(in_data_file,
492
 
493
  # Write outputs to csv
494
  ## Topics with references
495
- new_topic_df.to_csv(topic_table_out_path, index=None)
496
  log_files_output_paths.append(topic_table_out_path)
497
 
498
  ## Reference table mapping response numbers to topics
499
- new_reference_df.to_csv(reference_table_out_path, index=None)
500
  out_file_paths.append(reference_table_out_path)
501
 
502
  ## Unique topic list
503
  new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]) #.drop_duplicates('Subtopic')
504
 
505
- new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
506
  out_file_paths.append(unique_topics_df_out_path)
507
 
508
  # Outputs for markdown table output
@@ -536,7 +536,7 @@ def verify_titles(in_data_file,
536
 
537
  formatted_initial_table_system_prompt = system_prompt.format(consultation_context=context_textbox, column_name=chosen_cols)
538
 
539
- formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table)
540
 
541
  if prompt2: formatted_prompt2 = prompt2.format(response_table=normalised_simple_markdown_table)
542
  else: formatted_prompt2 = prompt2
@@ -561,16 +561,16 @@ def verify_titles(in_data_file,
561
  # If error in table parsing, leave function
562
  if is_error == True: raise Exception("Error in output table parsing")
563
 
564
- topic_table_df.to_csv(topic_table_out_path, index=None)
565
  out_file_paths.append(topic_table_out_path)
566
 
567
- reference_df.to_csv(reference_table_out_path, index=None)
568
  out_file_paths.append(reference_table_out_path)
569
 
570
  ## Unique topic list
571
  new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df])
572
 
573
- new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
574
  out_file_paths.append(unique_topics_df_out_path)
575
 
576
  whole_conversation_metadata.append(whole_conversation_metadata_str)
@@ -672,14 +672,14 @@ def verify_titles(in_data_file,
672
  basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
673
 
674
  ## Reference table mapping response numbers to topics
675
- existing_reference_df.to_csv(reference_table_out_path, index=None)
676
  out_file_paths.append(reference_table_out_path)
677
 
678
  # Create final unique topics table from reference table to ensure consistent numbers
679
  final_out_unique_topics_df = existing_unique_topics_df #create_topic_summary_df_from_reference_table(existing_reference_df)
680
 
681
  ## Unique topic list
682
- final_out_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
683
  out_file_paths.append(unique_topics_df_out_path)
684
 
685
  # Ensure that we are only returning the final results to outputs
@@ -696,7 +696,7 @@ def verify_titles(in_data_file,
696
  basic_response_data = get_basic_response_data(file_data, chosen_cols, verify_titles=True)
697
 
698
  # Save simplified file data to log outputs
699
- pd.DataFrame(basic_response_data).to_csv(basic_response_data_out_path, index=None)
700
  log_files_output_paths.append(basic_response_data_out_path)
701
 
702
  # Step 1: Identify missing references
@@ -713,7 +713,7 @@ def verify_titles(in_data_file,
713
  #print("missing_df:", missing_df)
714
 
715
  missing_df_out_path = output_folder + file_path_details + "_missing_references_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
716
- missing_df.to_csv(missing_df_out_path, index=None)
717
  log_files_output_paths.append(missing_df_out_path)
718
 
719
  out_file_paths = list(set(out_file_paths))
 
492
 
493
  # Write outputs to csv
494
  ## Topics with references
495
+ new_topic_df.to_csv(topic_table_out_path, index=None, encoding='utf-8-sig')
496
  log_files_output_paths.append(topic_table_out_path)
497
 
498
  ## Reference table mapping response numbers to topics
499
+ new_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
500
  out_file_paths.append(reference_table_out_path)
501
 
502
  ## Unique topic list
503
  new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]) #.drop_duplicates('Subtopic')
504
 
505
+ new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None, encoding='utf-8-sig')
506
  out_file_paths.append(unique_topics_df_out_path)
507
 
508
  # Outputs for markdown table output
 
536
 
537
  formatted_initial_table_system_prompt = system_prompt.format(consultation_context=context_textbox, column_name=chosen_cols)
538
 
539
+ formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table, add_existing_topics_summary_format=add_existing_topics_summary_format)
540
 
541
  if prompt2: formatted_prompt2 = prompt2.format(response_table=normalised_simple_markdown_table)
542
  else: formatted_prompt2 = prompt2
 
561
  # If error in table parsing, leave function
562
  if is_error == True: raise Exception("Error in output table parsing")
563
 
564
+ topic_table_df.to_csv(topic_table_out_path, index=None, encoding='utf-8-sig')
565
  out_file_paths.append(topic_table_out_path)
566
 
567
+ reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
568
  out_file_paths.append(reference_table_out_path)
569
 
570
  ## Unique topic list
571
  new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df])
572
 
573
+ new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None, encoding='utf-8-sig')
574
  out_file_paths.append(unique_topics_df_out_path)
575
 
576
  whole_conversation_metadata.append(whole_conversation_metadata_str)
 
672
  basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
673
 
674
  ## Reference table mapping response numbers to topics
675
+ existing_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
676
  out_file_paths.append(reference_table_out_path)
677
 
678
  # Create final unique topics table from reference table to ensure consistent numbers
679
  final_out_unique_topics_df = existing_unique_topics_df #create_topic_summary_df_from_reference_table(existing_reference_df)
680
 
681
  ## Unique topic list
682
+ final_out_unique_topics_df.to_csv(unique_topics_df_out_path, index=None, encoding='utf-8-sig')
683
  out_file_paths.append(unique_topics_df_out_path)
684
 
685
  # Ensure that we are only returning the final results to outputs
 
696
  basic_response_data = get_basic_response_data(file_data, chosen_cols, verify_titles=True)
697
 
698
  # Save simplified file data to log outputs
699
+ pd.DataFrame(basic_response_data).to_csv(basic_response_data_out_path, index=None, encoding='utf-8-sig')
700
  log_files_output_paths.append(basic_response_data_out_path)
701
 
702
  # Step 1: Identify missing references
 
713
  #print("missing_df:", missing_df)
714
 
715
  missing_df_out_path = output_folder + file_path_details + "_missing_references_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
716
+ missing_df.to_csv(missing_df_out_path, index=None, encoding='utf-8-sig')
717
  log_files_output_paths.append(missing_df_out_path)
718
 
719
  out_file_paths = list(set(out_file_paths))
windows_install_llama-cpp-python.txt CHANGED
@@ -77,13 +77,15 @@ set PKG_CONFIG_PATH=C:\<path-to-openblas>\OpenBLAS\lib\pkgconfig # Set this in e
77
 
78
  pip install llama-cpp-python==0.3.16 --force-reinstall --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
79
 
 
 
80
  or to make a wheel:
81
 
82
  pip wheel llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
83
 
84
- pip wheel llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/Users/spedrickcase/libs/OpenBLAS/include;-DBLAS_LIBRARIES=C:/Users/spedrickcase/libs/OpenBLAS/lib/libopenblas.lib"
 
85
 
86
- C:/Users/spedrickcase/libs
87
 
88
  ## With Cuda (NVIDIA GPUs only)
89
 
 
77
 
78
  pip install llama-cpp-python==0.3.16 --force-reinstall --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
79
 
80
+ pip install llama-cpp-python==0.3.16 --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/Users/s_cas/libs/OpenBLAS/include;-DBLAS_LIBRARIES=C:/Users/s_cas/OpenBLAS/lib/libopenblas.lib;-DPKG_CONFIG_PATH=C:/users/s_cas/openblas/lib/pkgconfig"
81
+
82
  or to make a wheel:
83
 
84
  pip wheel llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
85
 
86
+ pip wheel llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/Users/<user>/libs/OpenBLAS/include;-DBLAS_LIBRARIES=C:/Users/<user>/libs/OpenBLAS/lib/libopenblas.lib"
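
Once one of the commands above completes, a quick way to check that the OpenBLAS-backed build installed cleanly is to import the package and print its version (the version shown is simply the pin used in these commands):

```python
# Illustrative check only: confirm the wheel imports and reports the pinned version.
import llama_cpp
print(llama_cpp.__version__)  # expect 0.3.16 for the build commands above
```
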
87
+
88
 
 
89
 
90
  ## With Cuda (NVIDIA GPUs only)
91