Commit bd1a015 · 1 parent: bd19985
Generally improved inference for low-VRAM systems, improved unsloth usage, updated packages, and switched the default local model to Qwen 3 4B.
Files changed:
- README.md +1 -1
- app.py +12 -11
- tools/aws_functions.py +0 -3
- tools/combine_sheets_into_xlsx.py +1 -2
- tools/config.py +54 -17
- tools/custom_csvlogger.py +2 -5
- tools/dedup_summaries.py +38 -22
- tools/llm_api_call.py +100 -26
- tools/llm_funcs.py +37 -35
- tools/prompts.py +8 -4
- tools/verify_titles.py +11 -11
- windows_install_llama-cpp-python.txt +4 -2
README.md
CHANGED
@@ -97,7 +97,7 @@ The repo provides several requirements files that are relevant for different sit
 - **requirements_no_local**: Can be used to install the app without local model inference for a more lightweight installation.
 - **requirements_gpu.txt**: Used for Python 3.11 GPU-enabled environments. Uncomment the requirements under 'Windows' for Windows compatibility (CUDA 12.4).
-- **requirements_cpu.txt**: Used for Python 3.11 CPU-only environments. Uncomment the requirements under 'Windows' for Windows compatibility.
+- **requirements_cpu.txt**: Used for Python 3.11 CPU-only environments. Uncomment the requirements under 'Windows' for Windows compatibility. Make sure you have [Openblas](https://github.com/OpenMathLib/OpenBLAS) installed!
 - **requirements.txt**: Used for the Python 3.10 GPU-enabled environment on Hugging Face spaces (CUDA 12.4).
 
 2. **Install packages from the requirements file:**
app.py
CHANGED
@@ -51,12 +51,9 @@ if DYNAMODB_USAGE_LOG_HEADERS: DYNAMODB_USAGE_LOG_HEADERS = _get_env_list(DYNAMO
 
 today_rev = datetime.now().strftime("%Y%m%d")
 
-if RUN_LOCAL_MODEL == "1":
-
-
-    default_model_choice = "anthropic.claude-3-haiku-20240307-v1:0"
-else:
-    default_model_choice = "gemini-2.5-flash"
+if RUN_LOCAL_MODEL == "1": default_model_choice = CHOSEN_LOCAL_MODEL_TYPE
+elif RUN_AWS_FUNCTIONS == "1": default_model_choice = "anthropic.claude-3-haiku-20240307-v1:0"
+else: default_model_choice = "gemini-2.5-flash"
 
 # Create the gradio interface
 app = gr.Blocks(theme = gr.themes.Default(primary_hue="blue"), fill_width=True)

@@ -119,6 +116,7 @@ with app:
     summarised_references_markdown = gr.Markdown("", visible=False)
     summarised_outputs_list = gr.Dropdown(value= list(), choices= list(), visible=False, label="List of summarised outputs", allow_custom_value=True)
     latest_summary_completed_num = gr.Number(0, visible=False)
+    add_existing_topics_summary_format_textbox = gr.Textbox(value="", visible=False, label="Add existing topics summary format")
 
     summary_xlsx_output_files_list = gr.Dropdown(value= list(), choices= list(), visible=False, label="List of xlsx summary output files", allow_custom_value=True)

@@ -192,7 +190,7 @@ with app:
     extract_topics_btn = gr.Button("1. Extract topics", variant="secondary")
 
     with gr.Row(equal_height=True):
-        output_messages_textbox = gr.Textbox(value="", label="Output messages", scale=1, interactive=False)
+        output_messages_textbox = gr.Textbox(value="", label="Output messages", scale=1, interactive=False, lines=4)
         topic_extraction_output_files_xlsx = gr.File(label="Overall summary xlsx file", scale=1, interactive=False)
         topic_extraction_output_files = gr.File(label="Extract topics output files", scale=1, interactive=False)

@@ -410,7 +408,8 @@ with app:
         hf_api_key_textbox,
         azure_api_key_textbox,
         output_folder_state,
-        logged_content_df
+        logged_content_df,
+        add_existing_topics_summary_format_textbox],
     outputs=[display_topic_table_markdown,
         master_topic_df_state,
         master_unique_topics_df_state,

@@ -432,7 +431,8 @@ with app:
         output_tokens_num,
         number_of_calls_num,
         output_messages_textbox,
-        logged_content_df
+        logged_content_df,
+        add_existing_topics_summary_format_textbox],
     api_name="extract_topics", show_progress_on=output_messages_textbox).\
     success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False, api_name="usage_logs").\
     then(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state], outputs=[topic_extraction_output_files_xlsx, summary_xlsx_output_files_list])

@@ -518,7 +518,8 @@ with app:
         log_files_output_list_state,
         model_name_map_state,
         usage_logs_state,
-        logged_content_df
+        logged_content_df,
+        add_existing_topics_summary_format_textbox
         ],
     outputs=[
         display_topic_table_markdown,

@@ -603,7 +604,7 @@ with app:
     success(fn=join_cols_onto_reference_df, inputs=[master_reference_df_state, file_data_state, join_colnames, reference_df_data_file_name_textbox], outputs=[master_reference_df_state_joined, out_join_files])
 
     # Export to xlsx file
-    export_xlsx_btn.click(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state], outputs=[out_xlsx_files], api_name="export_xlsx")
+    export_xlsx_btn.click(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state], outputs=[out_xlsx_files, summary_xlsx_output_files_list], api_name="export_xlsx")
 
     # If relevant environment variable is set, load in the default cost code file from S3 or locally
     if GET_COST_CODES == "True" and (COST_CODES_PATH or S3_COST_CODES_PATH):
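The app.py hunk above changes the default model selection to prefer the configured local model, then Bedrock's Claude Haiku, then Gemini. A minimal sketch of that fallback order as a standalone helper (the function and its parameters are illustrative, not code from this commit; the flag and variable names come from tools/config.py):

```python
# Illustrative sketch only: the fallback order app.py now applies.
def pick_default_model(run_local_model: str, run_aws_functions: str, local_model_type: str) -> str:
    if run_local_model == "1":
        return local_model_type  # e.g. "Qwen 3 4B" from CHOSEN_LOCAL_MODEL_TYPE
    elif run_aws_functions == "1":
        return "anthropic.claude-3-haiku-20240307-v1:0"  # AWS Bedrock fallback
    return "gemini-2.5-flash"  # default when neither flag is set
```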
tools/aws_functions.py
CHANGED
@@ -15,9 +15,6 @@ def connect_to_bedrock_runtime(model_name_map:dict, model_choice:str, aws_access
     if RUN_AWS_FUNCTIONS == "1" and PRIORITISE_SSO_OVER_AWS_ENV_ACCESS_KEYS == "1":
         print("Connecting to Bedrock via existing SSO connection")
         bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_REGION)
-    elif RUN_AWS_FUNCTIONS == "1" and PRIORITISE_SSO_OVER_AWS_ENV_ACCESS_KEYS == "1":
-        print("Connecting to Bedrock via existing SSO connection")
-        bedrock_runtime = boto3.client('bedrock-runtime', region_name=AWS_REGION)
     elif aws_access_key_textbox and aws_secret_key_textbox:
         print("Connecting to Bedrock using AWS access key and secret keys from user input.")
         bedrock_runtime = boto3.client('bedrock-runtime',
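The removed branch was an exact duplicate of the SSO condition above it, so the connection logic now has one SSO path and one explicit-key path. A hedged sketch of the resulting branching (the wrapper function and its argument names are illustrative; the boto3 calls mirror the diff):

```python
import boto3

# Sketch of the simplified branching after the duplicate elif was removed.
def connect_to_bedrock(run_aws: str, prefer_sso: str, access_key: str, secret_key: str, region: str):
    if run_aws == "1" and prefer_sso == "1":
        # Reuse the credentials from an existing SSO session
        return boto3.client('bedrock-runtime', region_name=region)
    elif access_key and secret_key:
        # Fall back to explicit keys supplied by the user
        return boto3.client('bedrock-runtime',
                            region_name=region,
                            aws_access_key_id=access_key,
                            aws_secret_access_key=secret_key)
    return None
```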
tools/combine_sheets_into_xlsx.py
CHANGED
@@ -380,8 +380,7 @@ def collect_output_csvs_and_create_excel_output(in_data_files:List, chosen_cols:
     xlsx_output_filenames = [xlsx_output_filename]
 
     # Delete intermediate csv files
-    for csv_file in new_csv_files:
-        os.remove(csv_file)
+    for csv_file in new_csv_files: os.remove(csv_file)
 
     return xlsx_output_filenames, xlsx_output_filenames
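The intermediate CSV clean-up is collapsed to a single line after the xlsx file is written. A hedged sketch of an equivalent helper that also tolerates files that were already removed (the suppress guard is an illustrative addition, not behaviour of the repo's code):

```python
import os
import contextlib
from typing import List

# Illustrative variant of the one-line clean-up in the diff above.
def remove_intermediate_csvs(csv_paths: List[str]) -> None:
    for csv_file in csv_paths:
        # Ignore paths that have already been deleted (an assumption, not in the commit)
        with contextlib.suppress(FileNotFoundError):
            os.remove(csv_file)
```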
tools/config.py
CHANGED
@@ -190,7 +190,7 @@ if LOGGING == 'True':
 ###
 # App run variables
 ###
-OUTPUT_DEBUG_FILES = get_or_create_env_var('OUTPUT_DEBUG_FILES', '
+OUTPUT_DEBUG_FILES = get_or_create_env_var('OUTPUT_DEBUG_FILES', 'True') # Whether to output debug files
 
 TIMEOUT_WAIT = int(get_or_create_env_var('TIMEOUT_WAIT', '30')) # Maximum number of seconds to wait for a response from the LLM
 NUMBER_OF_RETRY_ATTEMPTS = int(get_or_create_env_var('NUMBER_OF_RETRY_ATTEMPTS', '5')) # Maximum number of times to retry a request to the LLM

@@ -229,7 +229,7 @@ model_full_names = list()
 model_short_names = list()
 model_source = list()
 
-CHOSEN_LOCAL_MODEL_TYPE = get_or_create_env_var("CHOSEN_LOCAL_MODEL_TYPE", "
+CHOSEN_LOCAL_MODEL_TYPE = get_or_create_env_var("CHOSEN_LOCAL_MODEL_TYPE", "Qwen 3 4B") # Gemma 3 1B # "Gemma 2b" # "Gemma 3 4B"
 
 if RUN_LOCAL_MODEL == "1" and CHOSEN_LOCAL_MODEL_TYPE:
     model_full_names.append(CHOSEN_LOCAL_MODEL_TYPE)

@@ -264,8 +264,21 @@ model_name_map = {
 HF_TOKEN = get_or_create_env_var('HF_TOKEN', '')
 
 LOAD_LOCAL_MODEL_AT_START = get_or_create_env_var('LOAD_LOCAL_MODEL_AT_START', 'True')
-USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True') # Llama.cpp or transformers
 
+# If you are using a system with low VRAM, you can set this to True to reduce the memory requirements
+LOW_VRAM_SYSTEM = get_or_create_env_var('LOW_VRAM_SYSTEM', 'False')
+
+if LOW_VRAM_SYSTEM == 'True':
+    print("Changing settings for low VRAM system")
+    USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True')
+    LLM_MAX_NEW_TOKENS = int(get_or_create_env_var('LLM_MAX_NEW_TOKENS', '4096'))
+    LLM_CONTEXT_LENGTH = int(get_or_create_env_var('LLM_CONTEXT_LENGTH', '8192'))
+    LLM_BATCH_SIZE = int(get_or_create_env_var('LLM_BATCH_SIZE', '512'))
+    KV_QUANT_LEVEL = int(get_or_create_env_var('KV_QUANT_LEVEL', '2')) # 2 is equivalent to q4_0, 8 is q8_0
+
+USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True') # Llama.cpp or transformers with unsloth
 
 GEMMA2_REPO_ID = get_or_create_env_var("GEMMA2_2B_REPO_ID", "unsloth/gemma-2-it-GGUF")
 GEMMA2_REPO_TRANSFORMERS_ID = get_or_create_env_var("GEMMA2_2B_REPO_TRANSFORMERS_ID", "unsloth/gemma-2-2b-it-bnb-4bit")

@@ -293,18 +306,31 @@ GEMMA3_4B_MODEL_FOLDER = get_or_create_env_var("GEMMA3_4B_MODEL_FOLDER", "model/
 
 GPT_OSS_REPO_ID = get_or_create_env_var("GPT_OSS_REPO_ID", "unsloth/gpt-oss-20b-GGUF")
 GPT_OSS_REPO_TRANSFORMERS_ID = get_or_create_env_var("GPT_OSS_REPO_TRANSFORMERS_ID", "unsloth/gpt-oss-20b-unsloth-bnb-4bit")
-if USE_LLAMA_CPP == "False":
-    GPT_OSS_REPO_ID = GPT_OSS_REPO_TRANSFORMERS_ID
+if USE_LLAMA_CPP == "False": GPT_OSS_REPO_ID = GPT_OSS_REPO_TRANSFORMERS_ID
 
 GPT_OSS_MODEL_FILE = get_or_create_env_var("GPT_OSS_MODEL_FILE", "gpt-oss-20b-F16.gguf")
 GPT_OSS_MODEL_FOLDER = get_or_create_env_var("GPT_OSS_MODEL_FOLDER", "model/gpt_oss")
 
 USE_SPECULATIVE_DECODING = get_or_create_env_var("USE_SPECULATIVE_DECODING", "False")
-ASSISTANT_MODEL = get_or_create_env_var("ASSISTANT_MODEL", "unsloth/gemma-3-270m-it")
 
+if CHOSEN_LOCAL_MODEL_TYPE == "Gemma 3 4B": ASSISTANT_MODEL = get_or_create_env_var("ASSISTANT_MODEL", "unsloth/gemma-3-270m-it")
+elif CHOSEN_LOCAL_MODEL_TYPE == "Qwen 3 4B": ASSISTANT_MODEL = get_or_create_env_var("ASSISTANT_MODEL", "unsloth/Qwen3-0.6B")
+
+DRAFT_MODEL_LOC = get_or_create_env_var("DRAFT_MODEL_LOC", ".cache/llama.cpp/")
+
+GEMMA3_DRAFT_MODEL_LOC = get_or_create_env_var("GEMMA3_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "unsloth_gemma-3-270m-it-qat-GGUF_gemma-3-270m-it-qat-F16.gguf")
+
+GEMMA3_4B_DRAFT_MODEL_LOC = get_or_create_env_var("GEMMA3_4B_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "unsloth_gemma-3-4b-it-qat-GGUF_gemma-3-4b-it-qat-Q4_K_M.gguf")
 
+QWEN3_4B_REPO_ID = get_or_create_env_var("QWEN3_4B_REPO_ID", "unsloth/Qwen3-4B-Instruct-2507-GGUF")
+QWEN3_4B_REPO_TRANSFORMERS_ID = get_or_create_env_var("QWEN3_4B_REPO_TRANSFORMERS_ID", "unsloth/Qwen3-4B-unsloth-bnb-4bit")
+if USE_LLAMA_CPP == "False": QWEN3_4B_REPO_ID = QWEN3_4B_REPO_TRANSFORMERS_ID
+
+QWEN3_4B_MODEL_FILE = get_or_create_env_var("QWEN3_4B_MODEL_FILE", "Qwen3-4B-Instruct-2507-Q4_K_M.gguf")
+QWEN3_4B_MODEL_FOLDER = get_or_create_env_var("QWEN3_4B_MODEL_FOLDER", "model/qwen")
+
+QWEN3_DRAFT_MODEL_LOC = get_or_create_env_var("QWEN3_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "Qwen3-0.6B-Q8_0.gguf")
+QWEN3_4B_DRAFT_MODEL_LOC = get_or_create_env_var("QWEN3_4B_DRAFT_MODEL_LOC", DRAFT_MODEL_LOC + "Qwen3-4B-Instruct-2507-Q4_K_M.gguf")
 
 if CHOSEN_LOCAL_MODEL_TYPE == "Gemma 2b":
     LOCAL_REPO_ID = GEMMA2_REPO_ID

@@ -322,34 +348,45 @@ elif CHOSEN_LOCAL_MODEL_TYPE == "Gemma 3 4B":
     LOCAL_MODEL_FILE = GEMMA3_4B_MODEL_FILE
     LOCAL_MODEL_FOLDER = GEMMA3_4B_MODEL_FOLDER
 
+elif CHOSEN_LOCAL_MODEL_TYPE == "Qwen 3 4B":
+    LOCAL_REPO_ID = QWEN3_4B_REPO_ID
+    LOCAL_MODEL_FILE = QWEN3_4B_MODEL_FILE
+    LOCAL_MODEL_FOLDER = QWEN3_4B_MODEL_FOLDER
+
 elif CHOSEN_LOCAL_MODEL_TYPE == "gpt-oss-20b":
     LOCAL_REPO_ID = GPT_OSS_REPO_ID
     LOCAL_MODEL_FILE = GPT_OSS_MODEL_FILE
     LOCAL_MODEL_FOLDER = GPT_OSS_MODEL_FOLDER
 
 LLM_MAX_GPU_LAYERS = int(get_or_create_env_var('LLM_MAX_GPU_LAYERS','-1')) # Maximum possible
-LLM_TEMPERATURE = float(get_or_create_env_var('LLM_TEMPERATURE', '0.
+LLM_TEMPERATURE = float(get_or_create_env_var('LLM_TEMPERATURE', '0.6'))
 LLM_TOP_K = int(get_or_create_env_var('LLM_TOP_K','64')) # https://docs.unsloth.ai/basics/gemma-3-how-to-run-and-fine-tune
 LLM_MIN_P = float(get_or_create_env_var('LLM_MIN_P', '0'))
 LLM_TOP_P = float(get_or_create_env_var('LLM_TOP_P', '0.95'))
 LLM_REPETITION_PENALTY = float(get_or_create_env_var('LLM_REPETITION_PENALTY', '1.0'))
 
 LLM_LAST_N_TOKENS = int(get_or_create_env_var('LLM_LAST_N_TOKENS', '512'))
-LLM_MAX_NEW_TOKENS = int(get_or_create_env_var('LLM_MAX_NEW_TOKENS', '
+LLM_MAX_NEW_TOKENS = int(get_or_create_env_var('LLM_MAX_NEW_TOKENS', '8192'))
 LLM_SEED = int(get_or_create_env_var('LLM_SEED', '42'))
 LLM_RESET = get_or_create_env_var('LLM_RESET', 'True')
 LLM_STREAM = get_or_create_env_var('LLM_STREAM', 'True')
 LLM_THREADS = int(get_or_create_env_var('LLM_THREADS', '-1'))
-LLM_BATCH_SIZE = int(get_or_create_env_var('LLM_BATCH_SIZE', '
-LLM_CONTEXT_LENGTH = int(get_or_create_env_var('LLM_CONTEXT_LENGTH', '
+LLM_BATCH_SIZE = int(get_or_create_env_var('LLM_BATCH_SIZE', '512'))
+LLM_CONTEXT_LENGTH = int(get_or_create_env_var('LLM_CONTEXT_LENGTH', '32768'))
 LLM_SAMPLE = get_or_create_env_var('LLM_SAMPLE', 'True')
-LLM_STOP_STRINGS = get_or_create_env_var('LLM_STOP_STRINGS', r"['\n\n\n\n']")
+LLM_STOP_STRINGS = get_or_create_env_var('LLM_STOP_STRINGS', r"[' ','\n\n\n\n','---------------------------------------------]")
+MULTIMODAL_PROMPT_FORMAT = get_or_create_env_var('MULTIMODAL_PROMPT_FORMAT', 'False')
 SPECULATIVE_DECODING = get_or_create_env_var('SPECULATIVE_DECODING', 'False')
 NUM_PRED_TOKENS = int(get_or_create_env_var('NUM_PRED_TOKENS', '2'))
-
-
-
-
+KV_QUANT_LEVEL = int(get_or_create_env_var('KV_QUANT_LEVEL', '16'))
+
+# If you are using e.g. gpt-oss, you can add a reasoning suffix to set reasoning level, or turn it off in the case of Qwen 3 4B
+if CHOSEN_LOCAL_MODEL_TYPE == "gpt-oss-20b": REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', 'Reasoning: low')
+elif CHOSEN_LOCAL_MODEL_TYPE == "Qwen 3 4B" and USE_LLAMA_CPP == "False": REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', '/nothink')
+else: REASONING_SUFFIX = get_or_create_env_var('REASONING_SUFFIX', '')
 
 # Transformers variables
 COMPILE_TRANSFORMERS = get_or_create_env_var('COMPILE_TRANSFORMERS', 'False') # Whether to compile transformers models
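Because every value in tools/config.py is read through get_or_create_env_var, the new low-VRAM behaviour can be switched on from the environment before the module is imported. A minimal sketch, assuming only that the variables are read at import time and that get_or_create_env_var persists the first value it creates (the os.environ approach is illustrative; the defaults named in the comments come from the hunk above):

```python
import os

# Sketch: enable the new low-VRAM path before tools.config is imported.
os.environ["RUN_LOCAL_MODEL"] = "1"
os.environ["CHOSEN_LOCAL_MODEL_TYPE"] = "Qwen 3 4B"   # new default local model
os.environ["LOW_VRAM_SYSTEM"] = "True"                # shrinks max new tokens, context and batch size, and quantises the KV cache

from tools import config  # reads the environment variables on import

# Expected to reflect the low-VRAM defaults (4096 new tokens, 8192 context,
# batch size 512, KV_QUANT_LEVEL 2) if the first created value wins - an assumption.
print(config.LLM_CONTEXT_LENGTH)
print(config.KV_QUANT_LEVEL)
```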
tools/custom_csvlogger.py
CHANGED
@@ -14,8 +14,7 @@ from multiprocessing import Lock
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 from gradio_client import utils as client_utils
-
-from gradio import utils, wasm_utils
+from gradio import utils
 from tools.config import AWS_REGION, AWS_ACCESS_KEY, AWS_SECRET_KEY, RUN_AWS_FUNCTIONS
 

@@ -56,9 +55,7 @@ class CSVLogger_custom(FlaggingCallback):
         self.simplify_file_data = simplify_file_data
         self.verbose = verbose
         self.dataset_file_name = dataset_file_name
-        self.lock = (
-            Lock() if not wasm_utils.IS_WASM else contextlib.nullcontext()
-        ) # The multiprocessing module doesn't work on Lite.
+        self.lock = Lock()
 
     def setup(
         self,
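Dropping the wasm_utils check means the logger now always holds a real multiprocessing.Lock rather than a null context on Lite builds. A hedged sketch of the kind of guarded CSV append such a lock enables (the class and method below are placeholders, not the repo's CSVLogger_custom API):

```python
import csv
from multiprocessing import Lock

# Illustrative only: serialising writes with a multiprocessing.Lock.
class TinyCsvLogger:
    def __init__(self, path: str):
        self.path = path
        self.lock = Lock()  # mirrors `self.lock = Lock()` in the diff

    def append_row(self, row: list) -> None:
        # Hold the lock so concurrent flag() calls cannot interleave rows
        with self.lock:
            with open(self.path, "a", newline="", encoding="utf-8") as f:
                csv.writer(f).writerow(row)
```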
tools/dedup_summaries.py
CHANGED
@@ -161,8 +161,6 @@ def deduplicate_topics(reference_df:pd.DataFrame,
 
     reference_file_out_path = output_folder + reference_table_file_name
     unique_topics_file_out_path = output_folder + unique_topics_table_file_name
-    #reference_df.to_csv(reference_file_out_path, index = None, encoding='utf-8-sig')
-    #topic_summary_df.to_csv(unique_topics_file_out_path, index=None, encoding='utf-8-sig')
 
     output_files.append(reference_file_out_path)
     output_files.append(unique_topics_file_out_path)

@@ -195,13 +193,17 @@ def deduplicate_topics(reference_df:pd.DataFrame,
     if "Group" not in reference_df.columns:
         reference_df["Group"] = "All"
     for i in range(0, 8):
-        if merge_sentiment == "No":
+        if merge_sentiment == "No":
             if merge_general_topics == "No":
                 reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
                 reference_df_unique = reference_df.drop_duplicates("old_category")
 
-
-
+                # Create an empty list to store results from each group
+                results = []
+                # Iterate over each group instead of using .apply()
+                for name, group in reference_df_unique.groupby(["General topic", "Sentiment", "Group"]):
+                    # Run your function on the 'group' DataFrame
+                    result = deduplicate_categories(
                         group["Subtopic"],
                         group["Sentiment"],
                         reference_df,

@@ -209,30 +211,38 @@ def deduplicate_topics(reference_df:pd.DataFrame,
                         merge_general_topics="No",
                         threshold=score_threshold
                     )
-
+                    results.append(result)
+
+                # Concatenate all the results into a single DataFrame
+                deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
+                # --- MODIFIED SECTION END ---
+
             else:
                 # This case should allow cross-topic matching but is still grouping by Sentiment
                 reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
                 reference_df_unique = reference_df.drop_duplicates("old_category")
-
-
-
+
+                results = []
+                for name, group in reference_df_unique.groupby("Sentiment"):
+                    result = deduplicate_categories(
                         group["Subtopic"],
                         group["Sentiment"],
                         reference_df,
-                        general_topic_series=None,
+                        general_topic_series=None,
                         merge_general_topics="Yes",
                         threshold=score_threshold
                     )
-
+                    results.append(result)
+                deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
+
         else:
             if merge_general_topics == "No":
-                # Update this case to maintain general topic boundaries
                 reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
                 reference_df_unique = reference_df.drop_duplicates("old_category")
-
-
-
+
+                results = []
+                for name, group in reference_df_unique.groupby("General topic"):
+                    result = deduplicate_categories(
                         group["Subtopic"],
                         group["Sentiment"],
                         reference_df,

@@ -241,9 +251,10 @@ def deduplicate_topics(reference_df:pd.DataFrame,
                         merge_sentiment=merge_sentiment,
                         threshold=score_threshold
                     )
-
-
-
+                    results.append(result)
+                deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
+
+            else:
                 reference_df["old_category"] = reference_df["Subtopic"] + " | " + reference_df["Sentiment"]
                 reference_df_unique = reference_df.drop_duplicates("old_category")
 

@@ -251,14 +262,13 @@ def deduplicate_topics(reference_df:pd.DataFrame,
                     reference_df_unique["Subtopic"],
                     reference_df_unique["Sentiment"],
                     reference_df,
-                    general_topic_series=None,
+                    general_topic_series=None,
                     merge_general_topics="Yes",
                     merge_sentiment=merge_sentiment,
                     threshold=score_threshold
                 ).reset_index(drop=True)
-
+
         if deduplicated_topic_map_df['deduplicated_category'].isnull().all():
-            # Check if 'deduplicated_category' contains any values
             print("No deduplicated categories found, skipping the following code.")
 
         else:

@@ -785,6 +795,9 @@ def summarise_output_topics(sampled_reference_table_df:pd.DataFrame,
     for prompt, summary, metadata, batch, model_choice, validated, group, task_type, file_name in zip(all_prompts_content, all_summaries_content, all_metadata_content, all_batches_content, all_model_choice_content, all_validated_content, all_groups_content, all_task_type_content, all_file_names_content)
     ]
 
+    if isinstance(existing_logged_content, pd.DataFrame):
+        existing_logged_content = existing_logged_content.to_dict(orient="records")
+
     out_logged_content = existing_logged_content + all_logged_content
 
     ### Save output files

@@ -1004,7 +1017,7 @@ def overall_summary(topic_summary_df:pd.DataFrame,
     # Write overall outputs to csv
     overall_summary_output_csv_path = output_folder + batch_file_path_details + "_overall_summary_" + model_choice_clean_short + ".csv"
     summarised_outputs_df = pd.DataFrame(data={"Group":unique_groups, "Summary":summarised_outputs_for_df})
-    summarised_outputs_df.to_csv(overall_summary_output_csv_path, index=None)
+    summarised_outputs_df.to_csv(overall_summary_output_csv_path, index=None, encoding='utf-8-sig')
     output_files.append(overall_summary_output_csv_path)
 
     summarised_outputs_df_for_display = pd.DataFrame(data={"Group":unique_groups, "Summary":summarised_outputs})

@@ -1031,6 +1044,9 @@ def overall_summary(topic_summary_df:pd.DataFrame,
     for prompt, summary, metadata, batch, model_choice, validated, group, task_type, file_name in zip(all_prompts_content, all_summaries_content, all_metadata_content, all_batches_content, all_model_choice_content, all_validated_content, all_groups_content, all_task_type_content, all_file_names_content)
     ]
 
+    if isinstance(existing_logged_content, pd.DataFrame):
+        existing_logged_content = existing_logged_content.to_dict(orient="records")
+
    out_logged_content = existing_logged_content + all_logged_content
 
     return output_files, html_output_table, summarised_outputs_df, out_metadata_str, input_tokens_num, output_tokens_num, number_of_calls_num, time_taken, out_message, out_logged_content
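The deduplication hunks above replace an implicit per-group apply with an explicit loop that runs deduplicate_categories on each group, collects the results, and concatenates them once. A self-contained sketch of that pattern (the toy frame, stub function, and grouping columns are illustrative; only the loop-and-concat structure mirrors the diff):

```python
import pandas as pd

# Toy data standing in for the reference table (columns are illustrative)
reference_df_unique = pd.DataFrame({
    "General topic": ["Service", "Service", "Price"],
    "Sentiment": ["Positive", "Negative", "Negative"],
    "Group": ["All", "All", "All"],
    "Subtopic": ["Helpful staff", "Slow replies", "Too expensive"],
})

def deduplicate_categories_stub(subtopics: pd.Series, sentiments: pd.Series) -> pd.DataFrame:
    # Placeholder for the real deduplicate_categories(); it just pairs the inputs up.
    return pd.DataFrame({"Subtopic": subtopics, "Sentiment": sentiments})

# The pattern the diff introduces: loop over groups, collect, then concat once.
results = []
for name, group in reference_df_unique.groupby(["General topic", "Sentiment", "Group"]):
    results.append(deduplicate_categories_stub(group["Subtopic"], group["Sentiment"]))

deduplicated_topic_map_df = pd.concat(results).reset_index(drop=True)
print(deduplicated_topic_map_df)
```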
tools/llm_api_call.py
CHANGED
|
@@ -15,7 +15,7 @@ from typing import List, Tuple, Any
|
|
| 15 |
from io import StringIO
|
| 16 |
GradioFileData = gr.FileData
|
| 17 |
|
| 18 |
-
from tools.prompts import initial_table_prompt, prompt2, prompt3, initial_table_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, force_existing_topics_prompt, allow_new_topics_prompt, force_single_topic_prompt, add_existing_topics_assistant_prefill, initial_table_assistant_prefill, structured_summary_prompt
|
| 19 |
from tools.helper_functions import read_file, put_columns_in_df, wrap_text, initial_clean, load_in_data_file, load_in_file, create_topic_summary_df_from_reference_table, convert_reference_table_to_pivot_table, get_basic_response_data, clean_column_name, load_in_previous_data_files, create_batch_file_path_details, move_overall_summary_output_files_to_front_page
|
| 20 |
from tools.llm_funcs import ResponseObject, construct_gemini_generative_model, call_llm_with_markdown_table_checks, create_missing_references_df, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model
|
| 21 |
from tools.config import RUN_LOCAL_MODEL, AWS_REGION, MAX_COMMENT_CHARS, MAX_OUTPUT_VALIDATION_ATTEMPTS, LLM_MAX_NEW_TOKENS, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, OUTPUT_FOLDER, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, LLM_SEED, MAX_GROUPS, REASONING_SUFFIX, AZURE_INFERENCE_ENDPOINT, MAX_ROWS, MAXIMUM_ZERO_SHOT_TOPICS, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
|
|
@@ -352,9 +352,9 @@ def write_llm_output_and_logs(response_text: str,
|
|
| 352 |
topic_table_out_path = "topic_table_error.csv"
|
| 353 |
reference_table_out_path = "reference_table_error.csv"
|
| 354 |
topic_summary_df_out_path = "unique_topic_table_error.csv"
|
| 355 |
-
topic_with_response_df = pd.DataFrame()
|
| 356 |
-
out_reference_df = pd.DataFrame()
|
| 357 |
-
out_topic_summary_df = pd.DataFrame()
|
| 358 |
is_error = False # If there was an error in parsing, return boolean saying error
|
| 359 |
# Convert conversation to string and add to log outputs
|
| 360 |
whole_conversation_str = '\n'.join(whole_conversation)
|
|
@@ -385,6 +385,7 @@ def write_llm_output_and_logs(response_text: str,
|
|
| 385 |
topic_with_response_df, is_error = convert_response_text_to_dataframe(response_text)
|
| 386 |
except Exception as e:
|
| 387 |
print("Error in parsing markdown table from response text:", e)
|
|
|
|
| 388 |
return topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, topic_with_response_df, out_reference_df, out_topic_summary_df, batch_file_path_details, is_error
|
| 389 |
|
| 390 |
# Rename columns to ensure consistent use of data frames later in code
|
|
@@ -420,8 +421,11 @@ def write_llm_output_and_logs(response_text: str,
|
|
| 420 |
for index, row in topic_with_response_df.iterrows():
|
| 421 |
references = re.findall(r'\d+', str(row.iloc[3])) if pd.notna(row.iloc[3]) else []
|
| 422 |
# If no numbers found in the Response References column, check the Summary column in case reference numbers were put there by mistake
|
| 423 |
-
if not references:
|
| 424 |
-
|
|
|
|
|
|
|
|
|
|
| 425 |
|
| 426 |
# Filter out references that are outside the valid range
|
| 427 |
if references:
|
|
@@ -695,6 +699,7 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 695 |
assistant_model:object=list(),
|
| 696 |
max_rows:int=max_rows,
|
| 697 |
original_full_file_name:str="",
|
|
|
|
| 698 |
progress=Progress(track_tqdm=False)):
|
| 699 |
|
| 700 |
'''
|
|
@@ -749,6 +754,7 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 749 |
- assistant_model: Assistant model object for local inference.
|
| 750 |
- max_rows: The maximum number of rows to process.
|
| 751 |
- original_full_file_name: The original full file name.
|
|
|
|
| 752 |
- progress (Progress): A progress tracker.
|
| 753 |
|
| 754 |
'''
|
|
@@ -863,6 +869,9 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 863 |
# Call the function to prepare the input table
|
| 864 |
simplified_csv_table_path, normalised_simple_markdown_table, start_row, end_row, batch_basic_response_df = data_file_to_markdown_table(file_data, file_name, chosen_cols, latest_batch_completed, batch_size)
|
| 865 |
|
|
|
|
|
|
|
|
|
|
| 866 |
# Conversation history
|
| 867 |
conversation_history = list()
|
| 868 |
|
|
@@ -951,11 +960,15 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 951 |
# Format the summary prompt with the response table and topics
|
| 952 |
if produce_structures_summary_radio != "Yes":
|
| 953 |
formatted_summary_prompt = add_existing_topics_prompt.format(response_table=normalised_simple_markdown_table,
|
| 954 |
-
|
| 955 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 956 |
else:
|
| 957 |
formatted_summary_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table,
|
| 958 |
-
|
| 959 |
|
| 960 |
full_prompt = formatted_system_prompt + "\n" + formatted_summary_prompt
|
| 961 |
|
|
@@ -997,7 +1010,7 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 997 |
|
| 998 |
## Reference table mapping response numbers to topics
|
| 999 |
if output_debug_files == "True":
|
| 1000 |
-
new_reference_df.to_csv(reference_table_out_path, index=None)
|
| 1001 |
out_file_paths.append(reference_table_out_path)
|
| 1002 |
|
| 1003 |
## Unique topic list
|
|
@@ -1006,7 +1019,7 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 1006 |
new_topic_summary_df["Group"] = group_name
|
| 1007 |
|
| 1008 |
if output_debug_files == "True":
|
| 1009 |
-
new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None)
|
| 1010 |
out_file_paths.append(topic_summary_df_out_path)
|
| 1011 |
|
| 1012 |
# Outputs for markdown table output
|
|
@@ -1039,7 +1052,8 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 1039 |
|
| 1040 |
# Format the summary prompt with the response table and topics
|
| 1041 |
if produce_structures_summary_radio != "Yes":
|
| 1042 |
-
formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table, sentiment_choices=sentiment_prompt
|
|
|
|
| 1043 |
else:
|
| 1044 |
unique_topics_markdown="No suggested headings for this summary"
|
| 1045 |
formatted_initial_table_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table, topics=unique_topics_markdown)
|
|
@@ -1076,7 +1090,7 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 1076 |
if output_debug_files == "True":
|
| 1077 |
|
| 1078 |
# Output reference table
|
| 1079 |
-
reference_df.to_csv(reference_table_out_path, index=None)
|
| 1080 |
out_file_paths.append(reference_table_out_path)
|
| 1081 |
|
| 1082 |
## Unique topic list
|
|
@@ -1086,7 +1100,7 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 1086 |
new_topic_summary_df["Group"] = group_name
|
| 1087 |
|
| 1088 |
if output_debug_files == "True":
|
| 1089 |
-
new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None)
|
| 1090 |
out_file_paths.append(topic_summary_df_out_path)
|
| 1091 |
|
| 1092 |
whole_conversation_metadata.append(whole_conversation_metadata_str)
|
|
@@ -1160,7 +1174,7 @@ def extract_topics(in_data_file: GradioFileData,
|
|
| 1160 |
basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean_short + "_temp_" + str(temperature) + ".csv"
|
| 1161 |
|
| 1162 |
## Reference table mapping response numbers to topics
|
| 1163 |
-
existing_reference_df.to_csv(reference_table_out_path, index=None)
|
| 1164 |
out_file_paths.append(reference_table_out_path)
|
| 1165 |
join_file_paths.append(reference_table_out_path)
|
| 1166 |
|
|
@@ -1250,6 +1264,7 @@ def wrapper_extract_topics_per_column_value(
|
|
| 1250 |
azure_api_key_textbox:str="",
|
| 1251 |
output_folder: str = OUTPUT_FOLDER,
|
| 1252 |
existing_logged_content:list=list(),
|
|
|
|
| 1253 |
force_single_topic_prompt: str = force_single_topic_prompt,
|
| 1254 |
max_tokens: int = max_tokens,
|
| 1255 |
model_name_map: dict = model_name_map,
|
|
@@ -1304,6 +1319,7 @@ def wrapper_extract_topics_per_column_value(
|
|
| 1304 |
:param output_folder: The folder where output files will be saved.
|
| 1305 |
:param existing_logged_content: A list of existing logged content.
|
| 1306 |
:param force_single_topic_prompt: Prompt for forcing a single topic.
|
|
|
|
| 1307 |
:param max_tokens: Maximum tokens for LLM generation.
|
| 1308 |
:param model_name_map: Dictionary mapping model names to their properties.
|
| 1309 |
:param max_time_for_loop: Maximum time allowed for the processing loop.
|
|
@@ -1312,7 +1328,7 @@ def wrapper_extract_topics_per_column_value(
|
|
| 1312 |
:param model: Model object for local inference.
|
| 1313 |
:param tokenizer: Tokenizer object for local inference.
|
| 1314 |
:param assistant_model: Assistant model object for local inference.
|
| 1315 |
-
:param max_rows: The maximum number of rows to process.
|
| 1316 |
:param progress: Gradio Progress object for tracking progress.
|
| 1317 |
:return: A tuple containing consolidated results, mimicking the return structure of `extract_topics`.
|
| 1318 |
"""
|
|
@@ -1488,6 +1504,7 @@ def wrapper_extract_topics_per_column_value(
|
|
| 1488 |
max_rows=max_rows,
|
| 1489 |
existing_logged_content=all_logged_content,
|
| 1490 |
original_full_file_name=original_file_name,
|
|
|
|
| 1491 |
progress=progress
|
| 1492 |
)
|
| 1493 |
|
|
@@ -1521,21 +1538,23 @@ def wrapper_extract_topics_per_column_value(
|
|
| 1521 |
# For now, it will continue
|
| 1522 |
continue
|
| 1523 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1524 |
if "Group" in acc_reference_df.columns:
|
| 1525 |
-
|
| 1526 |
-
model_choice_clean_short = clean_column_name(model_choice_clean, max_length=20, front_characters=False)
|
| 1527 |
-
overall_file_name = clean_column_name(original_file_name, max_length=20)
|
| 1528 |
-
column_clean = clean_column_name(chosen_cols, max_length=20)
|
| 1529 |
|
| 1530 |
acc_reference_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_table_" + model_choice_clean_short + ".csv"
|
| 1531 |
acc_topic_summary_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_unique_topics_" + model_choice_clean_short + ".csv"
|
| 1532 |
acc_reference_df_pivot_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_pivot_" + model_choice_clean_short + ".csv"
|
| 1533 |
acc_missing_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_missing_df_" + model_choice_clean_short + ".csv"
|
| 1534 |
|
| 1535 |
-
acc_reference_df.to_csv(acc_reference_df_path, index=None)
|
| 1536 |
-
acc_topic_summary_df.to_csv(acc_topic_summary_df_path, index=None)
|
| 1537 |
-
acc_reference_df_pivot.to_csv(acc_reference_df_pivot_path, index=None)
|
| 1538 |
-
acc_missing_df.to_csv(acc_missing_df_path, index=None)
|
| 1539 |
|
| 1540 |
acc_log_files_output_paths.append(acc_missing_df_path)
|
| 1541 |
|
|
@@ -1740,6 +1759,7 @@ def all_in_one_pipeline(
|
|
| 1740 |
model_name_map_state: dict = model_name_map,
|
| 1741 |
usage_logs_location: str = "",
|
| 1742 |
existing_logged_content:list=list(),
|
|
|
|
| 1743 |
model: object = None,
|
| 1744 |
tokenizer: object = None,
|
| 1745 |
assistant_model: object = None,
|
|
@@ -1749,7 +1769,60 @@ def all_in_one_pipeline(
|
|
| 1749 |
"""
|
| 1750 |
Orchestrates the full All-in-one flow: extract → deduplicate → summarise → overall summary → Excel export.
|
| 1751 |
|
| 1752 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1753 |
"""
|
| 1754 |
|
| 1755 |
# Load local model if it's not already loaded
|
|
@@ -1830,7 +1903,8 @@ def all_in_one_pipeline(
|
|
| 1830 |
model=model,
|
| 1831 |
tokenizer=tokenizer,
|
| 1832 |
assistant_model=assistant_model,
|
| 1833 |
-
max_rows=max_rows
|
|
|
|
| 1834 |
)
|
| 1835 |
|
| 1836 |
total_input_tokens += out_input_tokens
|
|
|
|
| 15 |
from io import StringIO
|
| 16 |
GradioFileData = gr.FileData
|
| 17 |
|
| 18 |
+
from tools.prompts import initial_table_prompt, prompt2, prompt3, initial_table_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, force_existing_topics_prompt, allow_new_topics_prompt, force_single_topic_prompt, add_existing_topics_assistant_prefill, initial_table_assistant_prefill, structured_summary_prompt, default_response_reference_format, single_response_reference_format
|
| 19 |
from tools.helper_functions import read_file, put_columns_in_df, wrap_text, initial_clean, load_in_data_file, load_in_file, create_topic_summary_df_from_reference_table, convert_reference_table_to_pivot_table, get_basic_response_data, clean_column_name, load_in_previous_data_files, create_batch_file_path_details, move_overall_summary_output_files_to_front_page
|
| 20 |
from tools.llm_funcs import ResponseObject, construct_gemini_generative_model, call_llm_with_markdown_table_checks, create_missing_references_df, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model
|
| 21 |
from tools.config import RUN_LOCAL_MODEL, AWS_REGION, MAX_COMMENT_CHARS, MAX_OUTPUT_VALIDATION_ATTEMPTS, LLM_MAX_NEW_TOKENS, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, OUTPUT_FOLDER, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, LLM_SEED, MAX_GROUPS, REASONING_SUFFIX, AZURE_INFERENCE_ENDPOINT, MAX_ROWS, MAXIMUM_ZERO_SHOT_TOPICS, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
|
|
|
|
| 352 |
topic_table_out_path = "topic_table_error.csv"
|
| 353 |
reference_table_out_path = "reference_table_error.csv"
|
| 354 |
topic_summary_df_out_path = "unique_topic_table_error.csv"
|
| 355 |
+
topic_with_response_df = pd.DataFrame(columns=["General topic", "Subtopic", "Sentiment", "Response References", "Summary"])
|
| 356 |
+
out_reference_df = pd.DataFrame(columns=["Response References", "General topic", "Subtopic", "Sentiment", "Summary", "Start row of group"])
|
| 357 |
+
out_topic_summary_df = pd.DataFrame(columns=["General topic", "Subtopic", "Sentiment"])
|
| 358 |
is_error = False # If there was an error in parsing, return boolean saying error
|
| 359 |
# Convert conversation to string and add to log outputs
|
| 360 |
whole_conversation_str = '\n'.join(whole_conversation)
|
|
|
|
| 385 |
topic_with_response_df, is_error = convert_response_text_to_dataframe(response_text)
|
| 386 |
except Exception as e:
|
| 387 |
print("Error in parsing markdown table from response text:", e)
|
| 388 |
+
|
| 389 |
return topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, topic_with_response_df, out_reference_df, out_topic_summary_df, batch_file_path_details, is_error
|
| 390 |
|
| 391 |
# Rename columns to ensure consistent use of data frames later in code
|
|
|
|
| 421 |
for index, row in topic_with_response_df.iterrows():
|
| 422 |
references = re.findall(r'\d+', str(row.iloc[3])) if pd.notna(row.iloc[3]) else []
|
| 423 |
# If no numbers found in the Response References column, check the Summary column in case reference numbers were put there by mistake
|
| 424 |
+
##if not references:
|
| 425 |
+
# references = re.findall(r'\d+', str(row.iloc[4])) if pd.notna(row.iloc[4]) else []
|
| 426 |
+
# If batch size is 1, references will always be 1
|
| 427 |
+
if batch_size_number == 1:
|
| 428 |
+
references = "1"
|
| 429 |
|
| 430 |
# Filter out references that are outside the valid range
|
| 431 |
if references:
|
|
|
|
| 699 |
assistant_model:object=list(),
|
| 700 |
max_rows:int=max_rows,
|
| 701 |
original_full_file_name:str="",
|
| 702 |
+
add_existing_topics_summary_format:str="",
|
| 703 |
progress=Progress(track_tqdm=False)):
|
| 704 |
|
| 705 |
'''
|
|
|
|
| 754 |
- assistant_model: Assistant model object for local inference.
|
| 755 |
- max_rows: The maximum number of rows to process.
|
| 756 |
- original_full_file_name: The original full file name.
|
| 757 |
+
- add_existing_topics_summary_format: Initial instructions to guide the format for the initial summary of the topics.
|
| 758 |
- progress (Progress): A progress tracker.
|
| 759 |
|
| 760 |
'''
|
|
|
|
| 869 |
# Call the function to prepare the input table
|
| 870 |
simplified_csv_table_path, normalised_simple_markdown_table, start_row, end_row, batch_basic_response_df = data_file_to_markdown_table(file_data, file_name, chosen_cols, latest_batch_completed, batch_size)
|
| 871 |
|
| 872 |
+
if batch_basic_response_df.shape[0] == 1: response_reference_format = single_response_reference_format
|
| 873 |
+
else: response_reference_format = default_response_reference_format
|
| 874 |
+
|
| 875 |
# Conversation history
|
| 876 |
conversation_history = list()
|
| 877 |
|
|
|
|
| 960 |
# Format the summary prompt with the response table and topics
|
| 961 |
if produce_structures_summary_radio != "Yes":
|
| 962 |
formatted_summary_prompt = add_existing_topics_prompt.format(response_table=normalised_simple_markdown_table,
|
| 963 |
+
topics=unique_topics_markdown,
|
| 964 |
+
topic_assignment=topic_assignment_prompt,
|
| 965 |
+
force_single_topic=force_single_topic_prompt,
|
| 966 |
+
sentiment_choices=sentiment_prompt,
|
| 967 |
+
response_reference_format=response_reference_format,
|
| 968 |
+
add_existing_topics_summary_format=add_existing_topics_summary_format)
|
| 969 |
else:
|
| 970 |
formatted_summary_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table,
|
| 971 |
+
topics=unique_topics_markdown)
|
| 972 |
|
| 973 |
full_prompt = formatted_system_prompt + "\n" + formatted_summary_prompt
|
| 974 |
|
|
|
|
| 1010 |
|
| 1011 |
## Reference table mapping response numbers to topics
|
| 1012 |
if output_debug_files == "True":
|
| 1013 |
+
new_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
|
| 1014 |
out_file_paths.append(reference_table_out_path)
|
| 1015 |
|
| 1016 |
## Unique topic list
|
|
|
|
| 1019 |
new_topic_summary_df["Group"] = group_name
|
| 1020 |
|
| 1021 |
if output_debug_files == "True":
|
| 1022 |
+
new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None, encoding='utf-8-sig')
|
| 1023 |
out_file_paths.append(topic_summary_df_out_path)
|
| 1024 |
|
| 1025 |
# Outputs for markdown table output
|
|
|
|
| 1052 |
|
| 1053 |
# Format the summary prompt with the response table and topics
|
| 1054 |
if produce_structures_summary_radio != "Yes":
|
| 1055 |
+
formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table, sentiment_choices=sentiment_prompt,
|
| 1056 |
+
response_reference_format=response_reference_format, add_existing_topics_summary_format=add_existing_topics_summary_format)
|
| 1057 |
else:
|
| 1058 |
unique_topics_markdown="No suggested headings for this summary"
|
| 1059 |
formatted_initial_table_prompt = structured_summary_prompt.format(response_table=normalised_simple_markdown_table, topics=unique_topics_markdown)
|
|
|
|
| 1090 |
if output_debug_files == "True":
|
| 1091 |
|
| 1092 |
# Output reference table
|
| 1093 |
+
reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
|
| 1094 |
out_file_paths.append(reference_table_out_path)
|
| 1095 |
|
| 1096 |
## Unique topic list
|
|
|
|
| 1100 |
new_topic_summary_df["Group"] = group_name
|
| 1101 |
|
| 1102 |
if output_debug_files == "True":
|
| 1103 |
+
new_topic_summary_df.to_csv(topic_summary_df_out_path, index=None, encoding='utf-8-sig')
|
| 1104 |
out_file_paths.append(topic_summary_df_out_path)
|
| 1105 |
|
| 1106 |
whole_conversation_metadata.append(whole_conversation_metadata_str)
|
|
|
|
| 1174 |
basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean_short + "_temp_" + str(temperature) + ".csv"
|
| 1175 |
|
| 1176 |
## Reference table mapping response numbers to topics
|
| 1177 |
+
existing_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
|
| 1178 |
out_file_paths.append(reference_table_out_path)
|
| 1179 |
join_file_paths.append(reference_table_out_path)
|
| 1180 |
|
|
|
|
| 1264 |
azure_api_key_textbox:str="",
|
| 1265 |
output_folder: str = OUTPUT_FOLDER,
|
| 1266 |
existing_logged_content:list=list(),
|
| 1267 |
+
add_existing_topics_summary_format:str="",
|
| 1268 |
force_single_topic_prompt: str = force_single_topic_prompt,
|
| 1269 |
max_tokens: int = max_tokens,
|
| 1270 |
model_name_map: dict = model_name_map,
|
|
|
|
| 1319 |
:param output_folder: The folder where output files will be saved.
|
| 1320 |
:param existing_logged_content: A list of existing logged content.
|
| 1321 |
:param force_single_topic_prompt: Prompt for forcing a single topic.
|
| 1322 |
+
:param add_existing_topics_summary_format: Initial instructions to guide the format for the initial summary of the topics.
|
| 1323 |
:param max_tokens: Maximum tokens for LLM generation.
|
| 1324 |
:param model_name_map: Dictionary mapping model names to their properties.
|
| 1325 |
:param max_time_for_loop: Maximum time allowed for the processing loop.
|
|
|
|
| 1328 |
:param model: Model object for local inference.
|
| 1329 |
:param tokenizer: Tokenizer object for local inference.
|
| 1330 |
:param assistant_model: Assistant model object for local inference.
|
| 1331 |
+
:param max_rows: The maximum number of rows to process.
|
| 1332 |
:param progress: Gradio Progress object for tracking progress.
|
| 1333 |
:return: A tuple containing consolidated results, mimicking the return structure of `extract_topics`.
|
| 1334 |
"""
|
|
|
|
| 1504 |
max_rows=max_rows,
|
| 1505 |
existing_logged_content=all_logged_content,
|
| 1506 |
original_full_file_name=original_file_name,
|
| 1507 |
+
add_existing_topics_summary_format=add_existing_topics_summary_format,
|
| 1508 |
progress=progress
|
| 1509 |
)
|
| 1510 |
|
|
|
|
| 1538 |
# For now, it will continue
|
| 1539 |
continue
|
| 1540 |
|
| 1541 |
+
overall_file_name = clean_column_name(original_file_name, max_length=20)
|
| 1542 |
+
model_choice_clean = model_name_map[model_choice]["short_name"]
|
| 1543 |
+
model_choice_clean_short = clean_column_name(model_choice_clean, max_length=20, front_characters=False)
|
| 1544 |
+
column_clean = clean_column_name(chosen_cols, max_length=20)
|
| 1545 |
+
|
| 1546 |
if "Group" in acc_reference_df.columns:
|
| 1547 |
+
|
| 1548 |
|
| 1549 |
acc_reference_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_table_" + model_choice_clean_short + ".csv"
|
| 1550 |
acc_topic_summary_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_unique_topics_" + model_choice_clean_short + ".csv"
|
| 1551 |
acc_reference_df_pivot_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_final_reference_pivot_" + model_choice_clean_short + ".csv"
|
| 1552 |
acc_missing_df_path = output_folder + overall_file_name + "_col_" + column_clean + "_all_missing_df_" + model_choice_clean_short + ".csv"
|
| 1553 |
|
| 1554 |
+
acc_reference_df.to_csv(acc_reference_df_path, index=None, encoding='utf-8-sig')
|
| 1555 |
+
acc_topic_summary_df.to_csv(acc_topic_summary_df_path, index=None, encoding='utf-8-sig')
|
| 1556 |
+
acc_reference_df_pivot.to_csv(acc_reference_df_pivot_path, index=None, encoding='utf-8-sig')
|
| 1557 |
+
acc_missing_df.to_csv(acc_missing_df_path, index=None, encoding='utf-8-sig')
|
| 1558 |
|
| 1559 |
acc_log_files_output_paths.append(acc_missing_df_path)
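Note: the CSV writes in this commit switch to encoding='utf-8-sig' so that Excel detects UTF-8 via the byte-order mark and renders accented characters in responses correctly. A minimal, self-contained sketch of the behaviour (file name illustrative):

import pandas as pd

df = pd.DataFrame({"Topic": ["Café access"], "Response References": [1]})
# utf-8-sig prepends a BOM, which Excel uses to pick UTF-8 instead of a legacy code page
df.to_csv("example_topics.csv", index=None, encoding="utf-8-sig")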
|
| 1560 |
|
|
|
|
| 1759 |
model_name_map_state: dict = model_name_map,
|
| 1760 |
usage_logs_location: str = "",
|
| 1761 |
existing_logged_content:list=list(),
|
| 1762 |
+
add_existing_topics_summary_format:str="",
|
| 1763 |
model: object = None,
|
| 1764 |
tokenizer: object = None,
|
| 1765 |
assistant_model: object = None,
|
|
|
|
| 1769 |
"""
|
| 1770 |
Orchestrates the full All-in-one flow: extract → deduplicate → summarise → overall summary → Excel export.
|
| 1771 |
|
| 1772 |
+
Args:
|
| 1773 |
+
grouping_col (str): The column used for grouping data.
|
| 1774 |
+
in_data_files (List[str]): List of input data file paths.
|
| 1775 |
+
file_data (pd.DataFrame): The input data as a pandas DataFrame.
|
| 1776 |
+
existing_topics_table (pd.DataFrame): DataFrame of existing topics.
|
| 1777 |
+
existing_reference_df (pd.DataFrame): DataFrame of existing reference data.
|
| 1778 |
+
existing_topic_summary_df (pd.DataFrame): DataFrame of existing topic summaries.
|
| 1779 |
+
unique_table_df_display_table_markdown (str): Markdown string for displaying unique topics.
|
| 1780 |
+
original_file_name (str): The original name of the input file.
|
| 1781 |
+
total_number_of_batches (int): Total number of batches for processing.
|
| 1782 |
+
in_api_key (str): API key for the LLM.
|
| 1783 |
+
temperature (float): Temperature setting for the LLM.
|
| 1784 |
+
chosen_cols (List[str]): List of columns chosen for analysis.
|
| 1785 |
+
model_choice (str): The chosen LLM model.
|
| 1786 |
+
candidate_topics (GradioFileData): Gradio file data for candidate topics.
|
| 1787 |
+
first_loop_state (bool): State indicating if it's the first loop.
|
| 1788 |
+
conversation_metadata_text (str): Text containing conversation metadata.
|
| 1789 |
+
latest_batch_completed (int): The latest batch number completed.
|
| 1790 |
+
time_taken_so_far (float): Cumulative time taken so far.
|
| 1791 |
+
initial_table_prompt_text (str): Initial prompt text for table generation.
|
| 1792 |
+
initial_table_system_prompt_text (str): Initial system prompt text for table generation.
|
| 1793 |
+
add_existing_topics_system_prompt_text (str): System prompt for adding existing topics.
|
| 1794 |
+
add_existing_topics_prompt_text (str): Prompt for adding existing topics.
|
| 1795 |
+
number_of_prompts_used (int): Number of prompts used in sequence.
|
| 1796 |
+
batch_size (int): Size of each processing batch.
|
| 1797 |
+
context_text (str): Additional context for the LLM.
|
| 1798 |
+
sentiment_choice (str): Choice for sentiment analysis (e.g., "Yes", "No").
|
| 1799 |
+
force_zero_shot_choice (str): Choice to force zero-shot prompting.
|
| 1800 |
+
in_excel_sheets (List[str]): List of sheet names in the input Excel file.
|
| 1801 |
+
force_single_topic_choice (str): Choice to force single topic extraction.
|
| 1802 |
+
produce_structures_summary_choice (str): Choice to produce structured summaries.
|
| 1803 |
+
aws_access_key_text (str): AWS access key.
|
| 1804 |
+
aws_secret_key_text (str): AWS secret key.
|
| 1805 |
+
hf_api_key_text (str): Hugging Face API key.
|
| 1806 |
+
azure_api_key_text (str): Azure API key.
|
| 1807 |
+
output_folder (str, optional): Folder to save output files. Defaults to OUTPUT_FOLDER.
|
| 1808 |
+
merge_sentiment (str, optional): Whether to merge sentiment. Defaults to "No".
|
| 1809 |
+
merge_general_topics (str, optional): Whether to merge general topics. Defaults to "Yes".
|
| 1810 |
+
score_threshold (int, optional): Score threshold for topic matching. Defaults to 90.
|
| 1811 |
+
summarise_format (str, optional): Format for summarization. Defaults to "".
|
| 1812 |
+
random_seed (int, optional): Random seed for reproducibility. Defaults to 42.
|
| 1813 |
+
log_files_output_list_state (List[str], optional): List of log file paths. Defaults to list().
|
| 1814 |
+
model_name_map_state (dict, optional): Mapping of model names. Defaults to model_name_map.
|
| 1815 |
+
usage_logs_location (str, optional): Location for usage logs. Defaults to "".
|
| 1816 |
+
existing_logged_content (list, optional): Existing logged content. Defaults to list().
|
| 1817 |
+
add_existing_topics_summary_format (str, optional): Summary format for adding existing topics. Defaults to "".
|
| 1818 |
+
model (object, optional): Loaded local model object. Defaults to None.
|
| 1819 |
+
tokenizer (object, optional): Loaded local tokenizer object. Defaults to None.
|
| 1820 |
+
assistant_model (object, optional): Loaded local assistant model object. Defaults to None.
|
| 1821 |
+
max_rows (int, optional): Maximum number of rows to process. Defaults to max_rows.
|
| 1822 |
+
progress (Progress, optional): Gradio Progress object for tracking. Defaults to Progress(track_tqdm=True).
|
| 1823 |
+
|
| 1824 |
+
Returns:
|
| 1825 |
+
A tuple matching the UI components updated during the original chained flow.
|
| 1826 |
"""
|
| 1827 |
|
| 1828 |
# Load local model if it's not already loaded
|
|
|
|
| 1903 |
model=model,
|
| 1904 |
tokenizer=tokenizer,
|
| 1905 |
assistant_model=assistant_model,
|
| 1906 |
+
max_rows=max_rows,
|
| 1907 |
+
add_existing_topics_summary_format=add_existing_topics_summary_format
|
| 1908 |
)
|
| 1909 |
|
| 1910 |
total_input_tokens += out_input_tokens
|
tools/llm_funcs.py
CHANGED
|
@@ -4,14 +4,11 @@ import re
|
|
| 4 |
import time
|
| 5 |
import boto3
|
| 6 |
import pandas as pd
|
| 7 |
-
import json
|
| 8 |
-
import spaces
|
| 9 |
from tqdm import tqdm
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
from typing import List, Tuple, TypeVar
|
| 12 |
from google import genai as ai
|
| 13 |
from google.genai import types
|
| 14 |
-
import gradio as gr
|
| 15 |
from gradio import Progress
|
| 16 |
|
| 17 |
from azure.ai.inference import ChatCompletionsClient
|
|
@@ -26,15 +23,12 @@ _model = None
|
|
| 26 |
_tokenizer = None
|
| 27 |
_assistant_model = None
|
| 28 |
|
| 29 |
-
from tools.config import
|
| 30 |
-
from tools.prompts import initial_table_assistant_prefill
|
| 31 |
from tools.helper_functions import _get_env_list
|
| 32 |
|
| 33 |
if SPECULATIVE_DECODING == "True": SPECULATIVE_DECODING = True
|
| 34 |
else: SPECULATIVE_DECODING = False
|
| 35 |
|
| 36 |
-
if USE_SPECULATIVE_DECODING == "True": USE_SPECULATIVE_DECODING = True
|
| 37 |
-
else: USE_SPECULATIVE_DECODING = False
|
| 38 |
|
| 39 |
if isinstance(NUM_PRED_TOKENS, str): NUM_PRED_TOKENS = int(NUM_PRED_TOKENS)
|
| 40 |
if isinstance(LLM_MAX_GPU_LAYERS, str): LLM_MAX_GPU_LAYERS = int(LLM_MAX_GPU_LAYERS)
|
|
@@ -186,6 +180,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
|
|
| 186 |
compile_mode=COMPILE_MODE,
|
| 187 |
model_dtype=MODEL_DTYPE,
|
| 188 |
hf_token=HF_TOKEN,
|
|
|
|
| 189 |
model=None,
|
| 190 |
tokenizer=None,
|
| 191 |
assistant_model=None):
|
|
@@ -205,6 +200,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
|
|
| 205 |
compile_mode (str): The compilation mode to use for the model.
|
| 206 |
model_dtype (str): The data type to use for the model.
|
| 207 |
hf_token (str): The Hugging Face token to use for the model.
|
|
|
|
| 208 |
model (Llama/transformers model): The model to load.
|
| 209 |
tokenizer (list/transformers tokenizer): The tokenizer to load.
|
| 210 |
assistant_model (transformers model): The assistant model for speculative decoding.
|
|
@@ -212,7 +208,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
|
|
| 212 |
tuple: A tuple containing:
|
| 213 |
- model (Llama/transformers model): The loaded Llama.cpp/transformers model instance.
|
| 214 |
- tokenizer (list/transformers tokenizer): An empty list (tokenizer is not used with Llama.cpp directly in this setup), or a transformers tokenizer.
|
| 215 |
-
- assistant_model (transformers model): The assistant model for speculative decoding (if
|
| 216 |
'''
|
| 217 |
|
| 218 |
if model:
|
|
@@ -263,9 +259,9 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
|
|
| 263 |
try:
|
| 264 |
print("GPU load variables:" , vars(gpu_config))
|
| 265 |
if speculative_decoding:
|
| 266 |
-
model = Llama(model_path=model_path, type_k=
|
| 267 |
else:
|
| 268 |
-
model = Llama(model_path=model_path, type_k=
|
| 269 |
|
| 270 |
except Exception as e:
|
| 271 |
print("GPU load failed due to:", e, "Loading model in CPU mode")
|
|
@@ -397,7 +393,7 @@ def load_model(local_model_type:str=CHOSEN_LOCAL_MODEL_TYPE,
|
|
| 397 |
print("GPU layers assigned to cuda:", gpu_layers)
|
| 398 |
|
| 399 |
# Load assistant model for speculative decoding if enabled
|
| 400 |
-
if
|
| 401 |
print("Loading assistant model for speculative decoding:", ASSISTANT_MODEL)
|
| 402 |
try:
|
| 403 |
from transformers import AutoModelForCausalLM
|
|
@@ -764,7 +760,7 @@ def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tok
|
|
| 764 |
|
| 765 |
return response
|
| 766 |
|
| 767 |
-
def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCPPGenerationConfig, model=None, tokenizer=None, assistant_model=None, progress=Progress(track_tqdm=False)):
|
| 768 |
"""
|
| 769 |
This function sends a request to a transformers model (through Unsloth) with the given prompt, system prompt, and generation configuration.
|
| 770 |
"""
|
|
@@ -774,7 +770,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
|
|
| 774 |
model = get_model()
|
| 775 |
if tokenizer is None:
|
| 776 |
tokenizer = get_tokenizer()
|
| 777 |
-
if assistant_model is None and
|
| 778 |
assistant_model = get_assistant_model()
|
| 779 |
|
| 780 |
if model is None or tokenizer is None:
|
|
@@ -784,10 +780,17 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
|
|
| 784 |
def wrap_text_message(text):
|
| 785 |
return [{"type": "text", "text": text}]
|
| 786 |
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
|
|
|
|
| 791 |
#print("Conversation:", conversation)
|
| 792 |
#import pprint
|
| 793 |
#pprint.pprint(conversation)
|
|
@@ -812,7 +815,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
|
|
| 812 |
|
| 813 |
# Map LlamaCPP parameters to transformers parameters
|
| 814 |
generation_kwargs = {
|
| 815 |
-
'
|
| 816 |
'temperature': gen_config.temperature,
|
| 817 |
'top_p': gen_config.top_p,
|
| 818 |
'top_k': gen_config.top_k,
|
|
@@ -834,7 +837,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
|
|
| 834 |
start_time = time.time()
|
| 835 |
|
| 836 |
# Use speculative decoding if assistant model is available
|
| 837 |
-
if
|
| 838 |
print("Using speculative decoding with assistant model")
|
| 839 |
outputs = model.generate(
|
| 840 |
input_ids,
|
|
@@ -853,7 +856,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
|
|
| 853 |
end_time = time.time()
|
| 854 |
|
| 855 |
# --- Decode and Display Results ---
|
| 856 |
-
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 857 |
# To get only the model's reply, we can decode just the newly generated tokens
|
| 858 |
new_tokens = outputs[0][input_ids.shape[-1]:]
|
| 859 |
assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
|
|
@@ -883,6 +886,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
|
|
| 883 |
full_prompt = "Conversation history:\n"
|
| 884 |
num_transformer_input_tokens = 0
|
| 885 |
num_transformer_generated_tokens = 0
|
|
|
|
| 886 |
|
| 887 |
for entry in conversation_history:
|
| 888 |
role = entry['role'].capitalize() # Assuming the history is stored with 'role' and 'parts'
|
|
@@ -915,7 +919,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
|
|
| 915 |
time.sleep(timeout_wait)
|
| 916 |
|
| 917 |
if i == number_of_api_retry_attempts:
|
| 918 |
-
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
|
| 919 |
|
| 920 |
elif "AWS" in model_source:
|
| 921 |
for i in progress_bar:
|
|
@@ -931,7 +935,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
|
|
| 931 |
time.sleep(timeout_wait)
|
| 932 |
|
| 933 |
if i == number_of_api_retry_attempts:
|
| 934 |
-
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
|
| 935 |
elif "Azure" in model_source:
|
| 936 |
for i in progress_bar:
|
| 937 |
try:
|
|
@@ -960,7 +964,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
|
|
| 960 |
print("Call to Azure model failed:", e, " Waiting for ", str(timeout_wait), "seconds and trying again.")
|
| 961 |
time.sleep(timeout_wait)
|
| 962 |
if i == number_of_api_retry_attempts:
|
| 963 |
-
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
|
| 964 |
elif "Local" in model_source:
|
| 965 |
# This is the local model
|
| 966 |
for i in progress_bar:
|
|
@@ -986,10 +990,10 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
|
|
| 986 |
time.sleep(timeout_wait)
|
| 987 |
|
| 988 |
if i == number_of_api_retry_attempts:
|
| 989 |
-
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
|
| 990 |
else:
|
| 991 |
print("Model source not recognised")
|
| 992 |
-
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history
|
| 993 |
|
| 994 |
# Update the conversation history with the new prompt and response
|
| 995 |
conversation_history.append({'role': 'user', 'parts': [prompt]})
|
|
@@ -998,19 +1002,17 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
|
|
| 998 |
if isinstance(response, ResponseObject):
|
| 999 |
response_text = response.text
|
| 1000 |
elif 'choices' in response: # LLama.cpp model response
|
| 1001 |
-
if "gpt-oss" in model_choice:
|
| 1002 |
-
|
| 1003 |
-
else:
|
| 1004 |
-
response_text = response['choices'][0]['message']['content']
|
| 1005 |
-
response_text = response_text.strip()
|
| 1006 |
elif model_source == "Gemini":
|
| 1007 |
response_text = response.text
|
| 1008 |
-
response_text = response_text.strip()
|
| 1009 |
else: # Assume transformers model response
|
| 1010 |
-
if "gpt-oss" in model_choice:
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
|
|
|
|
|
|
| 1014 |
|
| 1015 |
conversation_history.append({'role': 'assistant', 'parts': [response_text]})
|
| 1016 |
|
|
|
|
| 4 |
import time
|
| 5 |
import boto3
|
| 6 |
import pandas as pd
|
|
|
|
|
|
|
| 7 |
from tqdm import tqdm
|
| 8 |
from huggingface_hub import hf_hub_download
|
| 9 |
from typing import List, Tuple, TypeVar
|
| 10 |
from google import genai as ai
|
| 11 |
from google.genai import types
|
|
|
|
| 12 |
from gradio import Progress
|
| 13 |
|
| 14 |
from azure.ai.inference import ChatCompletionsClient
|
|
|
|
| 23 |
_tokenizer = None
|
| 24 |
_assistant_model = None
|
| 25 |
|
| 26 |
+
from tools.config import LLM_TEMPERATURE, LLM_TOP_K, LLM_MIN_P, LLM_TOP_P, LLM_REPETITION_PENALTY, LLM_LAST_N_TOKENS, LLM_MAX_NEW_TOKENS, LLM_SEED, LLM_RESET, LLM_STREAM, LLM_THREADS, LLM_BATCH_SIZE, LLM_CONTEXT_LENGTH, LLM_SAMPLE, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, MAX_COMMENT_CHARS, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, HF_TOKEN, LLM_SEED, LLM_MAX_GPU_LAYERS, SPECULATIVE_DECODING, NUM_PRED_TOKENS, USE_LLAMA_CPP, COMPILE_MODE, MODEL_DTYPE, USE_BITSANDBYTES, COMPILE_TRANSFORMERS, INT8_WITH_OFFLOAD_TO_CPU, LOAD_LOCAL_MODEL_AT_START, ASSISTANT_MODEL, LLM_STOP_STRINGS, MULTIMODAL_PROMPT_FORMAT, KV_QUANT_LEVEL
|
|
|
|
| 27 |
from tools.helper_functions import _get_env_list
|
| 28 |
|
| 29 |
if SPECULATIVE_DECODING == "True": SPECULATIVE_DECODING = True
|
| 30 |
else: SPECULATIVE_DECODING = False
|
| 31 |
|
|
|
|
|
|
|
| 32 |
|
| 33 |
if isinstance(NUM_PRED_TOKENS, str): NUM_PRED_TOKENS = int(NUM_PRED_TOKENS)
|
| 34 |
if isinstance(LLM_MAX_GPU_LAYERS, str): LLM_MAX_GPU_LAYERS = int(LLM_MAX_GPU_LAYERS)
|
|
|
|
| 180 |
compile_mode=COMPILE_MODE,
|
| 181 |
model_dtype=MODEL_DTYPE,
|
| 182 |
hf_token=HF_TOKEN,
|
| 183 |
+
speculative_decoding=speculative_decoding,
|
| 184 |
model=None,
|
| 185 |
tokenizer=None,
|
| 186 |
assistant_model=None):
|
|
|
|
| 200 |
compile_mode (str): The compilation mode to use for the model.
|
| 201 |
model_dtype (str): The data type to use for the model.
|
| 202 |
hf_token (str): The Hugging Face token to use for the model.
|
| 203 |
+
speculative_decoding (bool): Whether to use speculative decoding.
|
| 204 |
model (Llama/transformers model): The model to load.
|
| 205 |
tokenizer (list/transformers tokenizer): The tokenizer to load.
|
| 206 |
assistant_model (transformers model): The assistant model for speculative decoding.
|
|
|
|
| 208 |
tuple: A tuple containing:
|
| 209 |
- model (Llama/transformers model): The loaded Llama.cpp/transformers model instance.
|
| 210 |
- tokenizer (list/transformers tokenizer): An empty list (tokenizer is not used with Llama.cpp directly in this setup), or a transformers tokenizer.
|
| 211 |
+
- assistant_model (transformers model): The assistant model for speculative decoding (if speculative_decoding is True).
|
| 212 |
'''
|
| 213 |
|
| 214 |
if model:
|
|
|
|
| 259 |
try:
|
| 260 |
print("GPU load variables:" , vars(gpu_config))
|
| 261 |
if speculative_decoding:
|
| 262 |
+
model = Llama(model_path=model_path, type_k=KV_QUANT_LEVEL, type_v=KV_QUANT_LEVEL, flash_attn=True, draft_model=LlamaPromptLookupDecoding(num_pred_tokens=NUM_PRED_TOKENS), **vars(gpu_config))
|
| 263 |
else:
|
| 264 |
+
model = Llama(model_path=model_path, type_k=KV_QUANT_LEVEL, type_v=KV_QUANT_LEVEL, flash_attn=True, **vars(gpu_config))
|
| 265 |
|
| 266 |
except Exception as e:
|
| 267 |
print("GPU load failed due to:", e, "Loading model in CPU mode")
|
|
|
|
| 393 |
print("GPU layers assigned to cuda:", gpu_layers)
|
| 394 |
|
| 395 |
# Load assistant model for speculative decoding if enabled
|
| 396 |
+
if speculative_decoding and USE_LLAMA_CPP == "False" and torch_device == "cuda":
|
| 397 |
print("Loading assistant model for speculative decoding:", ASSISTANT_MODEL)
|
| 398 |
try:
|
| 399 |
from transformers import AutoModelForCausalLM
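The assistant model feeds transformers-side speculative (assisted) generation, where a small draft model proposes tokens that the main model verifies. A hedged sketch of the mechanism with placeholder model names:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B")                                   # placeholder
main_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-4B", device_map="cuda")        # placeholder
draft_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", device_map="cuda")     # placeholder draft

inputs = tokenizer("Summarise the responses in the table below.", return_tensors="pt").to("cuda")
# assistant_model triggers assisted generation; the main model still verifies every token
outputs = main_model.generate(**inputs, assistant_model=draft_model, max_new_tokens=64)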
|
|
|
|
| 760 |
|
| 761 |
return response
|
| 762 |
|
| 763 |
+
def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCPPGenerationConfig, model=None, tokenizer=None, assistant_model=None, speculative_decoding=speculative_decoding, progress=Progress(track_tqdm=False)):
|
| 764 |
"""
|
| 765 |
This function sends a request to a transformers model (through Unsloth) with the given prompt, system prompt, and generation configuration.
|
| 766 |
"""
|
|
|
|
| 770 |
model = get_model()
|
| 771 |
if tokenizer is None:
|
| 772 |
tokenizer = get_tokenizer()
|
| 773 |
+
if assistant_model is None and speculative_decoding:
|
| 774 |
assistant_model = get_assistant_model()
|
| 775 |
|
| 776 |
if model is None or tokenizer is None:
|
|
|
|
| 780 |
def wrap_text_message(text):
|
| 781 |
return [{"type": "text", "text": text}]
|
| 782 |
|
| 783 |
+
if MULTIMODAL_PROMPT_FORMAT == "True":
|
| 784 |
+
conversation = [
|
| 785 |
+
{"role": "system", "content": wrap_text_message(system_prompt)},
|
| 786 |
+
{"role": "user", "content": wrap_text_message(prompt)}
|
| 787 |
+
]
|
| 788 |
+
|
| 789 |
+
else:
|
| 790 |
+
conversation = [
|
| 791 |
+
{"role": "system", "content": system_prompt},
|
| 792 |
+
{"role": "user", "content": prompt}
|
| 793 |
+
]
|
| 794 |
#print("Conversation:", conversation)
|
| 795 |
#import pprint
|
| 796 |
#pprint.pprint(conversation)
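The wrapped {"type": "text", "text": ...} content form matches chat templates that expect multimodal-style message parts, while the plain-string form suits text-only templates. A hedged sketch of how either conversation is turned into model inputs (mirrors the tokenisation step that follows; variable names assumed):

input_ids = tokenizer.apply_chat_template(
    conversation,
    add_generation_prompt=True,  # append the assistant-turn marker so generation starts cleanly
    return_tensors="pt",
).to(model.device)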
|
|
|
|
| 815 |
|
| 816 |
# Map LlamaCPP parameters to transformers parameters
|
| 817 |
generation_kwargs = {
|
| 818 |
+
'max_new_tokens': gen_config.max_tokens,
|
| 819 |
'temperature': gen_config.temperature,
|
| 820 |
'top_p': gen_config.top_p,
|
| 821 |
'top_k': gen_config.top_k,
|
|
|
|
| 837 |
start_time = time.time()
|
| 838 |
|
| 839 |
# Use speculative decoding if assistant model is available
|
| 840 |
+
if speculative_decoding and assistant_model is not None:
|
| 841 |
print("Using speculative decoding with assistant model")
|
| 842 |
outputs = model.generate(
|
| 843 |
input_ids,
|
|
|
|
| 856 |
end_time = time.time()
|
| 857 |
|
| 858 |
# --- Decode and Display Results ---
|
| 859 |
+
#generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 860 |
# To get only the model's reply, we can decode just the newly generated tokens
|
| 861 |
new_tokens = outputs[0][input_ids.shape[-1]:]
|
| 862 |
assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
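Because outputs[0] contains the prompt followed by the new tokens, slicing at input_ids.shape[-1] isolates the reply; the same lengths give the input and generated token counts used for usage reporting. A hedged sketch (names assumed):

num_input_tokens = int(input_ids.shape[-1])                           # prompt length in tokens
num_generated_tokens = int(outputs[0].shape[-1]) - num_input_tokens   # newly generated tokens only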
|
|
|
|
| 886 |
full_prompt = "Conversation history:\n"
|
| 887 |
num_transformer_input_tokens = 0
|
| 888 |
num_transformer_generated_tokens = 0
|
| 889 |
+
response_text = ""
|
| 890 |
|
| 891 |
for entry in conversation_history:
|
| 892 |
role = entry['role'].capitalize() # Assuming the history is stored with 'role' and 'parts'
|
|
|
|
| 919 |
time.sleep(timeout_wait)
|
| 920 |
|
| 921 |
if i == number_of_api_retry_attempts:
|
| 922 |
+
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
|
| 923 |
|
| 924 |
elif "AWS" in model_source:
|
| 925 |
for i in progress_bar:
|
|
|
|
| 935 |
time.sleep(timeout_wait)
|
| 936 |
|
| 937 |
if i == number_of_api_retry_attempts:
|
| 938 |
+
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
|
| 939 |
elif "Azure" in model_source:
|
| 940 |
for i in progress_bar:
|
| 941 |
try:
|
|
|
|
| 964 |
print("Call to Azure model failed:", e, " Waiting for ", str(timeout_wait), "seconds and trying again.")
|
| 965 |
time.sleep(timeout_wait)
|
| 966 |
if i == number_of_api_retry_attempts:
|
| 967 |
+
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
|
| 968 |
elif "Local" in model_source:
|
| 969 |
# This is the local model
|
| 970 |
for i in progress_bar:
|
|
|
|
| 990 |
time.sleep(timeout_wait)
|
| 991 |
|
| 992 |
if i == number_of_api_retry_attempts:
|
| 993 |
+
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
|
| 994 |
else:
|
| 995 |
print("Model source not recognised")
|
| 996 |
+
return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
|
| 997 |
|
| 998 |
# Update the conversation history with the new prompt and response
|
| 999 |
conversation_history.append({'role': 'user', 'parts': [prompt]})
|
|
|
|
| 1002 |
if isinstance(response, ResponseObject):
|
| 1003 |
response_text = response.text
|
| 1004 |
elif 'choices' in response: # LLama.cpp model response
|
| 1005 |
+
if "gpt-oss" in model_choice: response_text = response['choices'][0]['message']['content'].split('<|start|>assistant<|channel|>final<|message|>')[1]
|
| 1006 |
+
else: response_text = response['choices'][0]['message']['content']
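gpt-oss models emit Harmony channel markers (analysis, final, etc.), so the split above keeps only the final-channel text. A slightly more defensive hedged variant that falls back to the full output if the marker is missing:

final_marker = '<|start|>assistant<|channel|>final<|message|>'
raw_text = response['choices'][0]['message']['content']
response_text = raw_text.split(final_marker, 1)[1] if final_marker in raw_text else raw_text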
|
|
|
|
|
|
|
|
|
|
| 1007 |
elif model_source == "Gemini":
|
| 1008 |
response_text = response.text
|
|
|
|
| 1009 |
else: # Assume transformers model response
|
| 1010 |
+
if "gpt-oss" in model_choice: response_text = response.split('<|start|>assistant<|channel|>final<|message|>')[1]
|
| 1011 |
+
else: response_text = response
|
| 1012 |
+
|
| 1013 |
+
# Replace multiple spaces with single space
|
| 1014 |
+
response_text = re.sub(r' {2,}', ' ', response_text)
|
| 1015 |
+
response_text = response_text.strip()
|
| 1016 |
|
| 1017 |
conversation_history.append({'role': 'assistant', 'parts': [response_text]})
|
| 1018 |
|
tools/prompts.py
CHANGED
|
@@ -8,12 +8,16 @@ initial_table_system_prompt = system_prompt + markdown_additional_prompt
|
|
| 8 |
|
| 9 |
initial_table_assistant_prefill = "|"
|
| 10 |
|
|
|
|
| 11 |
initial_table_prompt = """Your task is to create one new markdown table based on open text responses in the reponse table below with the headings 'General topic', 'Subtopic', 'Sentiment', 'Response References', and 'Summary'.
|
| 12 |
In the first column identify general topics relevant to responses. Create as many general topics as you can.
|
| 13 |
In the second column list subtopics relevant to responses. Make the subtopics as specific as possible and make sure they cover every issue mentioned. The subtopic should never be blank or empty.
|
| 14 |
{sentiment_choices}.
|
| 15 |
-
In the fourth column
|
| 16 |
-
In the fifth column, write a
|
| 17 |
Do not add any other columns. Do not add any other text to your response.
|
| 18 |
|
| 19 |
Response table:
|
|
@@ -46,8 +50,8 @@ force_single_topic_prompt = """ Assign each response to one single topic only.""
|
|
| 46 |
add_existing_topics_prompt = """Your task is to create one new markdown table, assigning responses from the Response table below to topics.
|
| 47 |
{topic_assignment}{force_single_topic}
|
| 48 |
{sentiment_choices}.
|
| 49 |
-
In the fourth column
|
| 50 |
-
In the fifth column, write a
|
| 51 |
Do not add any other columns. Do not add any other text to your response.
|
| 52 |
|
| 53 |
Responses are shown in the following Response table:
|
|
|
|
| 8 |
|
| 9 |
initial_table_assistant_prefill = "|"
|
| 10 |
|
| 11 |
+
default_response_reference_format = "list each specific Response reference number that is relevant to the Subtopic, separated by commas. Do not write any other text in this column."
|
| 12 |
+
|
| 13 |
+
single_response_reference_format = "'Response References', write the number 1 alongside each Subtopic and no other text."
|
| 14 |
+
|
| 15 |
initial_table_prompt = """Your task is to create one new markdown table based on open text responses in the reponse table below with the headings 'General topic', 'Subtopic', 'Sentiment', 'Response References', and 'Summary'.
|
| 16 |
In the first column identify general topics relevant to responses. Create as many general topics as you can.
|
| 17 |
In the second column list subtopics relevant to responses. Make the subtopics as specific as possible and make sure they cover every issue mentioned. The subtopic should never be blank or empty.
|
| 18 |
{sentiment_choices}.
|
| 19 |
+
In the fourth column {response_reference_format}
|
| 20 |
+
In the fifth column, write a summary of the subtopic based on relevant responses - highlight specific issues that appear. {add_existing_topics_summary_format}
|
| 21 |
Do not add any other columns. Do not add any other text to your response.
|
| 22 |
|
| 23 |
Response table:
|
|
|
|
| 50 |
add_existing_topics_prompt = """Your task is to create one new markdown table, assigning responses from the Response table below to topics.
|
| 51 |
{topic_assignment}{force_single_topic}
|
| 52 |
{sentiment_choices}.
|
| 53 |
+
In the fourth column {response_reference_format}
|
| 54 |
+
In the fifth column, write a summary of the Subtopic based on relevant responses - highlight specific issues that appear. {add_existing_topics_summary_format}
|
| 55 |
Do not add any other columns. Do not add any other text to your response.
|
| 56 |
|
| 57 |
Responses are shown in the following Response table:
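For clarity, the new {response_reference_format} and {add_existing_topics_summary_format} placeholders are filled via str.format in tools/llm_api_call.py; a hedged, minimal illustration with made-up values:

from tools.prompts import initial_table_prompt, default_response_reference_format

example_prompt = initial_table_prompt.format(
    response_table="| Reference | Response |\n| 1 | Example answer |",                 # illustrative table
    sentiment_choices="In the third column, assign a sentiment to each Subtopic.",     # illustrative
    response_reference_format=default_response_reference_format,
    add_existing_topics_summary_format="Keep each summary to two sentences.",          # illustrative
)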
|
tools/verify_titles.py
CHANGED
|
@@ -492,17 +492,17 @@ def verify_titles(in_data_file,
|
|
| 492 |
|
| 493 |
# Write outputs to csv
|
| 494 |
## Topics with references
|
| 495 |
-
new_topic_df.to_csv(topic_table_out_path, index=None)
|
| 496 |
log_files_output_paths.append(topic_table_out_path)
|
| 497 |
|
| 498 |
## Reference table mapping response numbers to topics
|
| 499 |
-
new_reference_df.to_csv(reference_table_out_path, index=None)
|
| 500 |
out_file_paths.append(reference_table_out_path)
|
| 501 |
|
| 502 |
## Unique topic list
|
| 503 |
new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]) #.drop_duplicates('Subtopic')
|
| 504 |
|
| 505 |
-
new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
|
| 506 |
out_file_paths.append(unique_topics_df_out_path)
|
| 507 |
|
| 508 |
# Outputs for markdown table output
|
|
@@ -536,7 +536,7 @@ def verify_titles(in_data_file,
|
|
| 536 |
|
| 537 |
formatted_initial_table_system_prompt = system_prompt.format(consultation_context=context_textbox, column_name=chosen_cols)
|
| 538 |
|
| 539 |
-
formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table)
|
| 540 |
|
| 541 |
if prompt2: formatted_prompt2 = prompt2.format(response_table=normalised_simple_markdown_table)
|
| 542 |
else: formatted_prompt2 = prompt2
|
|
@@ -561,16 +561,16 @@ def verify_titles(in_data_file,
|
|
| 561 |
# If error in table parsing, leave function
|
| 562 |
if is_error == True: raise Exception("Error in output table parsing")
|
| 563 |
|
| 564 |
-
topic_table_df.to_csv(topic_table_out_path, index=None)
|
| 565 |
out_file_paths.append(topic_table_out_path)
|
| 566 |
|
| 567 |
-
reference_df.to_csv(reference_table_out_path, index=None)
|
| 568 |
out_file_paths.append(reference_table_out_path)
|
| 569 |
|
| 570 |
## Unique topic list
|
| 571 |
new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df])
|
| 572 |
|
| 573 |
-
new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
|
| 574 |
out_file_paths.append(unique_topics_df_out_path)
|
| 575 |
|
| 576 |
whole_conversation_metadata.append(whole_conversation_metadata_str)
|
|
@@ -672,14 +672,14 @@ def verify_titles(in_data_file,
|
|
| 672 |
basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
|
| 673 |
|
| 674 |
## Reference table mapping response numbers to topics
|
| 675 |
-
existing_reference_df.to_csv(reference_table_out_path, index=None)
|
| 676 |
out_file_paths.append(reference_table_out_path)
|
| 677 |
|
| 678 |
# Create final unique topics table from reference table to ensure consistent numbers
|
| 679 |
final_out_unique_topics_df = existing_unique_topics_df #create_topic_summary_df_from_reference_table(existing_reference_df)
|
| 680 |
|
| 681 |
## Unique topic list
|
| 682 |
-
final_out_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
|
| 683 |
out_file_paths.append(unique_topics_df_out_path)
|
| 684 |
|
| 685 |
# Ensure that we are only returning the final results to outputs
|
|
@@ -696,7 +696,7 @@ def verify_titles(in_data_file,
|
|
| 696 |
basic_response_data = get_basic_response_data(file_data, chosen_cols, verify_titles=True)
|
| 697 |
|
| 698 |
# Save simplified file data to log outputs
|
| 699 |
-
pd.DataFrame(basic_response_data).to_csv(basic_response_data_out_path, index=None)
|
| 700 |
log_files_output_paths.append(basic_response_data_out_path)
|
| 701 |
|
| 702 |
# Step 1: Identify missing references
|
|
@@ -713,7 +713,7 @@ def verify_titles(in_data_file,
|
|
| 713 |
#print("missing_df:", missing_df)
|
| 714 |
|
| 715 |
missing_df_out_path = output_folder + file_path_details + "_missing_references_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
|
| 716 |
-
missing_df.to_csv(missing_df_out_path, index=None)
|
| 717 |
log_files_output_paths.append(missing_df_out_path)
|
| 718 |
|
| 719 |
out_file_paths = list(set(out_file_paths))
|
|
|
|
| 492 |
|
| 493 |
# Write outputs to csv
|
| 494 |
## Topics with references
|
| 495 |
+
new_topic_df.to_csv(topic_table_out_path, index=None, encoding='utf-8-sig')
|
| 496 |
log_files_output_paths.append(topic_table_out_path)
|
| 497 |
|
| 498 |
## Reference table mapping response numbers to topics
|
| 499 |
+
new_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
|
| 500 |
out_file_paths.append(reference_table_out_path)
|
| 501 |
|
| 502 |
## Unique topic list
|
| 503 |
new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]) #.drop_duplicates('Subtopic')
|
| 504 |
|
| 505 |
+
new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None, encoding='utf-8-sig')
|
| 506 |
out_file_paths.append(unique_topics_df_out_path)
|
| 507 |
|
| 508 |
# Outputs for markdown table output
|
|
|
|
| 536 |
|
| 537 |
formatted_initial_table_system_prompt = system_prompt.format(consultation_context=context_textbox, column_name=chosen_cols)
|
| 538 |
|
| 539 |
+
formatted_initial_table_prompt = initial_table_prompt.format(response_table=normalised_simple_markdown_table, add_existing_topics_summary_format=add_existing_topics_summary_format)
|
| 540 |
|
| 541 |
if prompt2: formatted_prompt2 = prompt2.format(response_table=normalised_simple_markdown_table)
|
| 542 |
else: formatted_prompt2 = prompt2
|
|
|
|
| 561 |
# If error in table parsing, leave function
|
| 562 |
if is_error == True: raise Exception("Error in output table parsing")
|
| 563 |
|
| 564 |
+
topic_table_df.to_csv(topic_table_out_path, index=None, encoding='utf-8-sig')
|
| 565 |
out_file_paths.append(topic_table_out_path)
|
| 566 |
|
| 567 |
+
reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
|
| 568 |
out_file_paths.append(reference_table_out_path)
|
| 569 |
|
| 570 |
## Unique topic list
|
| 571 |
new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df])
|
| 572 |
|
| 573 |
+
new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None, encoding='utf-8-sig')
|
| 574 |
out_file_paths.append(unique_topics_df_out_path)
|
| 575 |
|
| 576 |
whole_conversation_metadata.append(whole_conversation_metadata_str)
|
|
|
|
| 672 |
basic_response_data_out_path = output_folder + file_path_details + "_simplified_data_file_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
|
| 673 |
|
| 674 |
## Reference table mapping response numbers to topics
|
| 675 |
+
existing_reference_df.to_csv(reference_table_out_path, index=None, encoding='utf-8-sig')
|
| 676 |
out_file_paths.append(reference_table_out_path)
|
| 677 |
|
| 678 |
# Create final unique topics table from reference table to ensure consistent numbers
|
| 679 |
final_out_unique_topics_df = existing_unique_topics_df #create_topic_summary_df_from_reference_table(existing_reference_df)
|
| 680 |
|
| 681 |
## Unique topic list
|
| 682 |
+
final_out_unique_topics_df.to_csv(unique_topics_df_out_path, index=None, encoding='utf-8-sig')
|
| 683 |
out_file_paths.append(unique_topics_df_out_path)
|
| 684 |
|
| 685 |
# Ensure that we are only returning the final results to outputs
|
|
|
|
| 696 |
basic_response_data = get_basic_response_data(file_data, chosen_cols, verify_titles=True)
|
| 697 |
|
| 698 |
# Save simplified file data to log outputs
|
| 699 |
+
pd.DataFrame(basic_response_data).to_csv(basic_response_data_out_path, index=None, encoding='utf-8-sig')
|
| 700 |
log_files_output_paths.append(basic_response_data_out_path)
|
| 701 |
|
| 702 |
# Step 1: Identify missing references
|
|
|
|
| 713 |
#print("missing_df:", missing_df)
|
| 714 |
|
| 715 |
missing_df_out_path = output_folder + file_path_details + "_missing_references_" + model_choice_clean + "_temp_" + str(temperature) + ".csv"
|
| 716 |
+
missing_df.to_csv(missing_df_out_path, index=None, encoding='utf-8-sig')
|
| 717 |
log_files_output_paths.append(missing_df_out_path)
|
| 718 |
|
| 719 |
out_file_paths = list(set(out_file_paths))
|
windows_install_llama-cpp-python.txt
CHANGED
|
@@ -77,13 +77,15 @@ set PKG_CONFIG_PATH=C:\<path-to-openblas>\OpenBLAS\lib\pkgconfig # Set this in e
|
|
| 77 |
|
| 78 |
pip install llama-cpp-python==0.3.16 --force-reinstall --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
|
| 79 |
|
|
|
|
|
|
|
| 80 |
or to make a wheel:
|
| 81 |
|
| 82 |
pip install llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
|
| 83 |
|
| 84 |
-
pip wheel llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/Users
|
|
|
|
| 85 |
|
| 86 |
-
C:/Users/spedrickcase/libs
|
| 87 |
|
| 88 |
## With Cuda (NVIDIA GPUs only)
|
| 89 |
|
|
|
|
| 77 |
|
| 78 |
pip install llama-cpp-python==0.3.16 --force-reinstall --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
|
| 79 |
|
| 80 |
+
pip install llama-cpp-python==0.3.16 --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/Users/s_cas/libs/OpenBLAS/include;-DBLAS_LIBRARIES=C:/Users/s_cas/OpenBLAS/lib/libopenblas.lib;-DPKG_CONFIG_PATH=C:/users/s_cas/openblas/lib/pkgconfig"
|
| 81 |
+
|
| 82 |
or to make a wheel:
|
| 83 |
|
| 84 |
pip install llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/<path-to-openblas>/OpenBLAS/include;-DBLAS_LIBRARIES=C:/<path-to-openblas>/OpenBLAS/lib/libopenblas.lib"
|
| 85 |
|
| 86 |
+
pip wheel llama-cpp-python==0.3.16 --wheel-dir dist --verbose --no-cache-dir -Ccmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS;-DBLAS_INCLUDE_DIRS=C:/Users/<user>/libs/OpenBLAS/include;-DBLAS_LIBRARIES=C:/Users/<user>/libs/OpenBLAS/lib/libopenblas.lib"
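Once the wheel has been built into dist/, it can be installed directly; the exact filename depends on your Python version and platform, so the one below is illustrative only:

pip install dist/llama_cpp_python-0.3.16-cp311-cp311-win_amd64.whl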
|
| 87 |
+
|
| 88 |
|
|
|
|
| 89 |
|
| 90 |
## With Cuda (NVIDIA GPUs only)
|
| 91 |
|