import os  # For SambaNova environment variables
from functools import partial
from typing import List, Tuple

import numpy as np
import tiktoken
from sklearn.metrics.pairwise import cosine_similarity

import litellm
from litellm import completion

from .utils.execute_code import extract_and_run_python_code
from .utils.extractor import extract_answer, extract_cheatsheet

litellm._turn_on_debug()  # Enable verbose LiteLLM debug logging

class LanguageModel:
    def __init__(self,
                 model_name: str,
                 ) -> None:
        """
        LanguageModel class to interact with different language models via LiteLLM.

        Arguments:
            model_name : str : The name of the language model to use.

        Raises:
            ValueError : If the model name is not found or supported.
        """
        self.model_name = model_name
        # Models known to work with this wrapper out of the box.
        known_model_list = [
            "openai/gpt-4o-mini", "openai/gpt-4o-mini-2024-07-18",
            "openai/gpt-4o", "openai/gpt-4o-2024-08-06", "openai/gpt-4o-2024-11-20",
            "openai/gpt-3.5-turbo",
            "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
            "meta-llama/Llama-3.3-70B-Instruct-Turbo",
            "openai/o3-mini", "openai/o3-mini-2025-01-31",
            "openai/o1", "openai/o1-2024-12-17",
            "anthropic/claude-3-5-sonnet-latest", "anthropic/claude-3-5-sonnet-20241022",
            "anthropic/claude-3-5-haiku-latest", "anthropic/claude-3-5-haiku-20241022",
            "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-3-7-sonnet-20250219",
            "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            "together_ai/deepseek-ai/DeepSeek-R1",
            "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
            "together_ai/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
            "together_ai/Qwen/Qwen2.5-Coder-32B-Instruct",
            "together_ai/Qwen/QwQ-32B",
            "together_ai/Qwen/Qwen2-72B-Instruct",
            "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo",
            "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo",
            "gemini/gemini-2.0-flash",
            "ollama/llama3:70b",
        ]
        # Load the client for the model based on the model name.
        if self.model_name.startswith("sambanova/"):
            samba_api_key = os.environ.get("SAMBANOVA_API_KEY")
            samba_base_url = os.environ.get("SAMBANOVA_BASE_URL", "https://api.sambanova.ai/v1")  # Default if not set
            if not samba_api_key:
                raise ValueError("SAMBANOVA_API_KEY environment variable not set for SambaNova model.")
            # SambaNova exposes an OpenAI-compatible endpoint, but LiteLLM does not
            # treat the "sambanova/" prefix here as a provider on its own. So we strip
            # the prefix, send the bare model ID in the request body, and pass
            # api_key/api_base explicitly with custom_llm_provider="openai".
            actual_model_name = self.model_name.split("sambanova/", 1)[1] if "sambanova/" in self.model_name else self.model_name
            self.client = partial(
                completion,
                model=actual_model_name,
                api_key=samba_api_key,
                api_base=samba_base_url,
                custom_llm_provider="openai",
            )
            print(f"Initialized SambaNova model '{actual_model_name}' via custom OpenAI provider settings with api_base: {samba_base_url}")
        elif self.model_name in known_model_list:
            self.client = partial(completion, model=self.model_name)
        else:
            print(f"Warning: Model '{self.model_name}' is not in the known model list and does not start with a recognized prefix. Attempting to initialize it with LiteLLM directly.")
            try:
                self.client = partial(completion, model=self.model_name)
                print(f"Successfully initialized model '{self.model_name}' via LiteLLM fallback.")
            except Exception as e:
                raise ValueError(f"Model '{self.model_name}' is not in the known list, does not start with a recognized prefix, and could not be initialized by LiteLLM directly: {e}")

        self.gpt4Tokenizer = tiktoken.encoding_for_model("gpt-4o")
    def count_tokens(self, text: str) -> int:
        """
        Count the number of tokens in the text using the GPT-4o tokenizer.
        """
        tokens = self.gpt4Tokenizer.encode(text)
        return len(tokens)
    def generate(self,
                 history: List[dict],
                 temperature: float = 0.1,
                 max_tokens: int = 2048,
                 current_depth: int = 1,
                 max_depth_num_rounds: int = 3,
                 allow_code_execution: bool = True,
                 code_execution_flag: str = "EXECUTE CODE!",
                 final_output: str = ""
                 ) -> str:
        """
        Generate a response from the language model, optionally executing any
        Python code the model requests (up to max_depth_num_rounds rounds).
        """
        if len(history) == 0:
            raise ValueError("History must contain at least one message.")
        print('history\n', history)
        # Estimate the prompt size with LiteLLM's token counter (debugging aid only).
        try:
            token_count = litellm.token_counter(model=self.model_name, messages=history)
            print(f"DEBUG: litellm token_counter for '{self.model_name}' estimates: {token_count} tokens")
        except Exception as e:
            print(f"DEBUG: Error using litellm.token_counter: {e}")
        # self.client is a partial with model (and, for SambaNova, api_key/api_base) pre-filled.
        response = self.client(
            messages=history,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        output = response.choices[0].message.content
        print('output\n', output)

        pre_code_execution_flag = output.split(code_execution_flag)[0].strip()
        if allow_code_execution and code_execution_flag in output and pre_code_execution_flag.endswith("```"):
            # The model asked for code execution: run the code block that precedes the
            # flag and append the execution result to the running transcript.
            output_prefix = pre_code_execution_flag
            executed_code = extract_and_run_python_code(output_prefix).strip()
            current_output = f"{output_prefix}\n{code_execution_flag}\n\n{executed_code}"
            final_output = f"{final_output}\n\n{current_output}".strip()
            if current_depth <= max_depth_num_rounds:
                warning_txt = ""
                if current_depth == max_depth_num_rounds:
                    warning_txt = " (This is the last round. No more code execution will be allowed. Please present your final solution now.)"
                new_messages = [
                    {"role": "assistant", "content": current_output},
                    {"role": "user", "content": f"Proceed with any additional steps required and provide the completed solution. If everything is already complete, type FINAL ANSWER and submit it in the expected format. If you are stuck, please try alternative methods to solve the problem and provide the final solution.{warning_txt}"},
                ]
                history += new_messages
                return self.generate(
                    history=history,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    current_depth=current_depth + 1,
                    max_depth_num_rounds=max_depth_num_rounds,
                    allow_code_execution=allow_code_execution,
                    code_execution_flag=code_execution_flag,
                    final_output=final_output,
                )
            else:
                return final_output
        else:
            final_output = f"{final_output}\n\n{output}".strip()
            return final_output
    def advanced_generate(self,
                          approach_name: str,
                          input_txt: str,
                          cheatsheet: str = None,
                          generator_template: str = None,
                          cheatsheet_template: str = None,
                          temperature: float = 0.0,
                          max_tokens: int = 2048,
                          max_num_rounds: int = 1,
                          allow_code_execution: bool = True,
                          code_execution_flag: str = "EXECUTE CODE!",
                          add_previous_answers_to_cheatsheet: bool = True,
                          original_input_corpus: List[str] = None,
                          original_input_embeddings: np.ndarray = None,
                          generator_outputs_so_far: List[str] = None,
                          retrieve_top_k: int = 3,
                          ) -> dict:
        """
        Generate a response from the language model using one of the supported
        approaches ("default", "DynamicCheatsheet_Cumulative", or "FullHistoryAppending").

        Returns a dict (rather than a Tuple) for clarity.
        """
        if approach_name == "default":
            generator_prompt = generator_template.replace("[[QUESTION]]", input_txt).replace("[[CHEATSHEET]]", "(empty)")
            generator_history = [
                {"role": "user", "content": generator_prompt},
            ]
            generator_output = self.generate(
                history=generator_history,
                temperature=temperature,
                max_tokens=max_tokens,
                allow_code_execution=allow_code_execution,
                code_execution_flag=code_execution_flag,
            )
            generator_answer = extract_answer(generator_output)

            return {
                "input_txt": input_txt,
                "steps": [
                    {
                        "round": 0,
                        "generator_prompt": generator_prompt,
                        "generator_output": generator_output,
                        "generator_answer": generator_answer,
                        "current_cheatsheet": None,
                        "new_cheatsheet": None,
                    }
                ],
                "previous_answers": None,
                "final_answer": generator_answer,
                "final_output": generator_output,
                "final_cheatsheet": None,
            }
        elif approach_name == "DynamicCheatsheet_Cumulative":
            if cheatsheet is None:
                raise ValueError("Cheatsheet must be provided for the DynamicCheatsheet_Cumulative approach.")
            if generator_template is None or cheatsheet_template is None:
                raise ValueError("Generator and cheatsheet templates must be provided for the DynamicCheatsheet_Cumulative approach.")

            steps = []
            previous_answers = []
            current_cheatsheet_in_round = cheatsheet  # Local copy, updated after each round
            for round_num in range(max(1, max_num_rounds)):
                generator_cheatsheet_content = current_cheatsheet_in_round
                if round_num > 0 and add_previous_answers_to_cheatsheet and previous_answers:
                    previous_answers_txt = f"PREVIOUS ANSWERS:\n{'; '.join(previous_answers)}"
                    generator_cheatsheet_content = f"{generator_cheatsheet_content}\n\n{previous_answers_txt}"

                generator_prompt = generator_template.replace("[[QUESTION]]", input_txt).replace("[[CHEATSHEET]]", generator_cheatsheet_content)
                generator_history = [{"role": "user", "content": generator_prompt}]
                generator_output = self.generate(
                    history=generator_history,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    allow_code_execution=allow_code_execution,
                    code_execution_flag=code_execution_flag,
                )
                generator_answer = extract_answer(generator_output)

                # Curate a new cheatsheet from the model's answer. The curator currently
                # uses the same model instance (and therefore the same provider settings).
                cheatsheet_prompt = cheatsheet_template.replace("[[QUESTION]]", input_txt).replace("[[MODEL_ANSWER]]", generator_output).replace("[[PREVIOUS_CHEATSHEET]]", current_cheatsheet_in_round)
                cheatsheet_history = [{"role": "user", "content": cheatsheet_prompt}]
                cheatsheet_model_output = self.generate(
                    history=cheatsheet_history,
                    temperature=temperature,
                    max_tokens=2 * max_tokens,
                    allow_code_execution=False,
                )
                new_cheatsheet = extract_cheatsheet(response=cheatsheet_model_output, old_cheatsheet=current_cheatsheet_in_round)

                steps.append({
                    "round": round_num,
                    "generator_prompt": generator_prompt,
                    "generator_output": generator_output,
                    "generator_answer": generator_answer,
                    "current_cheatsheet": current_cheatsheet_in_round,
                    "new_cheatsheet": new_cheatsheet,
                })
                current_cheatsheet_in_round = new_cheatsheet  # Carry forward to the next round
                if generator_answer:
                    previous_answers.append(f"Round {round_num + 1}: {generator_answer}")

            print("input_txt", input_txt)
            print("steps", steps)
            print("previous_answers", previous_answers)
            print("final_answer", generator_answer)
            print("final_cheatsheet", current_cheatsheet_in_round)
            print("final_output", generator_output)

            return {
                "input_txt": input_txt,
                "steps": steps,
                "previous_answers": previous_answers,
                "final_answer": generator_answer,  # Answer from the last round
                "final_cheatsheet": current_cheatsheet_in_round,  # Cheatsheet after the last round
                "final_output": generator_output,  # Full output from the last generator call
            }
        elif approach_name == "FullHistoryAppending":
            # Build a pseudo-cheatsheet from all previous inputs and model solutions.
            length_of_history = len(generator_outputs_so_far) if generator_outputs_so_far else 0
            curated_cheatsheet = "(empty)"
            if length_of_history > 0 and original_input_corpus and generator_outputs_so_far:
                curated_cheatsheet = "### PREVIOUS SOLUTIONS (START)\n\n"
                for i, (prev_input, prev_output) in enumerate(zip(original_input_corpus[:length_of_history], generator_outputs_so_far[:length_of_history])):
                    curated_cheatsheet += f"#### Previous Input #{i+1}:\n\n{prev_input}\n\n#### Model Solution to Previous Input #{i+1}:\n\n{prev_output}\n---\n---\n\n"
                curated_cheatsheet += "#### PREVIOUS SOLUTIONS (END)"

            generator_prompt = generator_template.replace("[[QUESTION]]", input_txt).replace("[[CHEATSHEET]]", curated_cheatsheet)
            generator_history = [{"role": "user", "content": generator_prompt}]
            generator_output = self.generate(
                history=generator_history,
                temperature=temperature,
                max_tokens=max_tokens,
                allow_code_execution=allow_code_execution,
                code_execution_flag=code_execution_flag,
            )
            generator_answer = extract_answer(generator_output)

            return {
                "input_txt": input_txt,
                "steps": [],
                "previous_answers": [],
                "final_answer": generator_answer,
                "final_cheatsheet": curated_cheatsheet,
                "final_output": generator_output,
            }
        else:
            raise ValueError(f"Unknown approach_name: {approach_name}")
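

# --- Hedged usage sketch (illustrative, not part of the original module) ----
# A minimal example of how this class might be driven, assuming OPENAI_API_KEY
# is set and that "openai/gpt-4o-mini" (from the known model list above) is
# acceptable. The template string below is a stand-in; the project's real
# generator/cheatsheet templates live elsewhere. Because this module uses
# relative imports, it must be run as part of its package, e.g.
# `python -m <package>.<module>` (package/module names assumed).
if __name__ == "__main__":
    lm = LanguageModel(model_name="openai/gpt-4o-mini")

    # Hypothetical template with the [[QUESTION]]/[[CHEATSHEET]] placeholders
    # that advanced_generate substitutes.
    demo_generator_template = (
        "You are a careful problem solver.\n\n"
        "CHEATSHEET:\n[[CHEATSHEET]]\n\n"
        "QUESTION:\n[[QUESTION]]\n\n"
        "End with 'FINAL ANSWER:' followed by your answer."
    )

    result = lm.advanced_generate(
        approach_name="default",
        input_txt="What is 17 * 24?",
        generator_template=demo_generator_template,
        temperature=0.0,
        max_tokens=512,
        allow_code_execution=False,  # Keep the demo self-contained
    )
    print("Final answer:", result["final_answer"])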