Spaces:

Chamin09
/

ChatCSV

Sleeping

App Files Files Community

ChatCSV / models /llm_setup_bk.py

Chamin09

Rename models/llm_setup.py to models/llm_setup_bk.py

353df5d verified 4 months ago

raw

history blame contribute delete

2.84 kB

	# Updated import path
	#from llama_index.llms import HuggingFaceInferenceAPI
	#from llama_index.llms.huggingface import HuggingFaceLLM
	from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
	import torch
	# If the huggingface_api import above doesn't work, try:
	# from llama_index.llms.huggingface import HuggingFaceLLM

	def setup_llm(model_name: str = "microsoft/phi-3-mini-4k-instruct",
	              device: str = None,
	              context_window: int = 4096,
	              max_new_tokens: int = 512):
	    """Set up the language model for the CSV chatbot.

	    Three llama_index entry points are tried in order, falling through to
	    the next one when the previous attempt fails:

	    1. ``llama_index.llms.huggingface.HuggingFaceLLM``
	    2. ``llama_index.llms.HuggingFaceInferenceAPI``
	    3. ``llama_index.llms.huggingface.HuggingFaceInference``

	    Args:
	        model_name: Model identifier; also passed as the tokenizer name.
	        device: Value passed as ``device_map`` to ``HuggingFaceLLM``. When
	            ``None``, ``"cuda"`` is chosen if ``torch.cuda.is_available()``
	            and ``"cpu"`` otherwise. Only the first attempt uses it.
	        context_window: Context window size forwarded to the LLM wrapper.
	        max_new_tokens: Generation length limit forwarded to the LLM wrapper.

	    Returns:
	        The first LLM wrapper instance that could be constructed.

	    Raises:
	        Exception: Whatever the last-resort import or constructor raises
	            propagates to the caller. Errors from the first attempt other
	            than ``ImportError``/``AttributeError`` also propagate.
	    """
	    # Determine device
	    if device is None:
	        device = "cuda" if torch.cuda.is_available() else "cpu"

	    # Arguments shared by every candidate constructor.
	    common_kwargs = {
	        "model_name": model_name,
	        "tokenizer_name": model_name,
	        "context_window": context_window,
	        "max_new_tokens": max_new_tokens,
	        "generate_kwargs": {"temperature": 0.7, "top_p": 0.95},
	    }

	    try:
	        # First attempt: the HuggingFaceLLM wrapper.
	        from llama_index.llms.huggingface import HuggingFaceLLM

	        # Configure model with appropriate parameters for HF Spaces
	        # NOTE(review): float16 is requested even when device is "cpu" --
	        # confirm this is intended.
	        model_kwargs = {
	            "trust_remote_code": True,
	            "torch_dtype": torch.float16,
	        }

	        if device == "cuda":
	            # 4-bit quantization is only configured when running on GPU.
	            from transformers import BitsAndBytesConfig

	            model_kwargs["quantization_config"] = BitsAndBytesConfig(
	                load_in_4bit=True,
	                bnb_4bit_compute_dtype=torch.float16,
	            )

	        # NOTE(review): confirm HuggingFaceLLM accepts ``cache_folder``; a
	        # TypeError raised here would not be caught by the handler below.
	        llm = HuggingFaceLLM(
	            **common_kwargs,
	            device_map=device,
	            tokenizer_kwargs={"trust_remote_code": True},
	            model_kwargs=model_kwargs,
	            # Cache the model to avoid reloading
	            cache_folder="./model_cache",
	        )

	    except (ImportError, AttributeError):
	        # Fallback to other API options
	        try:
	            from llama_index.llms import HuggingFaceInferenceAPI

	            llm = HuggingFaceInferenceAPI(**common_kwargs)
	        except Exception:
	            # Deliberately broad so any failure of the second attempt still
	            # reaches the last resort, but no longer a bare ``except`` so
	            # KeyboardInterrupt / SystemExit are not swallowed.
	            from llama_index.llms.huggingface import HuggingFaceInference

	            llm = HuggingFaceInference(**common_kwargs)

	    return llm