File size: 2,835 Bytes
2202e15
a69843f
d776322
a69843f
89efbe0
2202e15
 
89efbe0
 
 
 
2202e15
 
89efbe0
 
 
 
2202e15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89efbe0
2202e15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89efbe0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# NOTE: the HuggingFace wrappers have moved between llama_index releases;
# the current import path for the inference API is
# llama_index.llms.huggingface_api (older paths are tried at runtime below).
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import torch
# If that doesn't work, try:
# from llama_index.llms.huggingface import HuggingFaceLLM

def setup_llm(model_name: str = "microsoft/phi-3-mini-4k-instruct",
              device: str = None,
              context_window: int = 4096,
              max_new_tokens: int = 512):
    """Set up the language model for the CSV chatbot.

    The HuggingFace wrapper classes have moved between llama_index
    releases, so three import paths are tried in order:

    1. ``llama_index.llms.huggingface.HuggingFaceLLM`` — runs the model
       locally (4-bit quantized on CUDA).
    2. ``llama_index.llms.HuggingFaceInferenceAPI`` — remote inference.
    3. ``llama_index.llms.huggingface.HuggingFaceInference`` — legacy
       fallback.

    Args:
        model_name: HuggingFace model id, used for both model and tokenizer.
        device: "cuda" or "cpu"; auto-detected from torch when None.
        context_window: Maximum prompt context size in tokens.
        max_new_tokens: Maximum number of tokens to generate per response.

    Returns:
        A configured llama_index LLM instance.

    Raises:
        ImportError: If none of the known llama_index APIs is importable.
    """
    # Auto-detect the device when the caller did not specify one.
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    try:
        # Preferred path: run the model locally.
        from llama_index.llms.huggingface import HuggingFaceLLM

        # fp16 + trust_remote_code: keeps memory low enough for HF Spaces
        # and allows models (e.g. phi-3) that ship custom modeling code.
        model_kwargs = {
            "trust_remote_code": True,
            "torch_dtype": torch.float16,
        }

        if device == "cuda":
            # 4-bit quantization further reduces GPU memory usage.
            from transformers import BitsAndBytesConfig
            model_kwargs["quantization_config"] = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
            )

        llm = HuggingFaceLLM(
            model_name=model_name,
            tokenizer_name=model_name,
            context_window=context_window,
            max_new_tokens=max_new_tokens,
            generate_kwargs={"temperature": 0.7, "top_p": 0.95},
            device_map=device,
            tokenizer_kwargs={"trust_remote_code": True},
            model_kwargs=model_kwargs,
            # Cache the model to avoid re-downloading between restarts.
            cache_folder="./model_cache",
        )

    except (ImportError, AttributeError):
        # Fallback: older llama_index versions expose the inference API here.
        try:
            from llama_index.llms import HuggingFaceInferenceAPI

            llm = HuggingFaceInferenceAPI(
                model_name=model_name,
                tokenizer_name=model_name,
                context_window=context_window,
                max_new_tokens=max_new_tokens,
                generate_kwargs={"temperature": 0.7, "top_p": 0.95},
            )
        # Narrowed from a bare `except:` — only import/API-shape failures
        # should trigger the legacy fallback, not e.g. KeyboardInterrupt.
        except (ImportError, AttributeError):
            # Last resort: legacy HuggingFaceInference class.
            from llama_index.llms.huggingface import HuggingFaceInference

            llm = HuggingFaceInference(
                model_name=model_name,
                tokenizer_name=model_name,
                context_window=context_window,
                max_new_tokens=max_new_tokens,
                generate_kwargs={"temperature": 0.7, "top_p": 0.95},
            )

    return llm