# personal_excel_interviewer / src / local_llm_handler.py
# Author: Basu03 — commit 2d12dcf ("vertex ai minor bugs 4")
# src/local_llm_handler.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from functools import lru_cache
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
@lru_cache(maxsize=1)
def load_llm_pipeline():
"""
Loads and caches the local LLM pipeline using Phi-3-mini-4k-instruct.
"""
print("--- Loading main LLM: microsoft/Phi-3-mini-4k-instruct ---")
model_name = "microsoft/phi-3-mini-4k-instruct"
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
model_name,
device_map="auto",
torch_dtype="auto", # Use "auto" for better compatibility
trust_remote_code=True
)
# Build text generation pipeline
# CORRECTED: Added eos_token_id for cleaner, more reliable generation
llm_pipeline = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
max_new_tokens=300,
return_full_text=False,
eos_token_id=tokenizer.eos_token_id # Crucial for stopping generation
)
print("--- Phi-3-mini model loaded successfully ---")
return llm_pipeline
def get_llm_response(prompt: str) -> str:
"""
Gets a response from the cached Phi-3-mini LLM pipeline.
"""
llm_pipeline = load_llm_pipeline()
# Phi-3 uses a specific chat template format
messages = [
{"role": "user", "content": prompt},
]
# Use the tokenizer's built-in chat template for the most reliable formatting
formatted_prompt = llm_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print("AI: (Generating response with Phi-3-mini...)")
try:
outputs = llm_pipeline(formatted_prompt)
response = outputs[0]["generated_text"].strip()
return response
except Exception as e:
print(f"Error during Phi-3-mini generation: {e}")
return "Sorry, I encountered an error while generating a response."