Error while using Locally

#2
by senthazalravi - opened

test.png

I am getting an error while using the model locally.

my python script

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

Function to load the model and tokenizer

def load_model_and_tokenizer(model_name):
    """Load a causal-LM checkpoint and its tokenizer, placing the model on GPU if available.

    Returns a (tokenizer, model, device) triple; the model is in eval mode.
    """
    target_device = "cuda:0" if torch.cuda.is_available() else "cpu"
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForCausalLM.from_pretrained(model_name)
    loaded_model.eval()
    loaded_model.to(target_device)
    return loaded_tokenizer, loaded_model, target_device

Function to generate text using the model

def generate_text(model, tokenizer, device, prompt, max_new_tokens, temperature, top_p):
    """Sample a completion for *prompt* and return it decoded as a string.

    Uses nucleus sampling (do_sample=True) with the given temperature and top_p;
    decoding keeps special tokens, matching the model's raw output.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    prompt_ids = encoded["input_ids"].to(device)
    with torch.no_grad():
        output = model.generate(
            inputs=prompt_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    # generate() returns a batch; this call sends a single prompt, so take row 0.
    return tokenizer.decode(output[0])

Function to get the generated text using the model

def get_generated_text(data):
    """Generate a completion for the first user prompt found in a chat-style request.

    *data* is a dict in the OpenAI-chat shape: a 'messages' list of
    {'role': ..., 'content': ...} dicts, plus optional 'model_name',
    'max_new_tokens', 'temperature', and 'top_p' overrides.

    Returns the generated text, or the string "No user prompt found" when
    no user message is present.
    """
    # Bug fix: the original did data['messages'] and msg['role']/msg['content'],
    # which raises KeyError on a malformed request; use .get with safe defaults.
    prompts = [
        msg.get('content')
        for msg in data.get('messages', [])
        if msg.get('role') == 'user' and msg.get('content')
    ]
    if not prompts:
        return "No user prompt found"
    prompt = prompts[0]  # Use the first user prompt

    model_name = data.get('model_name', "C:/Users/rav/Downloads/Bot/Ai.SE/gpt-sw3-20b-instruct-4bit-gptq")
    max_new_tokens = data.get('max_new_tokens', 100)
    temperature = data.get('temperature', 0.6)
    top_p = data.get('top_p', 1)

    # NOTE(review): the model is reloaded from disk on every call; for a
    # long-running service, load once at startup and reuse the instances.
    tokenizer, model, device = load_model_and_tokenizer(model_name)
    return generate_text(model, tokenizer, device, prompt, max_new_tokens, temperature, top_p)

Any help with resolving this error would be appreciated.

Sign up or log in to comment