eneSadi's picture
dolandırıldım
ca386d6 unverified
raw
history blame
2.3 kB
import gradio as gr
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from huggingface_hub import login
import os
# Authenticate against the Hugging Face Hub with the token from the HF_TOKEN
# environment variable. login() is only called when a token is actually
# present: calling it with None falls back to an interactive prompt, which
# cannot be answered in a headless deployment.
access_token = os.getenv('HF_TOKEN')
if access_token:
    login(access_token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login")

model_id = "google/gemma-2-9b-it"

# Load the tokenizer and the model once at import time so every request
# served by ask() reuses the same objects.
print("Model loading started")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
print("Model loading completed. Device of the model:", model.device)

# NOTE: the triple-quoted string below is a bare string literal, so nothing in
# it is executed. It preserves a disabled lazy-loading variant (a load_model()
# function guarded by a model_loaded flag) for reference only.
"""
tokenizer = None
model = None
model_loaded = False # Flag to check if the model is loaded
@spaces.GPU
def load_model():
global tokenizer, model, model_loaded
if not model_loaded: # Load model only if it's not already loaded
print("Model loading started")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
torch_dtype=torch.bfloat16,
)
model_loaded = True
print("Model loading completed. Device of the model:", model.device)
return model, tokenizer
else:
print("Model is already loaded")
return model, tokenizer
"""
@spaces.GPU(duration=30)
def ask(prompt):
    """Generate a single-turn chat reply for ``prompt`` with the module-level model.

    Args:
        prompt: The user's message as plain text.

    Returns:
        The generated reply as a string with special tokens removed,
        surrounding whitespace stripped and ``*`` characters removed.
        When ``prompt`` is empty or whitespace-only, the dict
        ``{"error": "Prompt is missing"}`` is returned instead.
    """
    if not prompt or not prompt.strip():
        return {"error": "Prompt is missing"}

    model.to("cuda")
    print("Device of the model:", model.device)

    messages = [
        {"role": "user", "content": prompt},
    ]
    print("Messages:", messages)

    print("Tokenizer process started")
    # add_generation_prompt=True appends the assistant-turn header so the
    # model answers the user instead of continuing the user turn.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")
    print("Tokenizer process completed")

    print("Model process started")
    outputs = model.generate(**inputs, max_new_tokens=256)

    print("Tokenizer decode process started")
    # generate() returns prompt + completion; keep only the newly generated
    # tokens so the reply never has to be recovered by splitting the decoded
    # text on turn markers (which breaks if the prompt contains a marker).
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    print("Answer:", answer)

    # Asterisks are dropped so Markdown emphasis does not show up in the
    # plain-text output box.
    answer = answer.strip().replace("*", "")
    print("Final answer:", answer)
    return answer
# Minimal Gradio UI: a two-line prompt textbox wired to ask(), with the reply
# rendered in a plain output textbox.
demo = gr.Interface(
    fn=ask,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter your prompt here...",
    ),
    outputs=gr.Textbox(),
)
demo.launch()