from dotenv import load_dotenv
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load environment variables (e.g. a Hugging Face access token) from a local .env file.
load_dotenv()

MODEL_NAME = "deepseek-ai/deepseek-coder-7b-instruct-v1.5"

# Keep the tokenizer and model together so they travel as one unit.
model = {
    "tokenizer": AutoTokenizer.from_pretrained(MODEL_NAME),
    "model": AutoModelForCausalLM.from_pretrained(MODEL_NAME),
}

# Prefer the GPU when one is available; move the model there once at load time.
device = "cuda" if torch.cuda.is_available() else "cpu"
model["model"].to(device)

def generate_response(prompt):
    """Generate a response for a single user prompt with the instruct model."""
    try:
        coder_model_prompt = [
            {"role": "user", "content": prompt}
        ]
        # Render the chat turn with the model's own template and append the assistant marker.
        encodeds = model["tokenizer"].apply_chat_template(
            coder_model_prompt, add_generation_prompt=True, return_tensors="pt"
        )
        model_inputs = encodeds.to(device)

        # Greedy decoding: temperature and repetition_penalty have no effect when do_sample=False.
        generated_ids = model["model"].generate(model_inputs, max_new_tokens=500, do_sample=False)

        # Decode only the newly generated tokens so the prompt and special tokens are stripped.
        new_tokens = generated_ids[:, model_inputs.shape[1]:]
        decoded = model["tokenizer"].batch_decode(new_tokens, skip_special_tokens=True)
        return decoded[0]
    except Exception as e:
        raise RuntimeError(f"Error generating response: {e}") from e
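
# A minimal usage sketch (assumption: the script is run directly; the prompt below is illustrative only).
if __name__ == "__main__":
    sample_prompt = "Write a Python function that checks whether a string is a palindrome."
    print(generate_response(sample_prompt))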