msinghy committed
Commit
5c9bbd9
1 Parent(s): b577899

Update app.py

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -18,7 +18,7 @@ bnb_config = BitsAndBytesConfig(
 base_model_id = "google/gemma-7b"
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
-    quantization_config=bnb_config,
+    # quantization_config=bnb_config,
     device_map="auto",
     trust_remote_code=True,
     token=True,
@@ -31,8 +31,8 @@ ft_model = PeftModel.from_pretrained(base_model, "msinghy/gemma-7b-ft-80row-alpa
 
 def respond(query):
     eval_prompt = "###Input: " + query + "\n\n###Output: "
-    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
-    output = ft_model.generate(input_ids=model_input["input_ids"].to(device),
+    model_input = tokenizer(eval_prompt, return_tensors="pt")  # .to("cuda")
+    output = ft_model.generate(input_ids=model_input["input_ids"],  # .to(device)
                                attention_mask=model_input["attention_mask"],
                                max_new_tokens=500)
     result = tokenizer.decode(output[0], skip_special_tokens=True).replace(eval_prompt, "")
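
Note: this commit disables the 4-bit BitsAndBytes quantization and comments out the hard-coded CUDA placement, which lets the Space fall back to CPU-only hardware. A minimal device-agnostic sketch of respond(), assuming tokenizer and ft_model are the objects the app already builds earlier in app.py (the device selection and variable wiring here are illustrative, not part of the commit):

import torch

# Hypothetical variant: pick CUDA when available, otherwise CPU,
# instead of hard-coding "cuda" or commenting the placement out.
device = "cuda" if torch.cuda.is_available() else "cpu"

def respond(query):
    eval_prompt = "###Input: " + query + "\n\n###Output: "
    # Move the tokenized inputs to the same device as the model.
    model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
    output = ft_model.generate(input_ids=model_input["input_ids"],
                               attention_mask=model_input["attention_mask"],
                               max_new_tokens=500)
    return tokenizer.decode(output[0], skip_special_tokens=True).replace(eval_prompt, "")

Moving the inputs with .to(device) keeps generate() valid whether the model lands on GPU or CPU, avoiding the device mismatch that leaving the inputs on CPU would cause on GPU hardware.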