msinghy committed
Commit
5c9bbd9
1 Parent(s): b577899

Update app.py

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -18,7 +18,7 @@ bnb_config = BitsAndBytesConfig(
 base_model_id = "google/gemma-7b"
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
-    quantization_config=bnb_config,
+    # quantization_config=bnb_config,
     device_map="auto",
     trust_remote_code=True,
     token=True,
@@ -31,8 +31,8 @@ ft_model = PeftModel.from_pretrained(base_model, "msinghy/gemma-7b-ft-80row-alpa
 
 def respond(query):
     eval_prompt = "###Input: " + query + "\n\n###Output: "
-    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
-    output = ft_model.generate(input_ids=model_input["input_ids"].to(device),
+    model_input = tokenizer(eval_prompt, return_tensors="pt")  # .to("cuda")
+    output = ft_model.generate(input_ids=model_input["input_ids"],  # .to(device)
                                attention_mask=model_input["attention_mask"],
                                max_new_tokens=500)
     result = tokenizer.decode(output[0], skip_special_tokens=True).replace(eval_prompt, "")
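
Note: this commit disables the 4-bit BitsAndBytes quantization and comments out the hard-coded CUDA placement, which lets the Space fall back to CPU-only hardware. A minimal device-agnostic sketch of respond(), assuming tokenizer and ft_model are the objects the app already builds earlier in app.py (the device selection and variable wiring here are illustrative, not part of the commit):

import torch

# Hypothetical variant: pick CUDA when available, otherwise CPU,
# instead of hard-coding "cuda" or commenting the placement out.
device = "cuda" if torch.cuda.is_available() else "cpu"

def respond(query):
    eval_prompt = "###Input: " + query + "\n\n###Output: "
    # Move the tokenized inputs to the same device as the model.
    model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
    output = ft_model.generate(input_ids=model_input["input_ids"],
                               attention_mask=model_input["attention_mask"],
                               max_new_tokens=500)
    return tokenizer.decode(output[0], skip_special_tokens=True).replace(eval_prompt, "")

Moving the inputs with .to(device) keeps generate() valid whether the model lands on GPU or CPU, avoiding the device mismatch that leaving the inputs on CPU would cause on GPU hardware.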