Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -18,7 +18,7 @@ bnb_config = BitsAndBytesConfig(
|
|
18 |
base_model_id = "google/gemma-7b"
|
19 |
base_model = AutoModelForCausalLM.from_pretrained(
|
20 |
base_model_id,
|
21 |
-
quantization_config=bnb_config,
|
22 |
device_map="auto",
|
23 |
trust_remote_code=True,
|
24 |
token=True,
|
@@ -31,8 +31,8 @@ ft_model = PeftModel.from_pretrained(base_model, "msinghy/gemma-7b-ft-80row-alpa
|
|
31 |
|
32 |
def respond(query):
|
33 |
eval_prompt = "###Input: " + query + "\n\n###Output: "
|
34 |
-
model_input = tokenizer(eval_prompt, return_tensors="pt")
|
35 |
-
output = ft_model.generate(input_ids=model_input["input_ids"],
|
36 |
attention_mask=model_input["attention_mask"],
|
37 |
max_new_tokens=500)
|
38 |
result = tokenizer.decode(output[0], skip_special_tokens=True).replace(eval_prompt, "")
|
|
|
18 |
base_model_id = "google/gemma-7b"
|
19 |
base_model = AutoModelForCausalLM.from_pretrained(
|
20 |
base_model_id,
|
21 |
+
#quantization_config=bnb_config,
|
22 |
device_map="auto",
|
23 |
trust_remote_code=True,
|
24 |
token=True,
|
|
|
31 |
|
32 |
def respond(query):
|
33 |
eval_prompt = "###Input: " + query + "\n\n###Output: "
|
34 |
+
model_input = tokenizer(eval_prompt, return_tensors="pt")#.to("cuda")
|
35 |
+
output = ft_model.generate(input_ids=model_input["input_ids"],  # .to(device)
|
36 |
attention_mask=model_input["attention_mask"],
|
37 |
max_new_tokens=500)
|
38 |
result = tokenizer.decode(output[0], skip_special_tokens=True).replace(eval_prompt, "")
|