Spaces: Sleeping
Making app CPU compatible.
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ def get_model():
|
|
12 |
model = AutoModelForCausalLM.from_pretrained(
|
13 |
model_id,
|
14 |
low_cpu_mem_usage=True,
|
15 |
-
torch_dtype=torch.float16,
|
16 |
# load_in_4bit=True,
|
17 |
)
|
18 |
|
@@ -35,8 +35,9 @@ if user_input and button:
|
|
35 |
### Response:
|
36 |
"""
|
37 |
st.write("Prompt: ", user_input)
|
38 |
-
input = tokenizer(
|
39 |
-
|
|
|
40 |
# input_ids = tokenizer(prompt, return_tensors="pt", truncation=True)
|
41 |
# outputs = model.generate(input_ids=input_ids, pad_token_id=tokenizer.eos_token_id, max_new_tokens=500, do_sample=True, top_p=0.75, temperature=0.95, top_k=15)
|
42 |
|
|
|
12 |
model = AutoModelForCausalLM.from_pretrained(
|
13 |
model_id,
|
14 |
low_cpu_mem_usage=True,
|
15 |
+
# torch_dtype=torch.float16,
|
16 |
# load_in_4bit=True,
|
17 |
)
|
18 |
|
|
|
35 |
### Response:
|
36 |
"""
|
37 |
st.write("Prompt: ", user_input)
|
38 |
+
input = tokenizer(prompt, padding=True, return_tensors="pt")
|
39 |
+
generate_ids = model.generate(input.input_ids, max_length=500, top_p=0.75, temperature=0.95, top_k=15)
|
40 |
+
output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
41 |
# input_ids = tokenizer(prompt, return_tensors="pt", truncation=True)
|
42 |
# outputs = model.generate(input_ids=input_ids, pad_token_id=tokenizer.eos_token_id, max_new_tokens=500, do_sample=True, top_p=0.75, temperature=0.95, top_k=15)
|
43 |
|