nafisneehal
committed on
Update app.py
app.py
CHANGED
@@ -87,17 +87,19 @@ def generate_response(system_instruction, user_input):
 
     inputs = tokenizer([
         alpaca_prompt.format(
-
-
+            system_instruction, # instruction
+            user_input, # input
            "", # output - leave this blank for generation!
        )
    ], return_tensors = "pt").to("cuda")
 
-
-
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens = 1000, use_cache = True)
+
+    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
     assistant_response = decoded_output
+
     # tokenizer.batch_decode(outputs)
-
     # # Generate model response
     # with torch.no_grad():
     #     generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)