Spaces:
Runtime error
Oleg Lavrovsky committed
Chat output
app.py CHANGED
@@ -65,8 +65,8 @@ async def lifespan(app: FastAPI):
         raise e
     # Release resources when the app is stopped
     yield
-    model.clear()
-    tokenizer.clear()


 # Setup our app
+    #model.clear()
+    #tokenizer.clear()
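The two disabled .clear() calls are the likely culprit behind the runtime error: Transformers model and tokenizer objects expose no clear() method, so the shutdown path would raise AttributeError. Commenting them out sidesteps the crash; below is a minimal sketch of one conventional cleanup pattern for a FastAPI lifespan handler, assuming module-level model/tokenizer globals as in this app (the placeholder loading and the CUDA cache flush are illustrative, not the Space's actual code).

import gc
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI

model = None      # placeholder for the model loaded at startup
tokenizer = None  # placeholder for the tokenizer loaded at startup

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: the real app loads the model and tokenizer here.
    yield
    # Shutdown: drop the references and let Python reclaim the memory;
    # Transformers models and tokenizers have no .clear() method.
    global model, tokenizer
    model = None
    tokenizer = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # release cached GPU memory, if any

app = FastAPI(lifespan=lifespan)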
@@ -114,6 +114,8 @@ async def predict(q: str):
         messages_think,
         tokenize=False,
         add_generation_prompt=True,
+        top_p=0.9,
+        temperature=0.8,
     )
     model_inputs = tokenizer(
         [text],
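A caveat on this hunk: top_p and temperature are sampling parameters read by model.generate(), not by the templating step. apply_chat_template forwards unrecognized keyword arguments to the chat template itself, where most templates silently ignore them, so moving the two settings here effectively disables them. A minimal sketch of the usual placement, with a small instruct checkpoint standing in for the Space's model (which this diff does not name):

from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in checkpoint, for illustration only.
ckpt = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt)

messages_think = [{"role": "user", "content": "Hello!"}]
text = tokenizer.apply_chat_template(
    messages_think,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
generated_ids = model.generate(
    **model_inputs,
    do_sample=True,   # sampling must be enabled for top_p/temperature to apply
    top_p=0.9,
    temperature=0.8,
    max_new_tokens=512,
)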
@@ -124,13 +126,13 @@ async def predict(q: str):
     # Generate the output
     generated_ids = model.generate(
         **model_inputs,
-        top_p=0.9,
-        temperature=0.8,
         max_new_tokens=512
     )

     # Get and decode the output
-    output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :]
+    output_ids = generated_ids[0][-1]
+    logger.debug(output_ids)
+    #[len(model_inputs.input_ids[0]) :]
     result = tokenizer.decode(output_ids, skip_special_tokens=True)

     # Checkpoint
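Note that generated_ids[0][-1] keeps only the final token id, so the decode below it returns a single token rather than the full reply; the commented-out slice is the usual way to strip the prompt tokens and decode the whole completion. Continuing the sketch above, under the same assumed names:

# Drop the prompt tokens so only the newly generated ids are decoded.
prompt_len = model_inputs.input_ids.shape[1]
output_ids = generated_ids[0][prompt_len:]
result = tokenizer.decode(output_ids, skip_special_tokens=True)
print(result)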