Oleg Lavrovsky committed
Commit dcc5624 · unverified · 1 parent: 4b1aae8

Chat output

Files changed (1):
  1. app.py (+7 -5)
app.py CHANGED
@@ -65,8 +65,8 @@ async def lifespan(app: FastAPI):
         raise e
     # Release resources when the app is stopped
     yield
-    model.clear()
-    tokenizer.clear()
+    #model.clear()
+    #tokenizer.clear()
 
 
 # Setup our app
@@ -114,6 +114,8 @@ async def predict(q: str):
         messages_think,
         tokenize=False,
         add_generation_prompt=True,
+        top_p=0.9,
+        temperature=0.8,
     )
     model_inputs = tokenizer(
         [text],
@@ -124,13 +126,13 @@ async def predict(q: str):
     # Generate the output
     generated_ids = model.generate(
         **model_inputs,
-        top_p=0.9,
-        temperature=0.8,
         max_new_tokens=512
     )
 
     # Get and decode the output
-    output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :]
+    output_ids = generated_ids[0][-1]
+    logger.debug(output_ids)
+    #[len(model_inputs.input_ids[0]) :]
     result = tokenizer.decode(output_ids, skip_special_tokens=True)
 
     # Checkpoint
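A note on the first hunk: neither transformers models nor tokenizers expose a clear() method, which is presumably why these shutdown calls are commented out rather than fixed in place. A minimal sketch of lifespan cleanup that does release resources, assuming module-level model/tokenizer globals and an optional CUDA device as in app.py:

import gc
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI

model = None      # loaded during startup in the real app
tokenizer = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    global model, tokenizer
    # ... startup: load model and tokenizer here ...
    yield
    # Shutdown: drop the references, collect them, and hand any
    # cached CUDA memory back to the driver.
    model = None
    tokenizer = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

app = FastAPI(lifespan=lifespan)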
 
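The second hunk moves top_p and temperature from model.generate() onto tokenizer.apply_chat_template(). apply_chat_template only renders the prompt string, so sampling parameters passed there never reach the decoder and will not change the output. If nucleus sampling at these values is the goal, a sketch of the conventional placement follows; the checkpoint name is hypothetical (the model app.py loads is not visible in this diff), and do_sample=True is needed for top_p/temperature to take effect:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical small instruct model, a stand-in for the one app.py loads.
model_id = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages_think = [{"role": "user", "content": "What is 2 + 2?"}]

# Render the chat prompt; no sampling parameters belong here.
text = tokenizer.apply_chat_template(
    messages_think,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Sampling parameters belong on generate().
generated_ids = model.generate(
    **model_inputs,
    do_sample=True,      # without sampling enabled, top_p/temperature are ignored
    top_p=0.9,
    temperature=0.8,
    max_new_tokens=512,
)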
 
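The last hunk replaces the prompt-stripping slice with generated_ids[0][-1], which keeps only the final token id, so result decodes to a single token rather than the full completion; the logger.debug() line looks like a checkpoint for debugging exactly that. The commented-out slice is the usual pattern: generate() returns the prompt and the completion concatenated, and dropping the first len(model_inputs.input_ids[0]) ids leaves only the newly generated text. Continuing from the sketch above:

# generated_ids[0] is [prompt ids | new ids]; strip the prompt part.
prompt_len = model_inputs.input_ids.shape[1]
output_ids = generated_ids[0][prompt_len:]
result = tokenizer.decode(output_ids, skip_special_tokens=True)
print(result)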