Oleg Lavrovsky committed
Commit dcc5624 · unverified · 1 parent: 4b1aae8

Chat output

Files changed (1):
  1. app.py (+7 -5)
app.py CHANGED
@@ -65,8 +65,8 @@ async def lifespan(app: FastAPI):
         raise e
     # Release resources when the app is stopped
     yield
-    model.clear()
-    tokenizer.clear()
+    #model.clear()
+    #tokenizer.clear()
 
 
 # Setup our app
@@ -114,6 +114,8 @@ async def predict(q: str):
         messages_think,
         tokenize=False,
         add_generation_prompt=True,
+        top_p=0.9,
+        temperature=0.8,
     )
     model_inputs = tokenizer(
         [text],
@@ -124,13 +126,13 @@ async def predict(q: str):
     # Generate the output
     generated_ids = model.generate(
         **model_inputs,
-        top_p=0.9,
-        temperature=0.8,
         max_new_tokens=512
     )
 
     # Get and decode the output
-    output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :]
+    output_ids = generated_ids[0][-1]
+    logger.debug(output_ids)
+    #[len(model_inputs.input_ids[0]) :]
     result = tokenizer.decode(output_ids, skip_special_tokens=True)
 
     # Checkpoint
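A note on the first hunk: neither transformers models nor tokenizers expose a clear() method, which is presumably why these shutdown calls are commented out rather than fixed in place. A minimal sketch of lifespan cleanup that does release resources, assuming module-level model/tokenizer globals and an optional CUDA device as in app.py:

import gc
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI

model = None      # loaded during startup in the real app
tokenizer = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    global model, tokenizer
    # ... startup: load model and tokenizer here ...
    yield
    # Shutdown: drop the references, collect them, and hand any
    # cached CUDA memory back to the driver.
    model = None
    tokenizer = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

app = FastAPI(lifespan=lifespan)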
 
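The second hunk moves top_p and temperature from model.generate() onto tokenizer.apply_chat_template(). apply_chat_template only renders the prompt string, so sampling parameters passed there never reach the decoder and will not change the output. If nucleus sampling at these values is the goal, a sketch of the conventional placement follows; the checkpoint name is hypothetical (the model app.py loads is not visible in this diff), and do_sample=True is needed for top_p/temperature to take effect:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical small instruct model, a stand-in for the one app.py loads.
model_id = "HuggingFaceTB/SmolLM2-135M-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages_think = [{"role": "user", "content": "What is 2 + 2?"}]

# Render the chat prompt; no sampling parameters belong here.
text = tokenizer.apply_chat_template(
    messages_think,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Sampling parameters belong on generate().
generated_ids = model.generate(
    **model_inputs,
    do_sample=True,      # without sampling enabled, top_p/temperature are ignored
    top_p=0.9,
    temperature=0.8,
    max_new_tokens=512,
)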
 
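The last hunk replaces the prompt-stripping slice with generated_ids[0][-1], which keeps only the final token id, so result decodes to a single token rather than the full completion; the logger.debug() line looks like a checkpoint for debugging exactly that. The commented-out slice is the usual pattern: generate() returns the prompt and the completion concatenated, and dropping the first len(model_inputs.input_ids[0]) ids leaves only the newly generated text. Continuing from the sketch above:

# generated_ids[0] is [prompt ids | new ids]; strip the prompt part.
prompt_len = model_inputs.input_ids.shape[1]
output_ids = generated_ids[0][prompt_len:]
result = tokenizer.decode(output_ids, skip_special_tokens=True)
print(result)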