Chengxb888 committed on
Commit
1cd6700
1 Parent(s): 6c88388

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -22,14 +22,14 @@ def say_hello(msg: Annotated[str, Form()]):
22
  # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
23
  model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
24
 
25
- messages = [{"role": "user", "content": "things about elasticsearch"}]
26
  input_text=tokenizer.apply_chat_template(messages, tokenize=False)
27
  print(input_text)
28
 
29
  input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
30
  print("output")
31
  inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
32
- outputs = model.generate(inputs, max_new_tokens=32, temperature=0.6, top_p=0.92, do_sample=True)
33
 
34
  print("complete")
35
  return {"message": tokenizer.decode(outputs[0])}
 
22
  # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
23
  model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
24
 
25
+ messages = [{"role": "user", "content": msg}]
26
  input_text=tokenizer.apply_chat_template(messages, tokenize=False)
27
  print(input_text)
28
 
29
  input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
30
  print("output")
31
  inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
32
+ outputs = model.generate(inputs, max_new_tokens=256, temperature=0.6, top_p=0.92, do_sample=True)
33
 
34
  print("complete")
35
  return {"message": tokenizer.decode(outputs[0])}