tcml-chatglm-chat

Sleeping

Heng666 committed on Jan 18

Commit

95c51fd

•

1 Parent(s): 19b814b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,8 +16,9 @@ from threading import Thread
 # model.generation_config = GenerationConfig.from_pretrained(model_name_or_path)
 model_name_or_path = "scutcyr/BianQue-2"
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True)
-model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True)
 # using CUDA for an optimal experience
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -43,17 +44,17 @@ def predict(message, history):
     messages = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
                         for item in history_transformer_format])
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
-        max_new_tokens=1024,
         do_sample=True,
-        top_p=0.95,
         top_k=50,
-        temperature=0.7,
         num_beams=1,
-        stopping_criteria=StoppingCriteriaList([stop])
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()  # Starting the generation in a separate thread.

 # model.generation_config = GenerationConfig.from_pretrained(model_name_or_path)
 model_name_or_path = "scutcyr/BianQue-2"
+model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True).half()
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True)
 # using CUDA for an optimal experience
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     messages = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
                         for item in history_transformer_format])
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
+    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
+        max_new_tokens=2048,
         do_sample=True,
+        top_p=0.75,
         top_k=50,
+        temperature=0.95,
         num_beams=1,
+        # stopping_criteria=StoppingCriteriaList([stop]) 暫時拿掉
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()  # Starting the generation in a separate thread.