acecalisto3 committed
Commit 41a01ba · verified · 1 parent: c27556a

Update app.py

Files changed (1): app.py (+43, -1)
app.py CHANGED
@@ -12,7 +12,49 @@ import atexit
 import subprocess
 from urllib.parse import urlparse, quote
 import webbrowser
-
+import spaces
+
+device = "cuda"
+
+@spaces.GPU()
+def stream_chat(
+    message: str,
+    history: list,
+    system_prompt: str,
+    temperature: float = 0.5,
+    max_new_tokens: int = 16000,
+    top_p: float = 1.0,
+    top_k: int = 15,
+    penalty: float = 0.9,
+):
+    print(f'message: {message}')
+    print(f'history: {history}')
+
+    conversation = [
+        {"role": "system", "content": system_prompt}
+    ]
+    for prompt, answer in history:
+        conversation.extend([
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": answer},
+        ])
+
+    conversation.append({"role": "user", "content": message})
+
+    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
+
+    generate_kwargs = dict(
+        input_ids=input_ids,
+        max_new_tokens=max_new_tokens,
+        do_sample=False if temperature == 0 else True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        eos_token_id=[128001, 128008, 128009],
+        streamer=streamer,
+    )
 # Constants
 INPUT_DIRECTORY = 'input'
 OUTPUT_DIRECTORY = 'output'
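
Note: the hunk ends with generate_kwargs assembled but not yet consumed; the matching model.generate call presumably lives outside the displayed context. For reference, the usual transformers TextIteratorStreamer pattern runs generation on a background thread while the caller iterates the streamer. Below is a minimal sketch of that pattern, not part of the commit: it assumes model and tokenizer are defined elsewhere in app.py, and the helper name consume_stream is hypothetical.

# Hypothetical sketch of how the generate_kwargs built in stream_chat
# would typically be consumed. Assumes `model` is a loaded transformers
# model and `streamer` is the TextIteratorStreamer created above.
from threading import Thread

def consume_stream(model, generate_kwargs, streamer):
    # model.generate blocks until generation finishes, so it runs on a
    # worker thread while this thread drains the streamer as text arrives.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    partial = ""
    for new_text in streamer:  # yields decoded text chunks incrementally
        partial += new_text
        yield partial          # Gradio-style: yield the growing reply

    thread.join()

In a Gradio ChatInterface handler (which the @spaces.GPU decorator suggests this Space may use), stream_chat would yield from such a loop so the UI updates token by token.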