5to9 committed
Commit b6c4ccb · 1 Parent(s): 75b1a69

0.12 catch exceptions

Files changed (1)
  1. app.py +98 -82
app.py CHANGED
@@ -4,7 +4,9 @@ import torch
 import gradio as gr
 import logging
 from huggingface_hub import login
+
 import os
+import traceback
 
 from threading import Thread
 
@@ -12,6 +14,8 @@ from threading import Thread
 
 logging.basicConfig(level=logging.DEBUG)
 
+SPACER = '\n' + '*' * 40 + '\n'
+
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 login(token=HF_TOKEN)
 
@@ -51,97 +55,109 @@ def apply_chat_template(messages, add_generation_prompt=False):
 
 def load_model_a(model_id):
     global tokenizer_a, model_a, model_id_a
-    model_id_a = model_id  # need to access model_id with tokenizer
-    tokenizer_a = AutoTokenizer.from_pretrained(model_id)
-    logging.debug(f"***** model A eos_token: {tokenizer_a.eos_token}")
-    model_a = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        torch_dtype=torch_dtype,
-        device_map="auto",
-        trust_remote_code=True,
-    ).eval()
+    try:
+        model_id_a = model_id  # need to access model_id with tokenizer
+        tokenizer_a = AutoTokenizer.from_pretrained(model_id)
+        model_a = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            torch_dtype=torch_dtype,
+            device_map="auto",
+            trust_remote_code=True,
+        ).eval()
+    except Exception as e:
+        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
+
     return gr.update(label=model_id)
 
+
 def load_model_b(model_id):
     global tokenizer_b, model_b, model_id_b
-    model_id_b = model_id
-    tokenizer_b = AutoTokenizer.from_pretrained(model_id)
-    logging.debug(f"***** model B eos_token: {tokenizer_b.eos_token}")
-    model_b = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        torch_dtype=torch_dtype,
-        device_map="auto",
-        trust_remote_code=True,
-    ).eval()
-    model_b.tie_weights()
+    try:
+        model_id_b = model_id
+        tokenizer_b = AutoTokenizer.from_pretrained(model_id)
+        logging.debug(f"***** model B eos_token: {tokenizer_b.eos_token}")
+        model_b = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            torch_dtype=torch_dtype,
+            device_map="auto",
+            trust_remote_code=True,
+        ).eval()
+    except Exception as e:
+        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
     return gr.update(label=model_id)
 
 @spaces.GPU()
 def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens=2048, temperature=0.2, top_p=0.9, repetition_penalty=1.1):
 
-    text_streamer_a = TextIteratorStreamer(tokenizer_a, skip_prompt=True)
-    text_streamer_b = TextIteratorStreamer(tokenizer_b, skip_prompt=True)
-
-    system_prompt_list = [{"role": "system", "content": system_prompt}] if system_prompt else []
-    input_text_list = [{"role": "user", "content": input_text}]
-
-    chat_history_a = []
-    for user, assistant in chatbot_a:
-        chat_history_a.append({"role": "user", "content": user})
-        chat_history_a.append({"role": "assistant", "content": assistant})
-
-    chat_history_b = []
-    for user, assistant in chatbot_b:
-        chat_history_b.append({"role": "user", "content": user})
-        chat_history_b.append({"role": "assistant", "content": assistant})
-
-    new_messages_a = system_prompt_list + chat_history_a + input_text_list
-    new_messages_b = system_prompt_list + chat_history_b + input_text_list
-
-    input_ids_a = tokenizer_a.apply_chat_template(
-        new_messages_a,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model_a.device)
-
-    input_ids_b = tokenizer_b.apply_chat_template(
-        new_messages_b,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model_b.device)
-
-    generation_kwargs_a = dict(
-        input_ids=input_ids_a,
-        streamer=text_streamer_a,
-        max_new_tokens=max_new_tokens,
-        pad_token_id=tokenizer_a.eos_token_id,
-        do_sample=True,
-        temperature=temperature,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-    )
-    generation_kwargs_b = dict(
-        input_ids=input_ids_b,
-        streamer=text_streamer_b,
-        max_new_tokens=max_new_tokens,
-        pad_token_id=tokenizer_b.eos_token_id,
-        do_sample=True,
-        temperature=temperature,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-    )
-
-    thread_a = Thread(target=model_a.generate, kwargs=generation_kwargs_a)
-    thread_b = Thread(target=model_b.generate, kwargs=generation_kwargs_b)
-
-    thread_a.start()
-    thread_b.start()
-
-    chatbot_a.append([input_text, ""])
-    chatbot_b.append([input_text, ""])
-
-    finished_a = False
-    finished_b = False
+    try:
+        text_streamer_a = TextIteratorStreamer(tokenizer_a, skip_prompt=True)
+        text_streamer_b = TextIteratorStreamer(tokenizer_b, skip_prompt=True)
+
+        system_prompt_list = [{"role": "system", "content": system_prompt}] if system_prompt else []
+        input_text_list = [{"role": "user", "content": input_text}]
+
+        chat_history_a = []
+        for user, assistant in chatbot_a:
+            chat_history_a.append({"role": "user", "content": user})
+            chat_history_a.append({"role": "assistant", "content": assistant})
+
+        chat_history_b = []
+        for user, assistant in chatbot_b:
+            chat_history_b.append({"role": "user", "content": user})
+            chat_history_b.append({"role": "assistant", "content": assistant})
+
+        new_messages_a = system_prompt_list + chat_history_a + input_text_list
+        new_messages_b = system_prompt_list + chat_history_b + input_text_list
+
+        input_ids_a = tokenizer_a.apply_chat_template(
+            new_messages_a,
+            add_generation_prompt=True,
+            return_tensors="pt"
+        ).to(model_a.device)
+
+        input_ids_b = tokenizer_b.apply_chat_template(
+            new_messages_b,
+            add_generation_prompt=True,
+            return_tensors="pt"
+        ).to(model_b.device)
+
+        logging.debug(f'model_a.device: {model_a.device}, model_b.device: {model_b.device}')
+
+        generation_kwargs_a = dict(
+            input_ids=input_ids_a,
+            streamer=text_streamer_a,
+            max_new_tokens=max_new_tokens,
+            pad_token_id=tokenizer_a.eos_token_id,
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+        )
+
+        generation_kwargs_b = dict(
+            input_ids=input_ids_b,
+            streamer=text_streamer_b,
+            max_new_tokens=max_new_tokens,
+            pad_token_id=tokenizer_b.eos_token_id,
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+        )
+
+        thread_a = Thread(target=model_a.generate, kwargs=generation_kwargs_a)
+        thread_b = Thread(target=model_b.generate, kwargs=generation_kwargs_b)
+
+        thread_a.start()
+        thread_b.start()
+
+        chatbot_a.append([input_text, ""])
+        chatbot_b.append([input_text, ""])
+
+        finished_a = False
+        finished_b = False
+    except Exception as e:
+        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
 
     while not (finished_a and finished_b):
         if not finished_a:
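
For reference, the commit applies one catch-and-log pattern three times: wrap the function body in try, log any exception together with its traceback framed by SPACER, and fall through so the Gradio handler returns instead of crashing the Space. A minimal standalone sketch of that pattern follows; load_model here is a hypothetical stand-in, not the app's real loader.

import logging
import traceback

logging.basicConfig(level=logging.DEBUG)

# Divider that makes errors easy to spot in the Space logs,
# mirroring the SPACER constant this commit introduces.
SPACER = '\n' + '*' * 40 + '\n'

def load_model(model_id):
    # Hypothetical loader: exceptions are logged with their traceback
    # instead of propagating out of the event handler.
    try:
        if not model_id:
            raise ValueError("empty model_id")  # stand-in for a real failure
        return f"loaded {model_id}"
    except Exception as e:
        logging.error(f'{SPACER} Error: {e}, Traceback {traceback.format_exc()}')
        return None

load_model("")            # logs the error and returns None
load_model("some/model")  # returns "loaded some/model"

Because the except blocks only log, execution continues past a failure; in the committed code, load_model_a and load_model_b still return gr.update(label=model_id) either way.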