Mihaiii committed
Commit 90f50bb · verified · 1 Parent(s): 6156bdc

Update app.py

Files changed (1)
  1. app.py +6 -10
app.py CHANGED
@@ -34,30 +34,26 @@ def create_chat_template_messages(history, prompt):
 
     return messages
 
-# Async function for generating responses using two models
 @spaces.GPU
-async def generate_responses(prompt, history):
+def generate_responses(prompt, history):
     # Create messages array for chat history and apply template
     messages = create_chat_template_messages(history, prompt)
     wrapped_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_special_tokens=True, add_generation_prompt=True)
 
     #already has special tokens
     inputs = tokenizer.encode(wrapped_prompt, add_special_tokens=False, return_tensors="pt").to("cuda")
-    # Standard sampler task
-    standard_task = asyncio.to_thread(
-        model1.generate, inputs, max_length=2048, temperature=1
-    )
+    def standard_task():
+        return model1.generate(**inputs, max_length=2048, temperature=0.7)
 
     # Custom sampler task: loop over generator and collect outputs in a list
     async def custom_sampler_task():
         generated_list = []
-        generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=1)
+        generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=0.7)
         for token in generator:
             generated_list.append(token)
         return tokenizer.decode(generated_list, skip_special_tokens=True)
-
-    # Wait for both responses
-    standard_output, custom_output = await asyncio.gather(standard_task, custom_sampler_task())
+    standard_output = standard_task()
+    custom_output = asyncio.run(custom_sampler_task())
     # Decode standard output and remove the prompt from the generated response
     standard_response = tokenizer.decode(standard_output[0][len(inputs[0]):], skip_special_tokens=True)
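A minimal, self-contained sketch of the control-flow change in this diff, with blocking_generate and streaming_generate as hypothetical stand-ins for the model1.generate call and the creative_sampler token loop: the removed code awaited both samplers concurrently via asyncio.gather, while the added code runs the standard sampler synchronously and uses asyncio.run only to drive the custom sampler.

import asyncio
import time

def blocking_generate():
    # Hypothetical stand-in for the blocking model1.generate(...) call.
    time.sleep(0.1)
    return "standard output"

async def streaming_generate():
    # Hypothetical stand-in for draining the creative_sampler token generator.
    await asyncio.sleep(0.1)
    return "custom output"

async def old_pattern():
    # Before this commit: both samplers run concurrently; the blocking call
    # is offloaded to a worker thread and awaited together with the coroutine.
    standard_task = asyncio.to_thread(blocking_generate)
    return await asyncio.gather(standard_task, streaming_generate())

def new_pattern():
    # After this commit: the standard sampler runs synchronously first,
    # then asyncio.run drives the custom sampler coroutine to completion.
    standard_output = blocking_generate()
    custom_output = asyncio.run(streaming_generate())
    return standard_output, custom_output

if __name__ == "__main__":
    print(asyncio.run(old_pattern()))
    print(new_pattern())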