Mihaiii committed
Commit 90f50bb · verified · 1 Parent(s): 6156bdc

Update app.py

Files changed (1)
  1. app.py +6 -10
app.py CHANGED
@@ -34,30 +34,26 @@ def create_chat_template_messages(history, prompt):
 
     return messages
 
-# Async function for generating responses using two models
 @spaces.GPU
-async def generate_responses(prompt, history):
+def generate_responses(prompt, history):
     # Create messages array for chat history and apply template
     messages = create_chat_template_messages(history, prompt)
     wrapped_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_special_tokens=True, add_generation_prompt=True)
 
     #already has special tokens
     inputs = tokenizer.encode(wrapped_prompt, add_special_tokens=False, return_tensors="pt").to("cuda")
-    # Standard sampler task
-    standard_task = asyncio.to_thread(
-        model1.generate, inputs, max_length=2048, temperature=1
-    )
+    def standard_task():
+        return model1.generate(**inputs, max_length=2048, temperature=0.7)
 
     # Custom sampler task: loop over generator and collect outputs in a list
     async def custom_sampler_task():
         generated_list = []
-        generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=1)
+        generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=0.7)
         for token in generator:
             generated_list.append(token)
         return tokenizer.decode(generated_list, skip_special_tokens=True)
-
-    # Wait for both responses
-    standard_output, custom_output = await asyncio.gather(standard_task, custom_sampler_task())
+    standard_output = standard_task()
+    custom_output = asyncio.run(custom_sampler_task())
     # Decode standard output and remove the prompt from the generated response
     standard_response = tokenizer.decode(standard_output[0][len(inputs[0]):], skip_special_tokens=True)
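A minimal, self-contained sketch of the control-flow change in this diff, with blocking_generate and streaming_generate as hypothetical stand-ins for the model1.generate call and the creative_sampler token loop: the removed code awaited both samplers concurrently via asyncio.gather, while the added code runs the standard sampler synchronously and uses asyncio.run only to drive the custom sampler.

import asyncio
import time

def blocking_generate():
    # Hypothetical stand-in for the blocking model1.generate(...) call.
    time.sleep(0.1)
    return "standard output"

async def streaming_generate():
    # Hypothetical stand-in for draining the creative_sampler token generator.
    await asyncio.sleep(0.1)
    return "custom output"

async def old_pattern():
    # Before this commit: both samplers run concurrently; the blocking call
    # is offloaded to a worker thread and awaited together with the coroutine.
    standard_task = asyncio.to_thread(blocking_generate)
    return await asyncio.gather(standard_task, streaming_generate())

def new_pattern():
    # After this commit: the standard sampler runs synchronously first,
    # then asyncio.run drives the custom sampler coroutine to completion.
    standard_output = blocking_generate()
    custom_output = asyncio.run(streaming_generate())
    return standard_output, custom_output

if __name__ == "__main__":
    print(asyncio.run(old_pattern()))
    print(new_pattern())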