Spaces:

simonraj
/

PersonalTrainer-Arnold

Running on Zero

App Files Files Community

simonraj committed on May 23

Commit

07c3d7c

•

1 Parent(s): fc80d39

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -30

app.py CHANGED Viewed

@@ -36,6 +36,8 @@ PLACEHOLDER = """
 def bot_streaming(message, history):
     print(f'message is - {message}')
     print(f'history is - {history}')
     if message["files"]:
         if type(message["files"][-1]) == dict:
             image = message["files"][-1]["path"]
@@ -45,45 +47,60 @@ def bot_streaming(message, history):
         for hist in history:
             if type(hist[0]) == tuple:
                 image = hist[0][0]
-    try:
-        if image is None:
-            raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an Image.")
-    except NameError:
         raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an Image.")
-    conversation = []
-    flag = False
-    for user, assistant in history:
-        if assistant is None:
-            flag = True
-            conversation.extend([{"role": "user", "content": ""}])
-            continue
-        if flag == True:
-            conversation[0]['content'] = f"<|image_1|>\n{user}"
-            conversation.extend([{"role": "assistant", "content": assistant}])
-            flag = False
-            continue
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     if len(history) == 0:
-        conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
     else:
-        conversation.append({"role": "user", "content": message['text']})
-    print(f"prompt is -\n{conversation}")
-    prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     image = Image.open(image)
-    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
-    streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": True, "skip_prompt": True, 'clean_up_tokenization_spaces': False,})
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=512, do_sample=False, temperature=0.0, eos_token_id=processor.tokenizer.eos_token_id,)
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer

 def bot_streaming(message, history):
     print(f'message is - {message}')
     print(f'history is - {history}')
+    image = None
     if message["files"]:
         if type(message["files"][-1]) == dict:
             image = message["files"][-1]["path"]
         for hist in history:
             if type(hist[0]) == tuple:
                 image = hist[0][0]
+    if image is None:
         raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an Image.")
+    # Default prompt if no text is provided by the user
+    default_prompt_text = "Identify and provide coaching cues for this exercise."
+    # Custom system prompt to guide the model's responses
+    system_prompt = (
+        "As Arnold Schwarzenegger, analyze the image to identify the exercise being performed. "
+        "Provide detailed coaching tips to improve the form, focusing on posture and common errors. "
+        "Use motivational and energetic language. If the image does not show an exercise, respond with: "
+        "'What are you doing? This is no time for games! Upload a real exercise picture and let's pump it up!'"
+    )
+    # Create the conversation history for the prompt
+    conversation = []
     if len(history) == 0:
+        if message['text'].strip() == "":
+            conversation.append({"role": "user", "content": f"<|image_1|>\n{default_prompt_text}"})
+        else:
+            conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
     else:
+        for user, assistant in history:
+            conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+        if message['text'].strip() == "":
+            conversation.append({"role": "user", "content": f"<|image_1|>\n{default_prompt_text}"})
+        else:
+            conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
+    # Format the prompt as specified in the Phi model guidelines
+    formatted_prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+    # Open the image and prepare inputs
     image = Image.open(image)
+    inputs = processor(formatted_prompt, images=image, return_tensors="pt").to("cuda:0")
+    # Define generation arguments
+    generation_args = {
+        "max_new_tokens": 280,
+        "temperature": 0.0,
+        "do_sample": False,
+        "eos_token_id": processor.tokenizer.eos_token_id,
+    }
+    # Generate the response
+    generate_ids = model.generate(**inputs, **generation_args)
+    # Process the generated IDs to get the response text
+    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
+    response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    yield response