import argparse
import ast
import json
import os
import random
import sys
import threading
import time

import numpy as np
import torch
import wx
from PIL import Image
from flask import Flask, Response
from flask_cors import CORS
from io import BytesIO
from typing import Optional

sys.path.append(os.getcwd())

from tha3.mocap.ifacialmocap_constants import *
from tha3.mocap.ifacialmocap_pose import create_default_ifacialmocap_pose
from tha3.mocap.ifacialmocap_pose_converter import IFacialMocapPoseConverter
from tha3.mocap.ifacialmocap_poser_converter_25 import create_ifacialmocap_pose_converter
from tha3.poser.modes.load_poser import load_poser
from tha3.poser.poser import Poser
from tha3.util import (
    torch_linear_to_srgb,
    resize_PIL_image,
    extract_PIL_image_from_filelike,
    extract_pytorch_image_from_PIL_image,
)

# Global variables
global_source_image = None
global_result_image = None
global_reload = None
is_talking_override = False
is_talking = False
global_timer_paused = False
emotion = "neutral"
lasttranisitiondPose = "NotInit"
inMotion = False
fps = 0
current_pose = None
storepath = os.path.join(os.getcwd(), "talkinghead", "emotions")

# Flask setup
app = Flask(__name__)
CORS(app)


def setEmotion(_emotion):
    global emotion

    highest_score = float('-inf')
    highest_label = None

    for item in _emotion:
        if item['score'] > highest_score:
            highest_score = item['score']
            highest_label = item['label']

    #print("Applying ", highest_label)
    emotion = highest_label


def unload():
    global global_timer_paused
    global_timer_paused = True
    return "Animation Paused"


def start_talking():
    global is_talking_override
    is_talking_override = True
    return "started"


def stop_talking():
    global is_talking_override
    is_talking_override = False
    return "stopped"


def result_feed():
    def generate():
        while True:
            if global_result_image is not None:
                try:
                    rgb_image = global_result_image[:, :, [2, 1, 0]]  # Swap B and R channels
                    pil_image = Image.fromarray(np.uint8(rgb_image))  # Convert to PIL Image
                    if global_result_image.shape[2] == 4:  # Check if there is an alpha channel present
                        alpha_channel = global_result_image[:, :, 3]  # Extract alpha channel
                        pil_image.putalpha(Image.fromarray(np.uint8(alpha_channel)))  # Set alpha channel in the PIL Image
                    buffer = BytesIO()
                    pil_image.save(buffer, format='PNG')  # Save as PNG with RGBA mode
                    image_bytes = buffer.getvalue()
                except Exception as e:
                    print(f"Error when trying to write image: {e}")
                    continue  # Skip this frame rather than yielding stale or undefined bytes
                yield (b'--frame\r\n'  # Send the PNG image
                       b'Content-Type: image/png\r\n\r\n' + image_bytes + b'\r\n')
            else:
                time.sleep(0.1)

    return Response(generate(), mimetype='multipart/x-mixed-replace; boundary=frame')


def talkinghead_load_file(stream):
    global global_source_image
    global global_reload
    global global_timer_paused
    global_timer_paused = False

    try:
        pil_image = Image.open(stream)  # Load the image using PIL.Image.open
        img_data = BytesIO()  # Create a copy of the image data in memory using BytesIO
        pil_image.save(img_data, format='PNG')
        global_reload = Image.open(BytesIO(img_data.getvalue()))  # Set global_reload to the copy of the image data
    except Image.UnidentifiedImageError:
        print("Could not load image from file, loading blank")
        full_path = os.path.join(os.getcwd(), os.path.normpath("talkinghead\\tha3\\images\\inital.png"))
        MainFrame.load_image(None, full_path)
        global_timer_paused = True
    return 'OK'


def convert_linear_to_srgb(image: torch.Tensor) -> torch.Tensor:
    rgb_image = torch_linear_to_srgb(image[0:3, :, :])
    return torch.cat([rgb_image, image[3:4, :, :]], dim=0)
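
# --- Illustrative sketch, not part of the original script ---------------------
# result_feed() above emits PNG frames in "multipart/x-mixed-replace" form,
# separated by a "--frame" boundary. The minimal client below shows one way to
# consume such a stream. The URL (host, port, and route registration) is an
# assumption: this module only builds the Flask app; the code that binds the
# route and runs the server is outside this section.
def _example_stream_client(url="http://127.0.0.1:5100/result_feed"):  # hypothetical URL
    import requests  # assumed available; not used elsewhere in this script
    boundary = b'--frame'
    buffer = b''
    with requests.get(url, stream=True) as response:
        for chunk in response.iter_content(chunk_size=4096):
            buffer += chunk
            while boundary in buffer:
                frame, _, buffer = buffer.partition(boundary)
                # Drop the "Content-Type" header block; the remainder is PNG bytes.
                _, _, png_bytes = frame.partition(b'\r\n\r\n')
                if png_bytes:
                    yield png_bytes.rstrip(b'\r\n')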
def launch_gui(device, model):
    global initAMI
    initAMI = True

    parser = argparse.ArgumentParser(description='uWu Waifu')
    # Add other parser arguments here
    args, unknown = parser.parse_known_args()

    try:
        poser = load_poser(model, device)
        pose_converter = create_ifacialmocap_pose_converter()  # creates a list of 45

        app = wx.App(redirect=False)
        main_frame = MainFrame(poser, pose_converter, device)
        main_frame.SetSize((750, 600))

        # Load default image (you can pass args.char if required)
        full_path = os.path.join(os.getcwd(), os.path.normpath("talkinghead\\tha3\\images\\inital.png"))
        main_frame.load_image(None, full_path)

        #main_frame.Show(True)
        main_frame.capture_timer.Start(100)
        main_frame.animation_timer.Start(100)
        wx.DisableAsserts()  # prevent popup about debug alert closed from other threads
        app.MainLoop()

    except RuntimeError as e:
        print(e)
        sys.exit()


class FpsStatistics:
    def __init__(self):
        self.count = 100
        self.fps = []

    def add_fps(self, fps):
        self.fps.append(fps)
        while len(self.fps) > self.count:
            del self.fps[0]

    def get_average_fps(self):
        if len(self.fps) == 0:
            return 0.0
        else:
            return sum(self.fps) / len(self.fps)
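
# --- Illustrative sketch, not part of the original script ---------------------
# FpsStatistics keeps a 100-sample window by deleting from the front of a list,
# which costs O(n) per frame. A collections.deque with maxlen gives the same
# rolling average with O(1) appends; shown here purely as a design note.
class _DequeFpsStatistics:
    def __init__(self, window=100):
        from collections import deque
        self.samples = deque(maxlen=window)  # old samples fall off automatically

    def add_fps(self, fps):
        self.samples.append(fps)

    def get_average_fps(self):
        return sum(self.samples) / len(self.samples) if self.samples else 0.0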
class MainFrame(wx.Frame):
    def __init__(self, poser: Poser, pose_converter: IFacialMocapPoseConverter, device: torch.device):
        super().__init__(None, wx.ID_ANY, "uWu Waifu")
        self.pose_converter = pose_converter
        self.poser = poser
        self.device = device

        self.last_blink_timestamp = 0
        self.is_blinked = False
        self.targets = {"head_y_index": 0}
        self.progress = {"head_y_index": 0}
        self.direction = {"head_y_index": 1}
        self.originals = {"head_y_index": 0}
        self.forward = {"head_y_index": True}  # Direction of interpolation
        self.start_values = {"head_y_index": 0}

        self.fps_statistics = FpsStatistics()
        self.image_load_counter = 0
        self.custom_background_image = None  # Add this line

        self.sliders = {}
        self.ifacialmocap_pose = create_default_ifacialmocap_pose()
        self.source_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size())
        self.result_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size())
        self.wx_source_image = None
        self.torch_source_image = None
        self.last_update_time = None

        self.create_ui()
        self.create_timers()
        self.Bind(wx.EVT_CLOSE, self.on_close)

        self.update_source_image_bitmap()
        self.update_result_image_bitmap()

    def create_timers(self):
        self.capture_timer = wx.Timer(self, wx.ID_ANY)
        self.Bind(wx.EVT_TIMER, self.update_capture_panel, id=self.capture_timer.GetId())
        self.animation_timer = wx.Timer(self, wx.ID_ANY)
        self.Bind(wx.EVT_TIMER, self.update_result_image_bitmap, id=self.animation_timer.GetId())

    def on_close(self, event: wx.Event):
        # Stop the timers
        self.animation_timer.Stop()
        self.capture_timer.Stop()
        # Destroy the windows
        self.Destroy()
        event.Skip()
        sys.exit(0)

    def random_generate_value(self, min_value, max_value, origin_value):
        # Equivalent to random.choice(range(min_value, max_value)), without
        # shadowing the min/max builtins
        random_value = random.randrange(min_value, max_value) / 2500.0
        randomized = origin_value + random_value
        if randomized > 1.0:
            randomized = 1.0
        if randomized < 0:
            randomized = 0
        return randomized

    def animationTalking(self):
        global is_talking
        current_pose = self.ifacialmocap_pose

        # NOTE: randomize mouth
        for blendshape_name in BLENDSHAPE_NAMES:
            if "jawOpen" in blendshape_name:
                if is_talking or is_talking_override:
                    current_pose[blendshape_name] = self.random_generate_value(-5000, 5000, abs(1 - current_pose[blendshape_name]))
                else:
                    current_pose[blendshape_name] = 0
        return current_pose

    def animationHeadMove(self):
        current_pose = self.ifacialmocap_pose

        for key in [HEAD_BONE_Y]:  # can add more to this list if needed
            current_pose[key] = self.random_generate_value(-20, 20, current_pose[key])
        return current_pose

    def animationBlink(self):
        current_pose = self.ifacialmocap_pose

        if random.random() <= 0.03:
            current_pose["eyeBlinkRight"] = 1
            current_pose["eyeBlinkLeft"] = 1
        else:
            current_pose["eyeBlinkRight"] = 0
            current_pose["eyeBlinkLeft"] = 0
        return current_pose

    @staticmethod
    def addNamestoConvert(pose):
        # Called unbound as MainFrame.addNamestoConvert(...), hence a static method
        index_to_name = {
            0: 'eyebrow_troubled_left_index',   # COMBACK TO UNK
            1: 'eyebrow_troubled_right_index',  # COMBACK TO UNK
            2: 'eyebrow_angry_left_index',
            3: 'eyebrow_angry_right_index',
            4: 'unknown1',   # COMBACK TO UNK
            5: 'unknown2',   # COMBACK TO UNK
            6: 'eyebrow_raised_left_index',
            7: 'eyebrow_raised_right_index',
            8: 'eyebrow_happy_left_index',
            9: 'eyebrow_happy_right_index',
            10: 'unknown3',  # COMBACK TO UNK
            11: 'unknown4',  # COMBACK TO UNK
            12: 'wink_left_index',
            13: 'wink_right_index',
            14: 'eye_happy_wink_left_index',
            15: 'eye_happy_wink_right_index',
            16: 'eye_surprised_left_index',
            17: 'eye_surprised_right_index',
            18: 'unknown5',  # COMBACK TO UNK
            19: 'unknown6',  # COMBACK TO UNK
            20: 'unknown7',  # COMBACK TO UNK
            21: 'unknown8',  # COMBACK TO UNK
            22: 'eye_raised_lower_eyelid_left_index',
            23: 'eye_raised_lower_eyelid_right_index',
            24: 'iris_small_left_index',
            25: 'iris_small_right_index',
            26: 'mouth_aaa_index',
            27: 'mouth_iii_index',
            28: 'mouth_ooo_index',
            29: 'unknown9a',  # COMBACK TO UNK
            30: 'mouth_ooo_index2',
            31: 'unknown9',   # COMBACK TO UNK
            32: 'unknown10',  # COMBACK TO UNK
            33: 'unknown11',  # COMBACK TO UNK
            34: 'mouth_raised_corner_left_index',
            35: 'mouth_raised_corner_right_index',
            36: 'unknown12',
            37: 'iris_rotation_x_index',
            38: 'iris_rotation_y_index',
            39: 'head_x_index',
            40: 'head_y_index',
            41: 'neck_z_index',
            42: 'body_y_index',
            43: 'body_z_index',
            44: 'breathing_index'
        }
        output = []
        for index, value in enumerate(pose):
            name = index_to_name.get(index, "Unknown")
            output.append(f"{name}: {value}")
        return output

    def get_emotion_values(self, emotion):  # Place to define emotion presets
        global storepath
        #print(emotion)
        file_path = os.path.join(storepath, emotion + ".json")
        #print("trying: ", file_path)

        if not os.path.exists(file_path):
            print("using backup for: ", file_path)
            file_path = os.path.join(storepath, "_defaults.json")

        with open(file_path, 'r') as json_file:
            emotions = json.load(json_file)

        targetpose = emotions.get(emotion, {})
        targetpose_values = targetpose
        #targetpose_values = list(targetpose.values())
        #print("targetpose: ", targetpose, "for ", emotion)
        return targetpose_values
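
    # --- Illustrative sketch, not part of the original script -----------------
    # get_emotion_values() expects each <emotion>.json (or _defaults.json) to
    # hold a top-level key named after the emotion, mapping pose-index names to
    # floats. That schema is an assumption inferred from the lookup above; the
    # keys shown are ones consumed elsewhere in this file, and the values are
    # placeholders.
    @staticmethod
    def _example_write_emotion_file(path, emotion="neutral"):  # hypothetical helper
        sketch = {
            emotion: {
                "eyebrow_happy_left_index": 0.0,
                "eyebrow_happy_right_index": 0.0,
                "mouth_aaa_index": 0.0,
                "head_y_index": 0.0,
            }
        }
        with open(path, "w") as f:
            json.dump(sketch, f, indent=2)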
    def animateToEmotion(self, current_pose_list, target_pose_dict):
        transitionPose = []

        # Loop through the current_pose_list
        for item in current_pose_list:
            index, value = item.split(': ')

            # Always take the value from target_pose_dict if the key exists
            if index in target_pose_dict and index != "breathing_index":
                transitionPose.append(f"{index}: {target_pose_dict[index]}")
            else:
                transitionPose.append(item)

        # Ensure that the number of elements in transitionPose matches current_pose_list
        assert len(transitionPose) == len(current_pose_list)

        return transitionPose

    def animationMain(self):
        self.ifacialmocap_pose = self.animationBlink()
        self.ifacialmocap_pose = self.animationHeadMove()
        self.ifacialmocap_pose = self.animationTalking()
        return self.ifacialmocap_pose

    def filter_by_index(self, current_pose_list, index):
        # Create an empty list to store the filtered dictionaries
        filtered_list = []

        # Iterate through each dictionary in the current_pose_list
        for pose_dict in current_pose_list:
            # Check if the given index key exists in the dictionary
            if index in pose_dict:
                # If the key exists, append the dictionary to the filtered list
                filtered_list.append(pose_dict)

        return filtered_list

    def on_erase_background(self, event: wx.Event):
        pass

    def create_animation_panel(self, parent):
        self.animation_panel = wx.Panel(parent, style=wx.RAISED_BORDER)
        self.animation_panel_sizer = wx.BoxSizer(wx.HORIZONTAL)
        self.animation_panel.SetSizer(self.animation_panel_sizer)
        self.animation_panel.SetAutoLayout(1)

        image_size = self.poser.get_image_size()

        # Left Column (Image)
        self.animation_left_panel = wx.Panel(self.animation_panel, style=wx.SIMPLE_BORDER)
        self.animation_left_panel_sizer = wx.BoxSizer(wx.VERTICAL)
        self.animation_left_panel.SetSizer(self.animation_left_panel_sizer)
        self.animation_left_panel.SetAutoLayout(1)
        self.animation_panel_sizer.Add(self.animation_left_panel, 1, wx.EXPAND)

        self.result_image_panel = wx.Panel(self.animation_left_panel, size=(image_size, image_size),
                                           style=wx.SIMPLE_BORDER)
        self.result_image_panel.Bind(wx.EVT_PAINT, self.paint_result_image_panel)
        self.result_image_panel.Bind(wx.EVT_ERASE_BACKGROUND, self.on_erase_background)
        self.result_image_panel.Bind(wx.EVT_LEFT_DOWN, self.load_image)
        self.animation_left_panel_sizer.Add(self.result_image_panel, 1, wx.EXPAND)

        separator = wx.StaticLine(self.animation_left_panel, -1, size=(256, 1))
        self.animation_left_panel_sizer.Add(separator, 0, wx.EXPAND)

        self.fps_text = wx.StaticText(self.animation_left_panel, label="")
        self.animation_left_panel_sizer.Add(self.fps_text, wx.SizerFlags().Border())

        self.animation_left_panel_sizer.Fit(self.animation_left_panel)

        # Right Column (Sliders)
        self.animation_right_panel = wx.Panel(self.animation_panel, style=wx.SIMPLE_BORDER)
        self.animation_right_panel_sizer = wx.BoxSizer(wx.VERTICAL)
        self.animation_right_panel.SetSizer(self.animation_right_panel_sizer)
        self.animation_right_panel.SetAutoLayout(1)
        self.animation_panel_sizer.Add(self.animation_right_panel, 1, wx.EXPAND)

        separator = wx.StaticLine(self.animation_right_panel, -1, size=(256, 5))
        self.animation_right_panel_sizer.Add(separator, 0, wx.EXPAND)

        background_text = wx.StaticText(self.animation_right_panel, label="--- Background ---", style=wx.ALIGN_CENTER)
        self.animation_right_panel_sizer.Add(background_text, 0, wx.EXPAND)

        self.output_background_choice = wx.Choice(
            self.animation_right_panel,
            choices=[
                "TRANSPARENT",
                "GREEN",
                "BLUE",
                "BLACK",
                "WHITE",
                "LOADED",
                "CUSTOM"
            ]
        )
        self.output_background_choice.SetSelection(0)
        self.animation_right_panel_sizer.Add(self.output_background_choice, 0, wx.EXPAND)

        blendshape_groups = {
            'Eyes': ['eyeLookOutLeft', 'eyeLookOutRight', 'eyeLookDownLeft', 'eyeLookUpLeft', 'eyeWideLeft', 'eyeWideRight'],
            'Mouth': ['mouthFrownLeft'],
            'Cheek': ['cheekSquintLeft', 'cheekSquintRight', 'cheekPuff'],
            'Brow': ['browDownLeft', 'browOuterUpLeft', 'browDownRight', 'browOuterUpRight', 'browInnerUp'],
            'Eyelash': ['mouthSmileLeft'],
            'Nose': ['noseSneerLeft', 'noseSneerRight'],
            'Misc': ['tongueOut']
        }

        for group_name, variables in blendshape_groups.items():
            collapsible_pane = wx.CollapsiblePane(self.animation_right_panel, label=group_name,
                                                  style=wx.CP_DEFAULT_STYLE | wx.CP_NO_TLW_RESIZE)
            collapsible_pane.Bind(wx.EVT_COLLAPSIBLEPANE_CHANGED, self.on_pane_changed)
            self.animation_right_panel_sizer.Add(collapsible_pane, 0, wx.EXPAND)
            pane_sizer = wx.BoxSizer(wx.VERTICAL)
            collapsible_pane.GetPane().SetSizer(pane_sizer)

            for variable in variables:
                variable_label = wx.StaticText(collapsible_pane.GetPane(), label=variable)

                # Multiply min and max values by 100 for the slider
                slider = wx.Slider(
                    collapsible_pane.GetPane(),
                    value=0,
                    minValue=0,
                    maxValue=100,
                    size=(150, -1),  # Set the width to 150 and height to default
                    style=wx.SL_HORIZONTAL | wx.SL_LABELS
                )
                slider.SetName(variable)
                slider.Bind(wx.EVT_SLIDER, self.on_slider_change)
                self.sliders[slider.GetId()] = slider

                pane_sizer.Add(variable_label, 0, wx.ALIGN_CENTER | wx.ALL, 5)
                pane_sizer.Add(slider, 0, wx.EXPAND)

        self.animation_right_panel_sizer.Fit(self.animation_right_panel)
        self.animation_panel_sizer.Fit(self.animation_panel)
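
    # --- Illustrative sketch, not part of the original script -----------------
    # Blendshape weights live in [0.0, 1.0] but wx.Slider is integer-valued, so
    # the UI scales by 100 in both directions: convert_to_100() when writing to
    # a slider, and the /100.0 in on_slider_change() when reading back.
    @staticmethod
    def _example_slider_round_trip(weight=0.42):  # hypothetical demo helper
        as_slider = int(max(0.0, min(1.0, weight)) * 100)  # float -> slider position (42)
        back_to_weight = as_slider / 100.0                 # slider position -> float (0.42)
        return as_slider, back_to_weight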
    def on_pane_changed(self, event):
        # Update the layout when a collapsible pane is expanded or collapsed
        self.animation_right_panel.Layout()

    def on_slider_change(self, event):
        slider = event.GetEventObject()
        value = slider.GetValue() / 100.0  # Divide by 100 to get the actual float value
        #print(value)
        slider_name = slider.GetName()
        self.ifacialmocap_pose[slider_name] = value

    def create_ui(self):
        # Make the UI elements
        self.main_sizer = wx.BoxSizer(wx.VERTICAL)
        self.SetSizer(self.main_sizer)
        self.SetAutoLayout(1)
        self.capture_pose_lock = threading.Lock()

        # Main panel with JPS
        self.create_animation_panel(self)
        self.main_sizer.Add(self.animation_panel, wx.SizerFlags(0).Expand().Border(wx.ALL, 5))

    def update_capture_panel(self, event: wx.Event):
        data = self.ifacialmocap_pose
        for rotation_name in ROTATION_NAMES:
            value = data[rotation_name]  # value is currently unused

    @staticmethod
    def convert_to_100(x):
        return int(max(0.0, min(1.0, x)) * 100)

    def paint_source_image_panel(self, event: wx.Event):
        wx.BufferedPaintDC(self.source_image_panel, self.source_image_bitmap)

    def update_source_image_bitmap(self):
        dc = wx.MemoryDC()
        dc.SelectObject(self.source_image_bitmap)
        if self.wx_source_image is None:
            self.draw_nothing_yet_string(dc)
        else:
            dc.Clear()
            dc.DrawBitmap(self.wx_source_image, 0, 0, True)
        del dc

    def draw_nothing_yet_string(self, dc):
        dc.Clear()
        font = wx.Font(wx.FontInfo(14).Family(wx.FONTFAMILY_SWISS))
        dc.SetFont(font)
        w, h = dc.GetTextExtent("Nothing yet!")
        dc.DrawText("Nothing yet!", (self.poser.get_image_size() - w) // 2, (self.poser.get_image_size() - h) // 2)

    def paint_result_image_panel(self, event: wx.Event):
        wx.BufferedPaintDC(self.result_image_panel, self.result_image_bitmap)

    @staticmethod
    def combine_pose_with_names(combine_pose):
        pose_names = [
            'eyeLookInLeft', 'eyeLookOutLeft', 'eyeLookDownLeft', 'eyeLookUpLeft',
            'eyeBlinkLeft', 'eyeSquintLeft', 'eyeWideLeft', 'eyeLookInRight',
            'eyeLookOutRight', 'eyeLookDownRight', 'eyeLookUpRight', 'eyeBlinkRight',
            'eyeSquintRight', 'eyeWideRight', 'browDownLeft', 'browOuterUpLeft',
            'browDownRight', 'browOuterUpRight', 'browInnerUp', 'noseSneerLeft',
            'noseSneerRight', 'cheekSquintLeft', 'cheekSquintRight', 'cheekPuff',
            'mouthLeft', 'mouthDimpleLeft', 'mouthFrownLeft', 'mouthLowerDownLeft',
            'mouthPressLeft', 'mouthSmileLeft', 'mouthStretchLeft', 'mouthUpperUpLeft',
            'mouthRight', 'mouthDimpleRight', 'mouthFrownRight', 'mouthLowerDownRight',
            'mouthPressRight', 'mouthSmileRight', 'mouthStretchRight', 'mouthUpperUpRight',
            'mouthClose', 'mouthFunnel', 'mouthPucker', 'mouthRollLower', 'mouthRollUpper',
            'mouthShrugLower', 'mouthShrugUpper', 'jawLeft', 'jawRight', 'jawForward',
            'jawOpen', 'tongueOut', 'headBoneX', 'headBoneY', 'headBoneZ', 'headBoneQuat',
            'leftEyeBoneX', 'leftEyeBoneY', 'leftEyeBoneZ', 'leftEyeBoneQuat',
            'rightEyeBoneX', 'rightEyeBoneY', 'rightEyeBoneZ', 'rightEyeBoneQuat'
        ]
        pose_dict = dict(zip(pose_names, combine_pose))
        return pose_dict
    def determine_data_type(self, data):
        if isinstance(data, list):
            print("It's a list.")
        elif isinstance(data, dict):
            print("It's a dictionary.")
        elif isinstance(data, str):
            print("It's a string.")
        else:
            print("Unknown data type.")

    def count_elements(self, input_data):
        if isinstance(input_data, (list, dict)):
            return len(input_data)
        else:
            raise TypeError("Input must be a list or dictionary.")

    def convert_list_to_dict(self, list_str):
        # Evaluate the string to get the actual list
        list_data = ast.literal_eval(list_str)

        # Initialize an empty dictionary
        result_dict = {}

        # Convert the list to a dictionary
        for item in list_data:
            key, value_str = item.split(': ')
            value = float(value_str)
            result_dict[key] = value

        return result_dict

    def dict_to_tensor(self, d):
        if isinstance(d, dict):
            return torch.tensor(list(d.values()))
        elif isinstance(d, list):
            return torch.tensor(d)
        else:
            raise ValueError("Unsupported data type passed to dict_to_tensor.")
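
    # --- Illustrative sketch, not part of the original script -----------------
    # Poses travel through this file in three shapes: a flat list of 45 floats
    # from the pose converter, a list of "name: value" strings (built by
    # addNamestoConvert), and a name->float dict (convert_list_to_dict). This
    # demo round-trips a two-entry pose through all three shapes; the names and
    # values are made up.
    @staticmethod
    def _example_pose_representations():
        as_list = [0.1, 0.2]
        as_strings = [f"name_{i}: {v}" for i, v in enumerate(as_list)]
        as_dict = {s.split(': ')[0]: float(s.split(': ')[1]) for s in as_strings}
        return as_list, as_strings, as_dict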
    def update_ifacualmocap_pose(self, ifacualmocap_pose, emotion_pose):
        # The following values exist in emotion_pose but are not defined in ifacualmocap_pose:
        # eye_happy_wink_left_index, eye_happy_wink_right_index
        # eye_surprised_left_index, eye_surprised_right_index
        # eye_relaxed_left_index, eye_relaxed_right_index
        # eye_unimpressed
        # eye_raised_lower_eyelid_left_index, eye_raised_lower_eyelid_right_index
        # mouth_uuu_index
        # mouth_eee_index
        # mouth_ooo_index
        # mouth_delta
        # mouth_smirk
        # body_y_index
        # body_z_index
        # breathing_index

        # Update eyebrow values (note: the raised values overwrite the troubled
        # values assigned just above; the lowered/serious values accumulate)
        ifacualmocap_pose['browDownLeft'] = emotion_pose['eyebrow_troubled_left_index']
        ifacualmocap_pose['browDownRight'] = emotion_pose['eyebrow_troubled_right_index']
        ifacualmocap_pose['browOuterUpLeft'] = emotion_pose['eyebrow_angry_left_index']
        ifacualmocap_pose['browOuterUpRight'] = emotion_pose['eyebrow_angry_right_index']
        ifacualmocap_pose['browInnerUp'] = emotion_pose['eyebrow_happy_left_index']
        ifacualmocap_pose['browInnerUp'] += emotion_pose['eyebrow_happy_right_index']
        ifacualmocap_pose['browDownLeft'] = emotion_pose['eyebrow_raised_left_index']
        ifacualmocap_pose['browDownRight'] = emotion_pose['eyebrow_raised_right_index']
        ifacualmocap_pose['browDownLeft'] += emotion_pose['eyebrow_lowered_left_index']
        ifacualmocap_pose['browDownRight'] += emotion_pose['eyebrow_lowered_right_index']
        ifacualmocap_pose['browDownLeft'] += emotion_pose['eyebrow_serious_left_index']
        ifacualmocap_pose['browDownRight'] += emotion_pose['eyebrow_serious_right_index']

        # Update eye values
        ifacualmocap_pose['eyeWideLeft'] = emotion_pose['eye_surprised_left_index']
        ifacualmocap_pose['eyeWideRight'] = emotion_pose['eye_surprised_right_index']

        # Update eye blink (though we will overwrite it later)
        ifacualmocap_pose['eyeBlinkLeft'] = emotion_pose['eye_wink_left_index']
        ifacualmocap_pose['eyeBlinkRight'] = emotion_pose['eye_wink_right_index']

        # Update iris rotation values
        ifacualmocap_pose['eyeLookInLeft'] = -emotion_pose['iris_rotation_y_index']
        ifacualmocap_pose['eyeLookOutLeft'] = emotion_pose['iris_rotation_y_index']
        ifacualmocap_pose['eyeLookInRight'] = emotion_pose['iris_rotation_y_index']
        ifacualmocap_pose['eyeLookOutRight'] = -emotion_pose['iris_rotation_y_index']
        ifacualmocap_pose['eyeLookUpLeft'] = emotion_pose['iris_rotation_x_index']
        ifacualmocap_pose['eyeLookDownLeft'] = -emotion_pose['iris_rotation_x_index']
        ifacualmocap_pose['eyeLookUpRight'] = emotion_pose['iris_rotation_x_index']
        ifacualmocap_pose['eyeLookDownRight'] = -emotion_pose['iris_rotation_x_index']

        # Update iris size values
        ifacualmocap_pose['irisWideLeft'] = emotion_pose['iris_small_left_index']
        ifacualmocap_pose['irisWideRight'] = emotion_pose['iris_small_right_index']

        # Update head rotation values
        ifacualmocap_pose['headBoneX'] = -emotion_pose['head_x_index'] * 15.0
        ifacualmocap_pose['headBoneY'] = -emotion_pose['head_y_index'] * 10.0
        ifacualmocap_pose['headBoneZ'] = emotion_pose['neck_z_index'] * 15.0

        # Update mouth values
        ifacualmocap_pose['mouthSmileLeft'] = emotion_pose['mouth_aaa_index']
        ifacualmocap_pose['mouthSmileRight'] = emotion_pose['mouth_aaa_index']
        ifacualmocap_pose['mouthFrownLeft'] = emotion_pose['mouth_lowered_corner_left_index']
        ifacualmocap_pose['mouthFrownRight'] = emotion_pose['mouth_lowered_corner_right_index']
        ifacualmocap_pose['mouthPressLeft'] = emotion_pose['mouth_raised_corner_left_index']
        ifacualmocap_pose['mouthPressRight'] = emotion_pose['mouth_raised_corner_right_index']

        return ifacualmocap_pose

    def update_blinking_pose(self, tranisitiondPose):
        PARTS = ['wink_left_index', 'wink_right_index']
        updated_list = []

        should_blink = random.random() <= 0.03  # Determine if there should be a blink

        for item in tranisitiondPose:
            key, value = item.split(': ')
            if key in PARTS:
                # If there should be a blink, set value to 1; otherwise, use the provided value
                new_value = 1 if should_blink else float(value)
                updated_list.append(f"{key}: {new_value}")
            else:
                updated_list.append(item)

        return updated_list

    def update_talking_pose(self, tranisitiondPose):
        global is_talking, is_talking_override

        MOUTHPARTS = ['mouth_aaa_index']
        updated_list = []

        for item in tranisitiondPose:
            key, value = item.split(': ')
            if key in MOUTHPARTS and is_talking_override:
                new_value = self.random_generate_value(-5000, 5000, abs(1 - float(value)))
                updated_list.append(f"{key}: {new_value}")
            else:
                updated_list.append(item)

        return updated_list

    def update_sway_pose_good(self, tranisitiondPose):
        MOVEPARTS = ['head_y_index']
        updated_list = []

        print(self.start_values, self.targets, self.progress, self.direction)

        for item in tranisitiondPose:
            key, value = item.split(': ')
            if key in MOVEPARTS:
                current_value = float(value)

                # If progress reaches 1 or 0
                if self.progress[key] >= 1 or self.progress[key] <= 0:
                    # Reverse direction
                    self.direction[key] *= -1

                    # If direction is now forward, set a new target and store starting value
                    if self.direction[key] == 1:
                        self.start_values[key] = current_value
                        self.targets[key] = current_value + random.uniform(-1, 1)
                        self.progress[key] = 0  # Reset progress when setting a new target

                # Use lerp to interpolate between start and target values
                new_value = self.start_values[key] + self.progress[key] * (self.targets[key] - self.start_values[key])

                # Ensure the value remains within bounds (just in case)
                new_value = min(max(new_value, -1), 1)

                # Update progress based on direction
                self.progress[key] += 0.02 * self.direction[key]

                updated_list.append(f"{key}: {new_value}")
            else:
                updated_list.append(item)

        return updated_list
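
    # --- Illustrative sketch, not part of the original script -----------------
    # Both sway variants animate head_y_index by linear interpolation ("lerp")
    # between a stored start value and a randomly chosen target, advancing a
    # progress scalar each frame and reversing direction at the endpoints.
    # Reduced to its core:
    @staticmethod
    def _example_lerp(start, target, progress):  # hypothetical demo helper
        # progress in [0, 1]: 0.0 returns start, 1.0 returns target
        return start + progress * (target - start)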
    def update_sway_pose(self, tranisitiondPose):
        MOVEPARTS = ['head_y_index']
        updated_list = []

        #print(self.start_values, self.targets, self.progress, self.direction)

        for item in tranisitiondPose:
            key, value = item.split(': ')
            if key in MOVEPARTS:
                current_value = float(value)

                # Use lerp to interpolate between start and target values
                new_value = self.start_values[key] + self.progress[key] * (self.targets[key] - self.start_values[key])

                # Ensure the value remains within bounds (just in case)
                new_value = min(max(new_value, -1), 1)

                # Check if we've reached the target or start value
                is_close_to_target = abs(new_value - self.targets[key]) < 0.04
                is_close_to_start = abs(new_value - self.start_values[key]) < 0.04

                if (self.direction[key] == 1 and is_close_to_target) or (self.direction[key] == -1 and is_close_to_start):
                    # Reverse direction
                    self.direction[key] *= -1

                    # If direction is now forward, set a new target and store starting value
                    if self.direction[key] == 1:
                        self.start_values[key] = new_value
                        self.targets[key] = current_value + random.uniform(-0.6, 0.6)
                        self.progress[key] = 0  # Reset progress when setting a new target

                # Update progress based on direction
                self.progress[key] += 0.04 * self.direction[key]

                updated_list.append(f"{key}: {new_value}")
            else:
                updated_list.append(item)

        return updated_list

    def update_transition_pose(self, last_transition_pose_s, transition_pose_s):
        inMotion = True  # local flag; the module-level inMotion is not updated here

        # Create dictionaries from the lists for easier comparison
        last_transition_dict = {}
        for item in last_transition_pose_s:
            key = item.split(': ')[0]
            value = float(item.split(': ')[1])
            if key == 'unknown':
                key += f"_{list(last_transition_dict.values()).count(value)}"
            last_transition_dict[key] = value

        transition_dict = {}
        for item in transition_pose_s:
            key = item.split(': ')[0]
            value = float(item.split(': ')[1])
            if key == 'unknown':
                key += f"_{list(transition_dict.values()).count(value)}"
            transition_dict[key] = value

        updated_last_transition_pose = []
        for key, last_value in last_transition_dict.items():
            # If the key exists in transition_dict, move the value toward the target
            if key in transition_dict:
                # If the key is 'wink_left_index' or 'wink_right_index', set the value directly; don't animate blinks
                if key in ['wink_left_index', 'wink_right_index']:  # BLINK FIX
                    last_value = transition_dict[key]
                # For all other keys, increment the value by 0.1 of the delta toward the target
                else:
                    delta = transition_dict[key] - last_value
                    last_value += delta * 0.1

            # Reconstruct the string and append it to the updated list
            updated_last_transition_pose.append(f"{key}: {last_value}")

        # If any value is less than the target, set inMotion to True
        if any(last_transition_dict[k] < transition_dict[k] for k in last_transition_dict if k in transition_dict):
            inMotion = True
        else:
            inMotion = False

        return updated_last_transition_pose
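
    # --- Illustrative sketch, not part of the original script -----------------
    # update_transition_pose() eases each value 10% of the remaining distance
    # toward its target every frame, i.e. an exponential approach: motion is
    # fast at first and slows as it nears the target. Isolated from the string
    # plumbing above:
    @staticmethod
    def _example_ease_step(current, target, rate=0.1):  # hypothetical demo helper
        # One smoothing step; calling this repeatedly converges on target
        return current + (target - current) * rate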
    def update_result_image_bitmap(self, event: Optional[wx.Event] = None):
        global global_timer_paused
        global initAMI
        global global_result_image
        global global_reload
        global emotion
        global fps
        global current_pose
        global is_talking
        global is_talking_override
        global lasttranisitiondPose

        if global_timer_paused:
            return

        try:
            if global_reload is not None:
                MainFrame.load_image(self, event=None, file_path=None)  # call load_image function here
                return

            #OLD METHOD
            #ifacialmocap_pose = self.animationMain() #GET ANIMATION CHANGES
            #current_posesaved = self.pose_converter.convert(ifacialmocap_pose)
            #combined_posesaved = current_posesaved

            #NEW METHOD
            #CREATES THE DEFAULT POSE AND STORES OBJ IN STRING
            #ifacialmocap_pose = self.animationMain() #DISABLE FOR TESTING!!!!!!!!!!!!!!!!!!!!!!!!
            ifacialmocap_pose = self.ifacialmocap_pose
            #print("ifacialmocap_pose", ifacialmocap_pose)

            #GET EMOTION SETTING
            emotion_pose = self.get_emotion_values(emotion)
            #print("emotion_pose ", emotion_pose)

            #MERGE EMOTION SETTING WITH CURRENT OUTPUT
            updated_pose = self.update_ifacualmocap_pose(ifacialmocap_pose, emotion_pose)
            #print("updated_pose ", updated_pose)

            #CONVERT RESULT TO FORMAT NN CAN USE
            current_pose = self.pose_converter.convert(updated_pose)
            #print("current_pose ", current_pose)

            #SEND THROUGH CONVERT (redundant: update_ifacualmocap_pose mutates
            #ifacialmocap_pose in place, so this converts the same dict again)
            current_pose = self.pose_converter.convert(ifacialmocap_pose)
            #print("current_pose2 ", current_pose)

            #ADD LABELS/NAMES TO THE POSE
            names_current_pose = MainFrame.addNamestoConvert(current_pose)
            #print("current pose :", names_current_pose)

            #GET THE EMOTION VALUES again for some reason
            emotion_pose2 = self.get_emotion_values(emotion)
            #print("target pose :", emotion_pose2)

            #APPLY VALUES TO THE POSE AGAIN?? This needs to overwrite the values
            tranisitiondPose = self.animateToEmotion(names_current_pose, emotion_pose2)
            #print("combine pose :", tranisitiondPose)

            #smooth animate
            #print("LAST VALUES: ", lasttranisitiondPose)
            #print("TARGET VALUES: ", tranisitiondPose)
            if lasttranisitiondPose != "NotInit":
                tranisitiondPose = self.update_transition_pose(lasttranisitiondPose, tranisitiondPose)
                #print("smoothed: ", tranisitiondPose)

            #Animate blinking
            tranisitiondPose = self.update_blinking_pose(tranisitiondPose)

            #Animate Head Sway
            tranisitiondPose = self.update_sway_pose(tranisitiondPose)

            #Animate Talking
            tranisitiondPose = self.update_talking_pose(tranisitiondPose)

            #reformat the data correctly
            parsed_data = []
            for item in tranisitiondPose:
                key, value_str = item.split(': ')
                value = float(value_str)
                parsed_data.append((key, value))
            tranisitiondPosenew = [value for _, value in parsed_data]

            #not sure what this is for TBH
            ifacialmocap_pose = tranisitiondPosenew

            if self.torch_source_image is None:
                dc = wx.MemoryDC()
                dc.SelectObject(self.result_image_bitmap)
                self.draw_nothing_yet_string(dc)
                del dc
                return

            #pose = torch.tensor(tranisitiondPosenew, device=self.device, dtype=self.poser.get_dtype())
            pose = self.dict_to_tensor(tranisitiondPosenew).to(device=self.device, dtype=self.poser.get_dtype())

            with torch.no_grad():
                output_image = self.poser.pose(self.torch_source_image, pose)[0].float()
                output_image = convert_linear_to_srgb((output_image + 1.0) / 2.0)

                c, h, w = output_image.shape
                output_image = (255.0 * torch.transpose(output_image.reshape(c, h * w), 0, 1)).reshape(h, w, c).byte()

            numpy_image = output_image.detach().cpu().numpy()
            # NOTE: wx.ImageFromBuffer takes (width, height); passing shape[0]
            # and shape[1] in this order relies on the poser output being square
            wx_image = wx.ImageFromBuffer(
                numpy_image.shape[0],
                numpy_image.shape[1],
                numpy_image[:, :, 0:3].tobytes(),
                numpy_image[:, :, 3].tobytes())
            wx_bitmap = wx_image.ConvertToBitmap()

            dc = wx.MemoryDC()
            dc.SelectObject(self.result_image_bitmap)
            dc.Clear()
            dc.DrawBitmap(wx_bitmap,
                          (self.poser.get_image_size() - numpy_image.shape[0]) // 2,
                          (self.poser.get_image_size() - numpy_image.shape[1]) // 2, True)

            numpy_image_bgra = numpy_image[:, :, [2, 1, 0, 3]]  # Convert color channels from RGB to BGR and keep alpha channel
            global_result_image = numpy_image_bgra

            del dc

            time_now = time.time_ns()
            if self.last_update_time is not None:
                elapsed_time = time_now - self.last_update_time
                fps = 1.0 / (elapsed_time / 10**9)
                if self.torch_source_image is not None:
                    self.fps_statistics.add_fps(fps)
                self.fps_text.SetLabelText("FPS = %0.2f" % self.fps_statistics.get_average_fps())
            self.last_update_time = time_now

            if initAMI:  # If the models were just initialized, pause the animation to save resources
                global_timer_paused = True
                initAMI = False

            if random.random() <= 0.01:
                trimmed_fps = round(fps, 1)
                #print("talkinghead FPS: {:.1f}".format(trimmed_fps))

            # Store current pose to use as the last pose on the next loop
            lasttranisitiondPose = tranisitiondPose

            self.Refresh()

        except KeyboardInterrupt:
            print("Update process was interrupted by the user.")
            wx.Exit()
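
    # --- Illustrative sketch, not part of the original script -----------------
    # update_result_image_bitmap() converts the poser output from a CHW float
    # tensor in [-1, 1] to an HWC uint8 array before handing it to wx and to
    # the Flask stream. The same transform on its own, assuming a 4-channel
    # RGBA tensor like the one the poser produces:
    @staticmethod
    def _example_tensor_to_uint8(output_image: torch.Tensor) -> np.ndarray:
        img = convert_linear_to_srgb((output_image + 1.0) / 2.0)  # [-1, 1] -> [0, 1], linear -> sRGB
        c, h, w = img.shape
        img = (255.0 * torch.transpose(img.reshape(c, h * w), 0, 1)).reshape(h, w, c).byte()
        return img.detach().cpu().numpy()  # HWC uint8, ready for wx.ImageFromBuffer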
    @staticmethod
    def resize_image(image, size=(512, 512)):
        image.thumbnail(size, Image.LANCZOS)  # Step 1: Resize to keep the aspect ratio with the larger dimension being 512 pixels
        new_image = Image.new("RGBA", size)  # Step 2: Create a new 512x512 image with transparency
        new_image.paste(image, ((size[0] - image.size[0]) // 2,
                                (size[1] - image.size[1]) // 2))  # Step 3: Paste the resized image into the new image, centered
        return new_image

    def load_image(self, event: wx.Event, file_path=None):
        global global_source_image  # Declare global_source_image as a global variable
        global global_reload

        if global_reload is not None:
            file_path = "global_reload"

        try:
            if file_path == "global_reload":
                pil_image = global_reload
            else:
                pil_image = resize_PIL_image(
                    extract_PIL_image_from_filelike(file_path),
                    (self.poser.get_image_size(), self.poser.get_image_size()))

            w, h = pil_image.size

            if pil_image.size != (512, 512):
                print("Resizing Char Card to work")
                pil_image = MainFrame.resize_image(pil_image)

            w, h = pil_image.size

            if pil_image.mode != 'RGBA':
                self.source_image_string = "Image must have alpha channel!"
                self.wx_source_image = None
                self.torch_source_image = None
            else:
                self.wx_source_image = wx.Bitmap.FromBufferRGBA(w, h, pil_image.convert("RGBA").tobytes())
                self.torch_source_image = extract_pytorch_image_from_PIL_image(pil_image) \
                    .to(self.device).to(self.poser.get_dtype())

            global_source_image = self.torch_source_image  # Set global_source_image as a global variable

            self.update_source_image_bitmap()

        except Exception as error:
            print("Error: ", error)

        global_reload = None  # reset the global reload
        if self is not None:  # load_image may be invoked unbound with self=None (see talkinghead_load_file)
            self.Refresh()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='uWu Waifu')
    parser.add_argument(
        '--model',
        type=str,
        required=False,
        default='separable_float',
        choices=['standard_float', 'separable_float', 'standard_half', 'separable_half'],
        help='The model to use.'
    )
    parser.add_argument('--char', type=str, required=False, help='The path to the character image.')
    parser.add_argument(
        '--device',
        type=str,
        required=False,
        default='cuda',
        choices=['cpu', 'cuda'],
        help='The device to use for PyTorch ("cuda" for GPU, "cpu" for CPU).'
    )

    args = parser.parse_args()
    launch_gui(device=args.device, model=args.model)
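
# --- Illustrative usage note, not part of the original script -----------------
# Example invocations, assuming this file is saved as app.py (the actual
# filename is not stated in this section):
#   python app.py                                    # separable_float on CUDA
#   python app.py --device cpu --model standard_half
#   python app.py --char path/to/character.png       # --char is parsed but unused here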