Update app.py
Browse files
app.py
CHANGED
@@ -22,25 +22,25 @@ from flask_swagger_ui import get_swaggerui_blueprint
|
|
22 |
import uuid
|
23 |
|
24 |
class AnimationConfig:
|
25 |
-
def __init__(self, driven_audio_path, source_image_path, result_folder):
|
26 |
self.driven_audio = driven_audio_path
|
27 |
self.source_image = source_image_path
|
28 |
self.ref_eyeblink = None
|
29 |
self.ref_pose = None
|
30 |
self.checkpoint_dir = './checkpoints'
|
31 |
self.result_dir = result_folder
|
32 |
-
self.pose_style =
|
33 |
-
self.batch_size =
|
34 |
self.size = 256
|
35 |
-
self.expression_scale =
|
36 |
self.input_yaw = None
|
37 |
self.input_pitch = None
|
38 |
self.input_roll = None
|
39 |
-
self.enhancer =
|
40 |
self.background_enhancer = None
|
41 |
self.cpu = False
|
42 |
self.face3dvis = False
|
43 |
-
self.still = False
|
44 |
self.preprocess = 'crop'
|
45 |
self.verbose = False
|
46 |
self.old_version = False
|
@@ -74,6 +74,7 @@ app.register_blueprint(swagger_ui_blueprint, url_prefix=SWAGGER_URL)
|
|
74 |
|
75 |
app.config['temp_response'] = None
|
76 |
app.config['generation_thread'] = None
|
|
|
77 |
|
78 |
TEMP_DIR = tempfile.TemporaryDirectory()
|
79 |
|
@@ -186,32 +187,43 @@ def translate_text(text, target_language):
|
|
186 |
model="gpt-4-0125-preview",
|
187 |
messages=[
|
188 |
{"role": "system", "content": "You are a helpful assistant."},
|
189 |
-
{"role": "user", "content": f"Translate the following text into {target_language}
|
190 |
],
|
191 |
max_tokens=len(text),
|
192 |
temperature=0.3,
|
193 |
)
|
194 |
return response
|
195 |
|
|
|
196 |
@app.route("/run", methods=['POST'])
|
197 |
def generate_video():
|
198 |
if request.method == 'POST':
|
199 |
source_image = request.files['source_image']
|
200 |
text_prompt = request.form['text_prompt']
|
201 |
voice_cloning = request.form.get('voice_cloning', 'no')
|
202 |
-
target_language = request.form.get('target_language',
|
|
|
|
|
|
|
|
|
203 |
|
204 |
-
if target_language
|
205 |
response = translate_text(text_prompt, target_language)
|
206 |
text_prompt = response.choices[0].message.content.strip()
|
207 |
print('text_prompt',text_prompt)
|
208 |
|
|
|
209 |
source_image_path = save_uploaded_file(source_image, 'source_image.png')
|
210 |
print(source_image_path)
|
211 |
|
212 |
if voice_cloning == 'no':
|
|
|
|
|
|
|
|
|
|
|
213 |
response = client.audio.speech.create(model="tts-1-hd",
|
214 |
-
voice=
|
215 |
input = text_prompt)
|
216 |
|
217 |
with tempfile.NamedTemporaryFile(suffix=".wav", prefix="text_to_speech_", delete=False) as temp_file:
|
@@ -241,7 +253,7 @@ def generate_video():
|
|
241 |
os.makedirs(result_folder, exist_ok=True)
|
242 |
|
243 |
# Example of using the class with some hypothetical paths
|
244 |
-
args = AnimationConfig(driven_audio_path=driven_audio_path, source_image_path=source_image_path, result_folder=result_folder)
|
245 |
|
246 |
if torch.cuda.is_available() and not args.cpu:
|
247 |
args.device = "cuda"
|
@@ -263,7 +275,7 @@ def generate_video():
|
|
263 |
|
264 |
@app.route("/status", methods=["GET"])
|
265 |
def check_generation_status():
|
266 |
-
response = {"base64_video": "", "status": ""}
|
267 |
process_id = request.args.get('process_id', None)
|
268 |
|
269 |
# process_id is required to check the status for that specific process
|
@@ -275,6 +287,7 @@ def check_generation_status():
|
|
275 |
# app.config['temp_response']['status'] = 'completed'
|
276 |
final_response = app.config['temp_response']
|
277 |
response["base64_video"] = final_response
|
|
|
278 |
response["status"] = "completed"
|
279 |
return jsonify(response)
|
280 |
return jsonify({"error":"No process id provided"})
|
|
|
22 |
import uuid
|
23 |
|
24 |
class AnimationConfig:
    """Argparse-style settings container for the talking-head generation run.

    Bundles the input/output paths and tuning knobs that the downstream
    animation pipeline reads as attributes (it is consumed like an
    ``argparse.Namespace``). Previously hard-coded values (batch size,
    render size, still mode, preprocess mode) are now keyword parameters
    with the same defaults, so existing callers are unaffected.
    """

    def __init__(self, driven_audio_path, source_image_path, result_folder,
                 pose_style, expression_scale, enhancer,
                 batch_size=2, size=256, still=False, preprocess='crop'):
        """Build a config.

        Args:
            driven_audio_path: Path to the audio file driving the animation.
            source_image_path: Path to the source face image.
            result_folder: Directory where results are written.
            pose_style: Integer pose-style index for the generator.
            expression_scale: Multiplier applied to facial expression intensity.
            enhancer: Name of the face enhancer to use, or None to disable.
            batch_size: Frames per inference batch (default 2).
            size: Output render size in pixels (default 256).
            still: If True, reduce head motion (default False).
            preprocess: Image preprocessing mode (default 'crop').
        """
        # Required inputs / outputs.
        self.driven_audio = driven_audio_path
        self.source_image = source_image_path
        self.result_dir = result_folder

        # Optional reference clips (not used by this app's callers).
        self.ref_eyeblink = None
        self.ref_pose = None

        # Model checkpoints are expected in a fixed local directory.
        self.checkpoint_dir = './checkpoints'

        # Generation tuning knobs.
        self.pose_style = pose_style
        self.batch_size = batch_size
        self.size = size
        self.expression_scale = expression_scale

        # Explicit head-rotation overrides; None lets the model decide.
        self.input_yaw = None
        self.input_pitch = None
        self.input_roll = None

        # Post-processing enhancers; background enhancement is disabled here.
        self.enhancer = enhancer
        self.background_enhancer = None

        # Execution / debugging flags. The caller flips device selection
        # based on torch.cuda.is_available() and this cpu flag.
        self.cpu = False
        self.face3dvis = False
        self.still = still
        self.preprocess = preprocess
        self.verbose = False
        self.old_version = False
|
|
|
74 |
|
75 |
app.config['temp_response'] = None
|
76 |
app.config['generation_thread'] = None
|
77 |
+
app.config['text_prompt'] = None
|
78 |
|
79 |
TEMP_DIR = tempfile.TemporaryDirectory()
|
80 |
|
|
|
187 |
model="gpt-4-0125-preview",
|
188 |
messages=[
|
189 |
{"role": "system", "content": "You are a helpful assistant."},
|
190 |
+
{"role": "user", "content": f"Translate and give just the following text into {target_language} as response: {text}\n"},
|
191 |
],
|
192 |
max_tokens=len(text),
|
193 |
temperature=0.3,
|
194 |
)
|
195 |
return response
|
196 |
|
197 |
+
|
198 |
@app.route("/run", methods=['POST'])
|
199 |
def generate_video():
|
200 |
if request.method == 'POST':
|
201 |
source_image = request.files['source_image']
|
202 |
text_prompt = request.form['text_prompt']
|
203 |
voice_cloning = request.form.get('voice_cloning', 'no')
|
204 |
+
target_language = request.form.get('target_language', None)
|
205 |
+
pose_style = int(request.form.get('pose_style', 1))
|
206 |
+
expression_scale = int(request.form.get('expression_scale', 1))
|
207 |
+
enhancer = request.form.get('enhancer', None)
|
208 |
+
voice_gender = request.form.get('voice_gender', 'male')
|
209 |
|
210 |
+
if target_language is not None:
|
211 |
response = translate_text(text_prompt, target_language)
|
212 |
text_prompt = response.choices[0].message.content.strip()
|
213 |
print('text_prompt',text_prompt)
|
214 |
|
215 |
+
app.config['text_prompt'] = text_prompt
|
216 |
source_image_path = save_uploaded_file(source_image, 'source_image.png')
|
217 |
print(source_image_path)
|
218 |
|
219 |
if voice_cloning == 'no':
|
220 |
+
if voice_gender == 'male':
|
221 |
+
voice = 'onyx'
|
222 |
+
else:
|
223 |
+
voice = 'nova'
|
224 |
+
|
225 |
response = client.audio.speech.create(model="tts-1-hd",
|
226 |
+
voice=voice,
|
227 |
input = text_prompt)
|
228 |
|
229 |
with tempfile.NamedTemporaryFile(suffix=".wav", prefix="text_to_speech_", delete=False) as temp_file:
|
|
|
253 |
os.makedirs(result_folder, exist_ok=True)
|
254 |
|
255 |
# Example of using the class with some hypothetical paths
|
256 |
+
args = AnimationConfig(driven_audio_path=driven_audio_path, source_image_path=source_image_path, result_folder=result_folder, pose_style=pose_style, expression_scale=expression_scale, enhancer=enhancer)
|
257 |
|
258 |
if torch.cuda.is_available() and not args.cpu:
|
259 |
args.device = "cuda"
|
|
|
275 |
|
276 |
@app.route("/status", methods=["GET"])
|
277 |
def check_generation_status():
|
278 |
+
response = {"base64_video": "","text_prompt":"", "status": ""}
|
279 |
process_id = request.args.get('process_id', None)
|
280 |
|
281 |
# process_id is required to check the status for that specific process
|
|
|
287 |
# app.config['temp_response']['status'] = 'completed'
|
288 |
final_response = app.config['temp_response']
|
289 |
response["base64_video"] = final_response
|
290 |
+
response["text_prompt"] = app.config.get('text_prompt')
|
291 |
response["status"] = "completed"
|
292 |
return jsonify(response)
|
293 |
return jsonify({"error":"No process id provided"})
|