Blane187 committed on
Commit 9d86442
1 Parent(s): 03d6d06

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -1,35 +1,3 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ # Auto detect text files and perform LF normalization
+ * text=auto
+ audio/test.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,32 @@
- ---
- title: Audio Mouth
- emoji: 🚀
- colorFrom: purple
- colorTo: pink
- sdk: gradio
- sdk_version: 4.44.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AudioMouth
+
+ AudioMouth is a simple Python app that generates animated videos by syncing mouth movements to audio decibel levels. It processes an audio file and switches between images (open and closed mouth) to create a lip-sync effect.
+
+ ## Features
+ - Syncs mouth images to the audio based on decibel levels.
+ - Configurable FPS.
+ - Outputs video with a green-screen background (or a custom color) for chroma keying.
+
+ ## Installation
+ Clone the repository and install the required dependencies by running the following from a command line:
+
+ ```bash
+ git clone https://github.com/luisesantillan/AudioMouth
+ cd AudioMouth
+ pip install -r requirements.txt
+ ```
+ ## Usage
+ Add 1-4 images to the frames folder and edit the paths in config.json to point to the images you want.
+ Put the audio files into the audio folder; one animation is created per audio file.
+
+ closed_mouth | closed_mouth_blinking | open_mouth | open_mouth_blinking
+ :-------------------------:|:-------------------------:|:-------------------------:|:-------------------------:
+ ![closed_mouth](https://github.com/user-attachments/assets/3ed0c597-df0e-4165-98d4-cf978e1338bb) | ![closed_mouth_blinking](https://github.com/user-attachments/assets/1296c2a7-4304-4935-b398-4ee5e1fe8a10) | ![open_mouth](https://github.com/user-attachments/assets/4715a73a-1a27-4ac9-a20b-954dde0aac0b) | ![open_mouth_blinking](https://github.com/user-attachments/assets/b7d04648-9158-4dd2-889c-27c67a64e0b2)
+
+ If you're on Windows, open run.bat; the output will be saved in the output folder.
+ If you're on Linux, run main.py directly.
+
+ https://github.com/user-attachments/assets/dcf3728c-0d3b-455d-b17e-5e9819be069b
+
+
+
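
The README's description boils down to a per-frame threshold test. Below is a minimal, illustrative distillation of the decision that `main.py` (added later in this commit) makes for each video frame; `pick_frame` and its arguments are names chosen for this sketch and are not part of the repository:

```python
# Illustrative only: the frame-selection rule of main.py, stripped of all I/O.
def pick_frame(chunk_dBFS: float, decibel_threshold: float, blinking: bool) -> str:
    """Open the mouth when the current audio chunk is louder than the threshold."""
    if chunk_dBFS > decibel_threshold:
        return "open_mouth_blinking" if blinking else "open_mouth"
    return "closed_mouth_blinking" if blinking else "closed_mouth"

print(pick_frame(-18.0, -26.0, blinking=False))  # -> "open_mouth"
```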
audio/test.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1988fd65b06aef6657441ab017d4cf40f86ca36005bb1df3a26b3c7ad6628dd8
+ size 1269936
config.json ADDED
@@ -0,0 +1,21 @@
+ {
+     "background_color": [0, 255, 0],
+     "frame_paths": {
+         "closed_mouth": "frames/closed_mouth.png",
+         "open_mouth": "frames/open_mouth.png",
+         "closed_mouth_blinking": "frames/closed_mouth_blinking.png",
+         "open_mouth_blinking": "frames/open_mouth_blinking.png"
+     },
+     "output_path": "output",
+     "frame_duration_ms": 1000,
+     "audio_path": "audio",
+     "blink_duration": 0.15,
+     "minimum_blinking_delay": 2,
+     "maximum_blinking_delay": 5,
+     "initial_blink_time": -2,
+     "frame_rate": 24,
+     "dynamic_threshold": 1,
+     "decibel_threshold": -30.0,
+     "codec": "libx264",
+     "audio_codec": "aac"
+ }
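
Two of these values interact in a non-obvious way: `main.py` divides `frame_duration_ms` by `frame_rate`, so with the defaults each video frame is matched against roughly 41 ms of audio, and `dynamic_threshold: 1` means `decibel_threshold` is ignored in favour of the average chunk loudness plus 4 dB. A small sketch of those derivations, assuming this config.json is on disk:

```python
import json

# Sketch: how main.py consumes the configuration values above.
with open("config.json") as f:
    cfg = json.load(f)

# 1000 // 24 == 41: each video frame is matched against ~41 ms of audio.
chunk_ms = cfg["frame_duration_ms"] // cfg["frame_rate"]
print(f"audio per video frame: {chunk_ms} ms")

if cfg["dynamic_threshold"] == 1:
    # decibel_threshold is ignored; main.py uses (average chunk dBFS + 4) instead.
    print("threshold: average chunk dBFS + 4")
else:
    print(f"fixed threshold: {cfg['decibel_threshold']} dBFS")
```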
frames/closed_mouth.png ADDED
frames/closed_mouth_blinking.png ADDED
frames/open_mouth.png ADDED
frames/open_mouth_blinking.png ADDED
main.py ADDED
@@ -0,0 +1,115 @@
+ import os, random, json
+ import numpy as np
+ from pydub import AudioSegment
+ from pydub.utils import make_chunks
+ from pydub.effects import compress_dynamic_range
+ from PIL import Image
+ import cv2
+ from moviepy.editor import VideoClip, AudioFileClip
+
+ # Load configuration
+ with open('config.json', 'r') as config_file:
+     config = json.load(config_file)
+
+ # Load the images
+ closed_mouth_img = Image.open(config['frame_paths']['closed_mouth'])
+ open_mouth_img = Image.open(config['frame_paths']['open_mouth'])
+ closed_mouth_blinking_img = Image.open(config['frame_paths']['closed_mouth_blinking'])
+ open_mouth_blinking_img = Image.open(config['frame_paths']['open_mouth_blinking'])
+
+ # Create a background with the color from config
+ background_color = tuple(config['background_color'])
+ background = Image.new('RGBA', closed_mouth_img.size, background_color)
+
+ # Composite the images with the background
+ closed_mouth_img = Image.alpha_composite(background, closed_mouth_img)
+ open_mouth_img = Image.alpha_composite(background, open_mouth_img)
+ closed_mouth_blinking_img = Image.alpha_composite(background, closed_mouth_blinking_img)
+ open_mouth_blinking_img = Image.alpha_composite(background, open_mouth_blinking_img)
+
+ # Convert images to OpenCV format
+ closed_mouth_cv = cv2.cvtColor(np.array(closed_mouth_img), cv2.COLOR_RGBA2RGB)
+ open_mouth_cv = cv2.cvtColor(np.array(open_mouth_img), cv2.COLOR_RGBA2RGB)
+ closed_mouth_blinking_cv = cv2.cvtColor(np.array(closed_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
+ open_mouth_blinking_cv = cv2.cvtColor(np.array(open_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
+
+ # Decide whether to blink
+ def should_blink(t, last_blink_time):
+     if t - last_blink_time > random.uniform(config['minimum_blinking_delay'], config['maximum_blinking_delay']):
+         return True
+     return False
+
+ blink_duration = config['blink_duration']
+ last_blink_time = config['initial_blink_time']
+
+ # Set parameters
+ frame_rate = config['frame_rate']
+ frame_duration_ms = config['frame_duration_ms'] // frame_rate
+
+ for audio_file in os.listdir(config['audio_path']):
+     # Load the audio
+     audio_path = os.path.join(config['audio_path'], audio_file)
+     audio = AudioSegment.from_file(audio_path)
+
+     # Apply compression
+     compressed_audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0, attack=1.0, release=10.0)
+
+     # Normalize audio
+     target_dBFS = -10.0
+     change_in_dBFS = target_dBFS - compressed_audio.dBFS
+     normalized_audio = compressed_audio.apply_gain(change_in_dBFS)
+
+     # Split the audio into chunks of the same duration as the frames
+     audio_chunks = make_chunks(normalized_audio, frame_duration_ms)
+
+     # Function to calculate decibels of a chunk
+     def calculate_decibels(chunk):
+         return chunk.dBFS
+
+     # Decide whether to use dynamic threshold or a fixed threshold
+     if config["dynamic_threshold"] == 1:
+         # Calculate average decibels
+         average_dBFS = sum(chunk.dBFS for chunk in audio_chunks) / len(audio_chunks)
+         decibel_threshold = average_dBFS + 4  # Set threshold above average
+     else:
+         decibel_threshold = config['decibel_threshold']
+
+     # Function to generate frames
+     def make_frame(t):
+         global last_blink_time
+         frame_index = int(t * frame_rate)
+
+         if should_blink(t, last_blink_time):
+             last_blink_time = t
+
+         if 0 <= (t - last_blink_time) <= blink_duration:
+             if frame_index < len(audio_chunks):
+                 chunk = audio_chunks[frame_index]
+                 decibels = calculate_decibels(chunk)
+
+                 return open_mouth_blinking_cv if decibels > decibel_threshold else closed_mouth_blinking_cv
+             else:
+                 return closed_mouth_blinking_cv
+
+         if frame_index < len(audio_chunks):
+             chunk = audio_chunks[frame_index]
+             decibels = calculate_decibels(chunk)
+
+             return open_mouth_cv if decibels > decibel_threshold else closed_mouth_cv
+         else:
+             return closed_mouth_cv
+
+     # Create a video clip
+     video_clip = VideoClip(make_frame, duration=len(audio_chunks) / frame_rate)
+
+     # Load the audio
+     audio_clip = AudioFileClip(audio_path)
+
+     # Set the audio of the video to the loaded audio
+     video_with_audio = video_clip.set_audio(audio_clip)
+
+     # Write the final video with audio
+     output_video_path = os.path.join(config['output_path'], f"{audio_file.split('.')[0]}.mp4")
+     video_with_audio.write_videofile(output_video_path, fps=frame_rate, codec=config['codec'], audio_codec=config["audio_codec"])
+
+     print("Animation created successfully!")
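
Because the dynamic threshold above is recomputed from the compressed and normalized audio of each file, it can be handy to preview the value a given clip will get before rendering. A small standalone sketch using the same pydub calls as main.py; the input path is just the test file bundled in this commit, and this helper is not part of the repository:

```python
from pydub import AudioSegment
from pydub.utils import make_chunks
from pydub.effects import compress_dynamic_range

# Sketch: preview the dynamic decibel threshold main.py would pick for one input file.
audio = AudioSegment.from_file("audio/test.wav")          # example input from this commit
audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0,
                               attack=1.0, release=10.0)  # same compression as main.py
audio = audio.apply_gain(-10.0 - audio.dBFS)              # normalize to -10 dBFS, as main.py does
chunks = make_chunks(audio, 1000 // 24)                   # ~41 ms of audio per video frame at 24 fps
average_dBFS = sum(c.dBFS for c in chunks) / len(chunks)
print(f"average {average_dBFS:.1f} dBFS -> dynamic threshold {average_dBFS + 4:.1f} dBFS")
```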
output/test.mp4 ADDED
Binary file (233 kB).
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ pydub
+ opencv-python
+ numpy
+ pillow
+ moviepy
run.bat ADDED
@@ -0,0 +1 @@
+ python main.py