Blane187 committed on
Commit 9d86442
1 Parent(s): 03d6d06

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -1,35 +1,3 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ # Auto detect text files and perform LF normalization
+ * text=auto
+ audio/test.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,32 @@
- ---
- title: Audio Mouth
- emoji: 🚀
- colorFrom: purple
- colorTo: pink
- sdk: gradio
- sdk_version: 4.44.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AudioMouth
+
+ AudioMouth is a simple Python app that generates animated videos by syncing mouth movements to audio decibel levels. It processes an audio file and switches between images (open and closed mouth) to create a lip-sync effect.
+
+ ## Features
+ - Syncs mouth images to the audio based on decibel levels.
+ - Configurable FPS.
+ - Outputs video with a green-screen background (or a custom color) for chroma keying.
+
+ ## Installation
+ Clone the repository and install the required dependencies by running the following from a command line:
+
+ ```bash
+ git clone https://github.com/luisesantillan/AudioMouth
+ cd AudioMouth
+ pip install -r requirements.txt
+ ```
+ ## Usage
+ Add 1-4 images to the frames folder and edit the paths in config.json to point to the images you want.
+ Put the audio files into the audio folder; one animation is created per audio file.
+
+ closed_mouth | closed_mouth_blinking | open_mouth | open_mouth_blinking
+ :-------------------------:|:-------------------------:|:-------------------------:|:-------------------------:
+ ![closed_mouth](https://github.com/user-attachments/assets/3ed0c597-df0e-4165-98d4-cf978e1338bb) | ![closed_mouth_blinking](https://github.com/user-attachments/assets/1296c2a7-4304-4935-b398-4ee5e1fe8a10) | ![open_mouth](https://github.com/user-attachments/assets/4715a73a-1a27-4ac9-a20b-954dde0aac0b) | ![open_mouth_blinking](https://github.com/user-attachments/assets/b7d04648-9158-4dd2-889c-27c67a64e0b2)
+
+ If you're on Windows, open run.bat; the output will be saved in the output folder.
+ If you're on Linux, run main.py directly.
+
+ https://github.com/user-attachments/assets/dcf3728c-0d3b-455d-b17e-5e9819be069b
+
+
+
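
The README's description boils down to a per-frame threshold test. Below is a minimal, illustrative distillation of the decision that `main.py` (added later in this commit) makes for each video frame; `pick_frame` and its arguments are names chosen for this sketch and are not part of the repository:

```python
# Illustrative only: the frame-selection rule of main.py, stripped of all I/O.
def pick_frame(chunk_dBFS: float, decibel_threshold: float, blinking: bool) -> str:
    """Open the mouth when the current audio chunk is louder than the threshold."""
    if chunk_dBFS > decibel_threshold:
        return "open_mouth_blinking" if blinking else "open_mouth"
    return "closed_mouth_blinking" if blinking else "closed_mouth"

print(pick_frame(-18.0, -26.0, blinking=False))  # -> "open_mouth"
```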
audio/test.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1988fd65b06aef6657441ab017d4cf40f86ca36005bb1df3a26b3c7ad6628dd8
+ size 1269936
config.json ADDED
@@ -0,0 +1,21 @@
+ {
+     "background_color": [0, 255, 0],
+     "frame_paths": {
+         "closed_mouth": "frames/closed_mouth.png",
+         "open_mouth": "frames/open_mouth.png",
+         "closed_mouth_blinking": "frames/closed_mouth_blinking.png",
+         "open_mouth_blinking": "frames/open_mouth_blinking.png"
+     },
+     "output_path": "output",
+     "frame_duration_ms": 1000,
+     "audio_path": "audio",
+     "blink_duration": 0.15,
+     "minimum_blinking_delay": 2,
+     "maximum_blinking_delay": 5,
+     "initial_blink_time": -2,
+     "frame_rate": 24,
+     "dynamic_threshold": 1,
+     "decibel_threshold": -30.0,
+     "codec": "libx264",
+     "audio_codec": "aac"
+ }
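
Two of these values interact in a non-obvious way: `main.py` divides `frame_duration_ms` by `frame_rate`, so with the defaults each video frame is matched against roughly 41 ms of audio, and `dynamic_threshold: 1` means `decibel_threshold` is ignored in favour of the average chunk loudness plus 4 dB. A small sketch of those derivations, assuming this config.json is on disk:

```python
import json

# Sketch: how main.py consumes the configuration values above.
with open("config.json") as f:
    cfg = json.load(f)

# 1000 // 24 == 41: each video frame is matched against ~41 ms of audio.
chunk_ms = cfg["frame_duration_ms"] // cfg["frame_rate"]
print(f"audio per video frame: {chunk_ms} ms")

if cfg["dynamic_threshold"] == 1:
    # decibel_threshold is ignored; main.py uses (average chunk dBFS + 4) instead.
    print("threshold: average chunk dBFS + 4")
else:
    print(f"fixed threshold: {cfg['decibel_threshold']} dBFS")
```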
frames/closed_mouth.png ADDED
frames/closed_mouth_blinking.png ADDED
frames/open_mouth.png ADDED
frames/open_mouth_blinking.png ADDED
main.py ADDED
@@ -0,0 +1,115 @@
+ import os, random, json
+ import numpy as np
+ from pydub import AudioSegment
+ from pydub.utils import make_chunks
+ from pydub.effects import compress_dynamic_range
+ from PIL import Image
+ import cv2
+ from moviepy.editor import VideoClip, AudioFileClip
+
+ # Load configuration
+ with open('config.json', 'r') as config_file:
+     config = json.load(config_file)
+
+ # Load the images
+ closed_mouth_img = Image.open(config['frame_paths']['closed_mouth'])
+ open_mouth_img = Image.open(config['frame_paths']['open_mouth'])
+ closed_mouth_blinking_img = Image.open(config['frame_paths']['closed_mouth_blinking'])
+ open_mouth_blinking_img = Image.open(config['frame_paths']['open_mouth_blinking'])
+
+ # Create a background with the color from config
+ background_color = tuple(config['background_color'])
+ background = Image.new('RGBA', closed_mouth_img.size, background_color)
+
+ # Composite the images with the background
+ closed_mouth_img = Image.alpha_composite(background, closed_mouth_img)
+ open_mouth_img = Image.alpha_composite(background, open_mouth_img)
+ closed_mouth_blinking_img = Image.alpha_composite(background, closed_mouth_blinking_img)
+ open_mouth_blinking_img = Image.alpha_composite(background, open_mouth_blinking_img)
+
+ # Convert images to OpenCV format
+ closed_mouth_cv = cv2.cvtColor(np.array(closed_mouth_img), cv2.COLOR_RGBA2RGB)
+ open_mouth_cv = cv2.cvtColor(np.array(open_mouth_img), cv2.COLOR_RGBA2RGB)
+ closed_mouth_blinking_cv = cv2.cvtColor(np.array(closed_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
+ open_mouth_blinking_cv = cv2.cvtColor(np.array(open_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
+
+ # Decide whether to blink
+ def should_blink(t, last_blink_time):
+     if t - last_blink_time > random.uniform(config['minimum_blinking_delay'], config['maximum_blinking_delay']):
+         return True
+     return False
+
+ blink_duration = config['blink_duration']
+ last_blink_time = config['initial_blink_time']
+
+ # Set parameters
+ frame_rate = config['frame_rate']
+ frame_duration_ms = config['frame_duration_ms'] // frame_rate
+
+ for audio_file in os.listdir(config['audio_path']):
+     # Load the audio
+     audio_path = os.path.join(config['audio_path'], audio_file)
+     audio = AudioSegment.from_file(audio_path)
+
+     # Apply compression
+     compressed_audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0, attack=1.0, release=10.0)
+
+     # Normalize audio
+     target_dBFS = -10.0
+     change_in_dBFS = target_dBFS - compressed_audio.dBFS
+     normalized_audio = compressed_audio.apply_gain(change_in_dBFS)
+
+     # Split the audio into chunks of the same duration as the frames
+     audio_chunks = make_chunks(normalized_audio, frame_duration_ms)
+
+     # Function to calculate decibels of a chunk
+     def calculate_decibels(chunk):
+         return chunk.dBFS
+
+     # Decide whether to use dynamic threshold or a fixed threshold
+     if config["dynamic_threshold"] == 1:
+         # Calculate average decibels
+         average_dBFS = sum(chunk.dBFS for chunk in audio_chunks) / len(audio_chunks)
+         decibel_threshold = average_dBFS + 4  # Set threshold above average
+     else:
+         decibel_threshold = config['decibel_threshold']
+
+     # Function to generate frames
+     def make_frame(t):
+         global last_blink_time
+         frame_index = int(t * frame_rate)
+
+         if should_blink(t, last_blink_time):
+             last_blink_time = t
+
+         if 0 <= (t - last_blink_time) <= blink_duration:
+             if frame_index < len(audio_chunks):
+                 chunk = audio_chunks[frame_index]
+                 decibels = calculate_decibels(chunk)
+
+                 return open_mouth_blinking_cv if decibels > decibel_threshold else closed_mouth_blinking_cv
+             else:
+                 return closed_mouth_blinking_cv
+
+         if frame_index < len(audio_chunks):
+             chunk = audio_chunks[frame_index]
+             decibels = calculate_decibels(chunk)
+
+             return open_mouth_cv if decibels > decibel_threshold else closed_mouth_cv
+         else:
+             return closed_mouth_cv
+
+     # Create a video clip
+     video_clip = VideoClip(make_frame, duration=len(audio_chunks) / frame_rate)
+
+     # Load the audio
+     audio_clip = AudioFileClip(audio_path)
+
+     # Set the audio of the video to the loaded audio
+     video_with_audio = video_clip.set_audio(audio_clip)
+
+     # Write the final video with audio
+     output_video_path = os.path.join(config['output_path'], f"{audio_file.split('.')[0]}.mp4")
+     video_with_audio.write_videofile(output_video_path, fps=frame_rate, codec=config['codec'], audio_codec=config["audio_codec"])
+
+     print("Animation created successfully!")
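
Because the dynamic threshold above is recomputed from the compressed and normalized audio of each file, it can be handy to preview the value a given clip will get before rendering. A small standalone sketch using the same pydub calls as main.py; the input path is just the test file bundled in this commit, and this helper is not part of the repository:

```python
from pydub import AudioSegment
from pydub.utils import make_chunks
from pydub.effects import compress_dynamic_range

# Sketch: preview the dynamic decibel threshold main.py would pick for one input file.
audio = AudioSegment.from_file("audio/test.wav")          # example input from this commit
audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0,
                               attack=1.0, release=10.0)  # same compression as main.py
audio = audio.apply_gain(-10.0 - audio.dBFS)              # normalize to -10 dBFS, as main.py does
chunks = make_chunks(audio, 1000 // 24)                   # ~41 ms of audio per video frame at 24 fps
average_dBFS = sum(c.dBFS for c in chunks) / len(chunks)
print(f"average {average_dBFS:.1f} dBFS -> dynamic threshold {average_dBFS + 4:.1f} dBFS")
```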
output/test.mp4 ADDED
Binary file (233 kB).
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ pydub
+ opencv-python
+ numpy
+ pillow
+ moviepy
run.bat ADDED
@@ -0,0 +1 @@
+ python main.py