heea committed
Commit 5d09370
1 Parent(s): 3485955

Update README.md

Files changed (1)
  1. README.md +48 -40
README.md CHANGED
@@ -1,40 +1,48 @@
- ---
- license: other
- ---
- from PIL import Image
- import mido
-
- def convert_pixel_to_midi(pixel):
-     # Convert pixel values to MIDI values
-     # Define your custom logic here
-
- def convert_image_to_midi(image_path):
-     # Open the image
-     image = Image.open(image_path)
-
-     # Extract the pixel data from the image
-     pixels = image.load()
-
-     # Create a new MIDI file
-     mid = mido.MidiFile()
-
-     # Create a new track
-     track = mido.MidiTrack()
-     mid.tracks.append(track)
-
-     # Set the desired tempo and other necessary MIDI events
-     track.append(mido.MetaMessage('set_tempo', tempo=500000, time=0))  # Adjust the tempo as needed
-
-     # Iterate over each pixel in the image
-     for y in range(image.height):
-         for x in range(image.width):
-             # Get the RGB values of the pixel
-             r, g, b = pixels[x, y]
-
-             # Convert the pixel values to MIDI values
-             midi_value = convert_pixel_to_midi((r, g, b))
-
-             # Add a note-on event to the MIDI track
-             track.append(mido.Message('note_on', note=midi_value, velocity=64, time=0))  # Adjust velocity as needed
-
-             # Add a note-off event after a certain duration
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from scipy.io.wavfile import write
+
+ # Load the image
+ image_path = 'hugging_face.jpg'
+ image = plt.imread(image_path)
+
+ # Convert the image to grayscale
+ grayscale_image = np.mean(image, axis=2)
+
+ # Define the musical parameters
+ sample_rate = 44100  # Audio sample rate (Hz)
+ duration = 0.1  # Duration of each note (seconds)
+
+ # Define the mapping from pixel values to musical notes
+ min_note = 40  # MIDI note number for the lowest pixel value
+ max_note = 80  # MIDI note number for the highest pixel value
+
+ # Rescale the pixel values to the range [min_note, max_note]
+ scaled_image = grayscale_image - np.min(grayscale_image)
+ scaled_image *= (max_note - min_note) / np.max(scaled_image)
+ scaled_image += min_note
+
+ # Generate the audio signal: one sine tone per image column
+ samples_per_note = int(duration * sample_rate)
+ total_samples = samples_per_note * grayscale_image.shape[1]
+ t = np.linspace(0, total_samples / sample_rate, total_samples, endpoint=False)
+ audio_signal = np.zeros(total_samples)
+ for i, column in enumerate(scaled_image.T):
+     note = np.mean(column)  # Collapse the column to a single MIDI note
+     frequency = 440.0 * 2 ** ((note - 69) / 12)  # Convert the MIDI note to Hz
+     start = i * samples_per_note
+     end = start + samples_per_note
+     audio_signal[start:end] = np.sin(2 * np.pi * frequency * t[start:end])
+
+ # Normalize the audio signal
+ audio_signal /= np.max(np.abs(audio_signal))
+ audio_signal *= 32767  # Scale the signal to the range of a 16-bit integer
+
+ # Convert the audio signal to 16-bit integer format
+ audio_signal = audio_signal.astype(np.int16)
+
+ # Save the audio signal to a WAV file
+ output_file = 'hugging_face.wav'
+ write(output_file, sample_rate, audio_signal)
+
+ print(f"Audio file '{output_file}' generated successfully!")