heea committed
Commit 5d09370
1 Parent(s): 3485955

Update README.md

Files changed (1)
  1. README.md +48 -40
README.md CHANGED
@@ -1,40 +1,48 @@
- ---
- license: other
- ---
- from PIL import Image
- import mido
-
- def convert_pixel_to_midi(pixel):
-     # Convert pixel values to MIDI values
-     # Define your custom logic here
-
- def convert_image_to_midi(image_path):
-     # Open the image
-     image = Image.open(image_path)
-
-     # Extract the pixel data from the image
-     pixels = image.load()
-
-     # Create a new MIDI file
-     mid = mido.MidiFile()
-
-     # Create a new track
-     track = mido.MidiTrack()
-     mid.tracks.append(track)
-
-     # Set the desired tempo and other necessary MIDI events
-     track.append(mido.MetaMessage('set_tempo', tempo=500000, time=0))  # Adjust the tempo as needed
-
-     # Iterate over each pixel in the image
-     for y in range(image.height):
-         for x in range(image.width):
-             # Get the RGB values of the pixel
-             r, g, b = pixels[x, y]
-
-             # Convert the pixel values to MIDI values
-             midi_value = convert_pixel_to_midi((r, g, b))
-
-             # Add a note-on event to the MIDI track
-             track.append(mido.Message('note_on', note=midi_value, velocity=64, time=0))  # Adjust velocity as needed
-
-             # Add a note-off event after a certain duration
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from scipy.io.wavfile import write
+
+ # Load the image
+ image_path = 'hugging_face.jpg'
+ image = plt.imread(image_path)
+
+ # Convert the image to grayscale
+ grayscale_image = np.mean(image, axis=2)
+
+ # Define the musical parameters
+ sample_rate = 44100  # Audio sample rate (Hz)
+ duration = 0.1  # Duration of each note (seconds)
+
+ # Define the mapping from pixel values to musical notes
+ min_note = 40  # MIDI note number for the lowest pixel value
+ max_note = 80  # MIDI note number for the highest pixel value
+
+ # Rescale the pixel values to the range [min_note, max_note]
+ scaled_image = grayscale_image - np.min(grayscale_image)
+ scaled_image *= (max_note - min_note) / np.max(scaled_image)
+ scaled_image += min_note
+
+ # Generate the audio signal: one sine tone per image column
+ samples_per_note = int(duration * sample_rate)
+ total_samples = samples_per_note * grayscale_image.shape[1]
+ t = np.linspace(0, total_samples / sample_rate, total_samples, endpoint=False)
+ audio_signal = np.zeros(total_samples)
+ for i, column in enumerate(scaled_image.T):
+     note = np.mean(column)  # Collapse the column to a single MIDI note
+     frequency = 440.0 * 2 ** ((note - 69) / 12)  # Convert the MIDI note to Hz
+     start = i * samples_per_note
+     end = start + samples_per_note
+     audio_signal[start:end] = np.sin(2 * np.pi * frequency * t[start:end])
+
+ # Normalize the audio signal
+ audio_signal /= np.max(np.abs(audio_signal))
+ audio_signal *= 32767  # Scale the signal to the range of a 16-bit integer
+
+ # Convert the audio signal to 16-bit integer format
+ audio_signal = audio_signal.astype(np.int16)
+
+ # Save the audio signal to a WAV file
+ output_file = 'hugging_face.wav'
+ write(output_file, sample_rate, audio_signal)
+
+ print(f"Audio file '{output_file}' generated successfully!")