Spaces:

Politrees
/

Audio-Steganography

Running

App Files Files Community

Audio-Steganography / app.py

Politrees

Update app.py

2a3080e verified 6 months ago

raw

history blame contribute delete

11.8 kB

	import logging
	import tempfile
	import gradio as gr
	import librosa
	import librosa.display
	import matplotlib.pyplot as plt
	import numpy as np
	import soundfile as sf
	from PIL import Image, ImageDraw, ImageFont
	import os
	import cv2
	from moviepy.editor import VideoFileClip, AudioFileClip

	# Configure logging first so that start-up problems (such as a failed font
	# installation below) are actually reported.
	logging.basicConfig(level=logging.INFO)
	# Only show warnings and above from the "httpx" logger.
	logging.getLogger("httpx").setLevel(logging.WARNING)

	# Font used to render text into the spectrogram image.
	DEFAULT_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
	# Sample rate (Hz) used when writing generated audio and computing spectrograms.
	DEFAULT_SAMPLE_RATE = 22050

	# Best-effort installation of the DejaVu fonts. It is skipped when the font
	# file is already present, and a failure is logged instead of being ignored
	# silently; load_font() falls back to PIL's default font in that case.
	if not os.path.exists(DEFAULT_FONT_PATH):
	    if os.system("apt-get install -y fonts-dejavu") != 0:
	        logging.warning("Could not install fonts-dejavu; the default PIL font may be used instead.")

	def load_font(font_path, max_font_size):
	    """Load a TrueType font, falling back to PIL's built-in default font.

	    Args:
	        font_path: Path of the TrueType font file to load.
	        max_font_size: Size passed to ``ImageFont.truetype``.

	    Returns:
	        The loaded font, or ``ImageFont.load_default()`` when the font file
	        cannot be read.

	    Raises:
	        Exception: Any non-I/O error raised while loading is logged and
	            re-raised unchanged.
	    """
	    try:
	        font = ImageFont.truetype(font_path, max_font_size)
	    except OSError:
	        # IOError is an alias of OSError on Python 3: a missing or unreadable
	        # font file is not fatal, so use the default font instead.
	        logging.warning(f"Font not found at {font_path}. Using default font.")
	        font = ImageFont.load_default()
	    except Exception as exc:
	        logging.error(f"An error occurred while loading the font: {exc}")
	        raise
	    return font

	def create_text_image(text, font, base_width=512, height=256, margin=10, letter_spacing=5):
	    """Render ``text`` in white on a black grayscale canvas.

	    Characters are drawn one by one so that ``letter_spacing`` pixels can be
	    inserted between them, and the text is centred on the canvas. The canvas
	    grows beyond ``base_width`` x ``height`` when the text plus margins does
	    not fit.

	    Args:
	        text: Text to render. Empty or ``None`` text yields a blank canvas
	            instead of raising ``IndexError``.
	        font: PIL font used for measuring and drawing.
	        base_width: Minimum canvas width in pixels.
	        height: Minimum canvas height in pixels.
	        margin: Space kept free on each side of the text, in pixels.
	        letter_spacing: Extra pixels inserted between consecutive characters.

	    Returns:
	        The rendered image as a NumPy array (mode "L" image data).
	    """
	    text = text or ""

	    # A throw-away 1x1 image is enough to measure glyph bounding boxes.
	    measure = ImageDraw.Draw(Image.new("L", (1, 1)))
	    # Measure every character once; each box is (left, top, right, bottom).
	    boxes = [measure.textbbox((0, 0), char, font=font) for char in text]
	    text_widths = [box[2] - box[0] for box in boxes]

	    # max(..., 0) keeps the spacing term from going negative for empty text.
	    text_width = sum(text_widths) + letter_spacing * max(len(text) - 1, 0)
	    # The line height is taken from the first character only.
	    text_height = boxes[0][3] - boxes[0][1] if boxes else 0

	    width = max(base_width, text_width + margin * 2)
	    height = max(height, text_height + margin * 2)

	    image = Image.new("L", (width, height), "black")
	    draw = ImageDraw.Draw(image)

	    # Centre the text block on the canvas.
	    current_x = (width - text_width) // 2
	    text_start_y = (height - text_height) // 2

	    for char, char_width in zip(text, text_widths):
	        draw.text((current_x, text_start_y), char, font=font, fill="white")
	        current_x += char_width + letter_spacing

	    return np.array(image)

	def spectrogram_image_to_audio(image, sr=DEFAULT_SAMPLE_RATE):
	    """Turn a grayscale image into audio whose spectrogram shows the image.

	    The image is flipped vertically, its values are divided by 255 and
	    multiplied by 100, and the result is passed to ``librosa.griffinlim`` as
	    a magnitude spectrogram.

	    Args:
	        image: 2-D array of pixel values.
	        sr: Accepted for interface symmetry; not used by the computation.

	    Returns:
	        The waveform reconstructed by Griffin-Lim.
	    """
	    magnitudes = np.flipud(image).astype(np.float32) / 255.0 * 100.0
	    return librosa.griffinlim(magnitudes)

	def create_audio_with_spectrogram(text, base_width, height, max_font_size, margin, letter_spacing):
	    """Encode ``text`` into audio and render that audio's mel spectrogram.

	    Args:
	        text: Text to hide in the audio.
	        base_width: Minimum width of the intermediate text image.
	        height: Minimum height of the intermediate text image.
	        max_font_size: Font size used to render the text.
	        margin: Margin around the text in the intermediate image.
	        letter_spacing: Extra spacing between characters.

	    Returns:
	        A tuple ``(audio_path, spectrogram_path)`` of temporary ``.wav`` and
	        ``.png`` files. The files are created with ``delete=False``; removing
	        them is left to the caller.
	    """
	    font = load_font(DEFAULT_FONT_PATH, max_font_size)
	    spec_image = create_text_image(text, font, base_width, height, margin, letter_spacing)
	    y = spectrogram_image_to_audio(spec_image)

	    # Reserve a unique file name, then write once the handle is closed.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
	        audio_path = temp_audio.name
	    sf.write(audio_path, y, DEFAULT_SAMPLE_RATE)

	    S = librosa.feature.melspectrogram(y=y, sr=DEFAULT_SAMPLE_RATE)
	    S_dB = librosa.power_to_db(S, ref=np.max)

	    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_spectrogram:
	        spectrogram_path = temp_spectrogram.name

	    fig = plt.figure(figsize=(10, 4))
	    try:
	        librosa.display.specshow(S_dB, sr=DEFAULT_SAMPLE_RATE, x_axis="time", y_axis="mel")
	        plt.axis("off")
	        plt.tight_layout(pad=0)
	        fig.savefig(spectrogram_path, bbox_inches="tight", pad_inches=0, transparent=True)
	    finally:
	        # Close this specific figure even when plotting or saving fails, so
	        # figures do not accumulate in pyplot across requests.
	        plt.close(fig)

	    return audio_path, spectrogram_path

	def display_audio_spectrogram(audio_path):
	    """Render the mel spectrogram of an audio file to a temporary PNG.

	    Args:
	        audio_path: Path of the audio file; it is loaded at its native
	            sample rate (``sr=None``).

	    Returns:
	        Path of the generated ``.png`` file. The file is created with
	        ``delete=False``; removing it is left to the caller.
	    """
	    y, sr = librosa.load(audio_path, sr=None)
	    S = librosa.feature.melspectrogram(y=y, sr=sr)
	    S_dB = librosa.power_to_db(S, ref=np.max)

	    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_spectrogram:
	        spectrogram_path = temp_spectrogram.name

	    fig = plt.figure(figsize=(10, 4))
	    try:
	        librosa.display.specshow(S_dB, sr=sr, x_axis="time", y_axis="mel")
	        plt.axis("off")
	        plt.tight_layout(pad=0)
	        fig.savefig(spectrogram_path, bbox_inches="tight", pad_inches=0, transparent=True)
	    finally:
	        # Close this specific figure even when plotting or saving fails, so
	        # figures do not accumulate in pyplot across requests.
	        plt.close(fig)
	    return spectrogram_path

	def image_to_spectrogram_audio(image_path, sr=DEFAULT_SAMPLE_RATE):
	    """Convert an image file into a WAV file whose spectrogram shows the image.

	    Args:
	        image_path: Path of the image; it is converted to grayscale ("L").
	        sr: Sample rate used when writing the WAV file.

	    Returns:
	        Path of the generated temporary ``.wav`` file (not deleted
	        automatically).
	    """
	    pixels = np.array(Image.open(image_path).convert("L"))
	    waveform = spectrogram_image_to_audio(pixels, sr)

	    # Reserve a unique file name for the output.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
	        img2audio_path = wav_file.name
	    sf.write(img2audio_path, waveform, sr)
	    return img2audio_path

	def gradio_interface_fn(text, base_width, height, max_font_size, margin, letter_spacing):
	    """Gradio handler: log the request and build the audio/spectrogram pair.

	    Returns:
	        The ``(audio_path, spectrogram_path)`` tuple produced by
	        ``create_audio_with_spectrogram``.
	    """
	    logging.info(f"Generating audio and spectrogram for text:\n{text}\n")
	    return create_audio_with_spectrogram(
	        text, base_width, height, max_font_size, margin, letter_spacing
	    )

	def gradio_image_to_audio_fn(upload_image):
	    """Gradio handler: log the uploaded image path and convert it to audio.

	    Returns:
	        Path of the WAV file produced by ``image_to_spectrogram_audio``.
	    """
	    logging.info(f"Converting image to audio:\n{upload_image}\n")
	    generated_audio = image_to_spectrogram_audio(upload_image)
	    return generated_audio

	def gradio_decode_fn(upload_audio):
	    """Gradio handler: log the uploaded audio path and render its spectrogram.

	    Returns:
	        Path of the PNG file produced by ``display_audio_spectrogram``.
	    """
	    logging.info(f"Generating spectrogram for audio:\n{upload_audio}\n")
	    rendered_spectrogram = display_audio_spectrogram(upload_audio)
	    return rendered_spectrogram

	def extract_audio(video_path):
	    """Extract the audio track of a video into a temporary WAV file.

	    Args:
	        video_path: Path of the video file.

	    Returns:
	        Path of the written ``.wav`` file, or ``None`` when the video cannot
	        be read or has no audio track (the error is logged).
	    """
	    video = None
	    try:
	        video = VideoFileClip(video_path)
	        if video.audio is None:
	            raise ValueError("No audio found in the video")
	        # A unique temporary name avoids clashes between concurrent requests
	        # that would otherwise all write to the same file.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
	            audio_path = temp_audio.name
	        video.audio.write_audiofile(audio_path)
	        return audio_path
	    except Exception as e:
	        logging.error(f"Failed to extract audio: {e}")
	        return None
	    finally:
	        # Release the clip's reader resources whether or not extraction worked.
	        if video is not None:
	            video.close()

	def extract_frames(video_path):
	    """Read every frame of a video into memory.

	    Args:
	        video_path: Path of the video file.

	    Returns:
	        A non-empty list of frames as returned by ``cv2.VideoCapture.read``,
	        or ``None`` when the video cannot be opened, contains no frames, or
	        reading fails (the error is logged).
	    """
	    video = None
	    try:
	        video = cv2.VideoCapture(video_path)
	        # VideoCapture does not raise for an unreadable file, so check it.
	        if not video.isOpened():
	            raise ValueError("Could not open the video")

	        frames = []
	        success, frame = video.read()
	        while success:
	            frames.append(frame)
	            success, frame = video.read()

	        if not frames:
	            raise ValueError("No frames found in the video")
	        return frames
	    except Exception as e:
	        logging.error(f"Failed to extract frames: {e}")
	        return None
	    finally:
	        # Always release the capture handle, even on failure.
	        if video is not None:
	            video.release()

	def frame_to_spectrogram(frame, sr=DEFAULT_SAMPLE_RATE):
	    """Convert one BGR video frame into audio via Griffin-Lim.

	    The frame is converted to grayscale, its values are divided by 255 and
	    multiplied by 100, the result is flipped vertically, and it is passed to
	    ``librosa.griffinlim`` as a magnitude spectrogram.

	    Args:
	        frame: Frame in BGR channel order.
	        sr: Accepted for interface symmetry; not used by the computation.

	    Returns:
	        The waveform reconstructed by Griffin-Lim.
	    """
	    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	    scaled = gray.astype(np.float32) / 255.0 * 100.0
	    magnitudes = np.flipud(scaled)
	    return librosa.griffinlim(magnitudes)

	def save_audio(y, sr=DEFAULT_SAMPLE_RATE):
	    """Write a waveform to ``output_frame_audio.wav`` in the working directory.

	    Args:
	        y: Audio samples to write.
	        sr: Sample rate of ``y``.

	    Returns:
	        The path of the written file.
	    """
	    destination = 'output_frame_audio.wav'
	    sf.write(destination, y, sr)
	    return destination

	def save_spectrogram_image(S, frame_number, temp_dir):
	    """Plot a spectrogram with ``librosa.display.specshow`` and save it as PNG.

	    Args:
	        S: Spectrogram data to plot.
	        frame_number: Index used in the output file name.
	        temp_dir: Directory in which the image is written.

	    Returns:
	        Path of the saved ``spectrogram_frame_<frame_number>.png`` file.
	    """
	    image_path = os.path.join(temp_dir, f'spectrogram_frame_{frame_number}.png')
	    fig = plt.figure(figsize=(10, 4))
	    try:
	        librosa.display.specshow(S)
	        plt.tight_layout()
	        fig.savefig(image_path)
	    finally:
	        # This runs once per video frame, so close the figure even on failure
	        # to keep figures from piling up in pyplot.
	        plt.close(fig)
	    return image_path

	def process_video_frames(frames, sr=DEFAULT_SAMPLE_RATE, temp_dir=None):
	    """Replace every video frame by an image of its mel spectrogram.

	    Each frame is converted to audio, the mel spectrogram of that audio is
	    plotted to a PNG in ``temp_dir``, and the PNG is read back with OpenCV.

	    Args:
	        frames: Iterable of BGR frames.
	        sr: Sample rate used for the mel spectrogram.
	        temp_dir: Directory for the intermediate PNG files. When ``None`` a
	            fresh temporary directory is created (previously ``None`` made
	            ``os.path.join`` raise ``TypeError``).

	    Returns:
	        List of images as returned by ``cv2.imread``, one per input frame.
	    """
	    if temp_dir is None:
	        temp_dir = tempfile.mkdtemp()

	    processed_frames = []
	    for i, frame in enumerate(frames):
	        y = frame_to_spectrogram(frame, sr)
	        S = librosa.feature.melspectrogram(y=y, sr=sr)
	        image_path = save_spectrogram_image(S, i, temp_dir)
	        processed_frames.append(cv2.imread(image_path))
	    return processed_frames

	def save_video_from_frames(frames, output_path, fps=30):
	    """Write a sequence of frames to an MP4 file using the ``mp4v`` codec.

	    Args:
	        frames: Non-empty sequence of frames; the size of the first frame
	            determines the video size.
	        output_path: Destination file path.
	        fps: Frame rate of the written video.

	    Raises:
	        ValueError: If ``frames`` is empty.
	    """
	    # len() rather than truthiness so array-like sequences work as well.
	    if len(frames) == 0:
	        raise ValueError("Cannot write a video without frames")

	    # Only the first two dimensions (rows, columns) are needed for the size.
	    height, width = frames[0].shape[:2]
	    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
	    try:
	        for frame in frames:
	            video.write(frame)
	    finally:
	        # Release the writer even if a frame cannot be written.
	        video.release()

	def add_audio_to_video(video_path, audio_path, output_path):
	    """Mux an audio file onto a video and write the result to ``output_path``.

	    The output is encoded with ``libx264`` video and ``aac`` audio. Failures
	    are logged and otherwise ignored (best effort); nothing is returned.

	    Args:
	        video_path: Path of the source video.
	        audio_path: Path of the audio file to attach.
	        output_path: Destination file path.
	    """
	    video = None
	    audio = None
	    try:
	        video = VideoFileClip(video_path)
	        audio = AudioFileClip(audio_path)
	        final_video = video.set_audio(audio)
	        final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
	    except Exception as e:
	        logging.error(f"Failed to add audio to video: {e}")
	    finally:
	        # Close both clips so their reader resources are released.
	        for clip in (audio, video):
	            if clip is not None:
	                clip.close()

	def process_video(video_path):
	    """Turn a video into a video of per-frame spectrograms with its own audio.

	    Only the first 10 seconds of longer videos are processed. Intermediate
	    files live in a temporary directory that is removed afterwards; the
	    result is written to a fresh temporary location.

	    Args:
	        video_path: Path of the uploaded video.

	    Returns:
	        Path of the output video on success, otherwise a human-readable
	        error message string.
	        NOTE(review): the error strings end up in a ``gr.Video`` output;
	        consider raising ``gr.Error`` instead - confirm the desired UX.
	    """
	    with tempfile.TemporaryDirectory() as temp_dir:
	        video = None
	        try:
	            video = VideoFileClip(video_path)
	            # Keep the source frame rate so the rebuilt video stays in sync
	            # with the extracted audio; fall back to 30 when it is unknown.
	            fps = video.fps or 30
	            if video.duration > 10:
	                temp_trimmed_video_path = os.path.join(temp_dir, "trimmed_video.mp4")
	                video.subclip(0, 10).write_videofile(temp_trimmed_video_path, codec='libx264')
	                video_path = temp_trimmed_video_path
	        except Exception as e:
	            return f"Failed to load video: {e}"
	        finally:
	            # Release the clip's reader resources in every case.
	            if video is not None:
	                video.close()

	        audio_path = extract_audio(video_path)
	        if audio_path is None:
	            return "Failed to extract audio from video."
	        frames = extract_frames(video_path)
	        # Treat an empty frame list like a failure: there is nothing to encode.
	        if not frames:
	            return "Failed to extract frames from video."

	        processed_frames = process_video_frames(frames, temp_dir=temp_dir)
	        temp_video_path = os.path.join(temp_dir, 'processed_video.mp4')
	        save_video_from_frames(processed_frames, temp_video_path, fps=fps)

	        # The result must outlive temp_dir, and a unique directory keeps
	        # concurrent requests from overwriting each other's output.
	        output_video_path = os.path.join(tempfile.mkdtemp(), 'output_video_with_audio.mp4')
	        add_audio_to_video(temp_video_path, audio_path, output_video_path)
	    return output_video_path

	def create_gradio_interface():
	"""Build the Gradio Blocks UI with four tabs and return it.

	Each tab wires one button to one handler: text -> audio + spectrogram,
	image -> audio, audio -> spectrogram image, and video -> spectrogram video.
	The returned Blocks object is not launched here.
	"""
	# NOTE(review): the nesting of the layout context managers below was lost
	# (all lines share one indent level); restore it from the original file.
	with gr.Blocks(title="Audio Steganography", css="footer{display:none !important}", theme=gr.themes.Soft(primary_hue="green", secondary_hue="green", spacing_size="sm", radius_size="lg")) as txt2spec:
	# Tab 1: render text into an image and encode it as audio.
	with gr.Tab("Text to Spectrogram"):
	with gr.Group():
	text = gr.Textbox(lines=2, placeholder="Enter your text:", label="Enter the text you want to convert to spectrogram:")
	with gr.Row(variant="panel"):
	# Width and height are hidden sliders: they only supply fixed defaults.
	base_width = gr.Slider(value=512, label="Image Width", visible=False)
	height = gr.Slider(value=256, label="Image Height", visible=False)
	max_font_size = gr.Slider(minimum=10, maximum=130, step=5, value=80, label="Font size")
	margin = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Indent")
	letter_spacing = gr.Slider(minimum=0, maximum=50, step=1, value=5, label="Letter spacing")
	generate_button = gr.Button("Generate", variant="primary", size="lg")

	with gr.Column(variant="panel"):
	gr.Markdown("Caution! The sound is too loud. It is recommended to familiarize yourself with audio steganography before using this application.")
	with gr.Group():
	output_audio = gr.Audio(type="filepath", label="Generated audio")
	output_spectrogram = gr.Image(type="filepath", label="Spectrogram")

	generate_button.click(gradio_interface_fn, inputs=[text, base_width, height, max_font_size, margin, letter_spacing], outputs=[output_audio, output_spectrogram])

	# Tab 2: encode an uploaded image as audio.
	with gr.Tab("Image to Spectrogram"):
	with gr.Group():
	with gr.Column():
	upload_image = gr.Image(type="filepath", label="Upload image")
	convert_button = gr.Button("Convert to audio", variant="primary", size="lg")

	with gr.Column(variant="panel"):
	gr.Markdown("Caution! The sound is too loud. It is recommended to familiarize yourself with audio steganography before using this application.")
	output_audio_from_image = gr.Audio(type="filepath", label="Generated audio")

	convert_button.click(gradio_image_to_audio_fn, inputs=[upload_image], outputs=[output_audio_from_image])

	# Tab 3: show the mel spectrogram of an uploaded audio file.
	with gr.Tab("Audio to Spectrogram"):
	with gr.Group():
	with gr.Column():
	upload_audio = gr.Audio(type="filepath", label="Upload audio", scale=3)
	decode_button = gr.Button("Show spectrogram", variant="primary", size="lg")

	with gr.Column(variant="panel"):
	decoded_image = gr.Image(type="filepath", label="Audio Spectrogram")

	decode_button.click(gradio_decode_fn, inputs=[upload_audio], outputs=[decoded_image])

	# Tab 4: convert an uploaded video into a video of spectrograms.
	with gr.Tab("Video to Spectrogram"):
	with gr.Group():
	video_input = gr.Video(label="Upload video")
	# Rebinds the name used in tab 1; that button's click handler is already
	# registered above, so the first tab is unaffected.
	generate_button = gr.Button("Generate", variant="primary", size="lg")

	with gr.Column(variant="panel"):
	video_output = gr.Video(label="Video Spectrogram")

	generate_button.click(process_video, inputs=[video_input], outputs=[video_output])

	return txt2spec

	if __name__ == "__main__":
	    # Build the UI and serve it; share=True asks Gradio for a public link.
	    create_gradio_interface().launch(share=True)