import time

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import tensorflow as tf
from pydub import AudioSegment

# Analysis frame settings (in seconds) and the target sample rate for librosa.load.
window_length = 0.02
hop_length = 0.0025
sample_rate = 22050

# Updated by predict_audio(); initialized so the UI can always report a value.
inference_time = 1.0

# Load the TFLite model once at startup and cache its tensor metadata.
interpreter = tf.lite.Interpreter(model_path="model_breath_logspec_mfcc_cnn.tflite")
interpreter.allocate_tensors()
input_details, output_details = interpreter.get_input_details(), interpreter.get_output_details()
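# Input shape note (derived from the feature code below, not read from the model
# file itself): prepare_single_data() produces a (1, 142, 500, 1) float32 tensor,
# i.e. batch x (13 MFCC + 128 log-mel + 1 breath rows) x 500 frames x 1 channel.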


def convert_mp3_to_wav(mp3_path):
    # Decode the MP3 with pydub (requires ffmpeg) and re-export it as WAV.
    audio = AudioSegment.from_mp3(mp3_path)
    wav_path = mp3_path.replace(".mp3", ".wav")
    audio.export(wav_path, format="wav")
    return wav_path
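# Used in the upload handler at the bottom of this script: MP3 uploads are
# converted to WAV here before being passed to librosa.load.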


def extract_breath_features(y, sr):
    # Frame-level heuristic: flag frames whose zero-crossing rate and RMS
    # energy both exceed 0.1 as breath-like, giving a binary feature track.
    frame_length = int(window_length * sr)
    hop_length_samples = int(hop_length * sr)
    zcr = librosa.feature.zero_crossing_rate(y=y, frame_length=frame_length, hop_length=hop_length_samples)
    rmse = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length_samples)
    breaths = (zcr.flatten() > 0.1) & (rmse.flatten() > 0.1)
    return np.where(breaths, 1, 0)
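# Note: this breath track uses a ~2.5 ms hop (hop_length * sr ~= 55 samples at
# 22.05 kHz), while the MFCC and mel spectrogram below use librosa's default
# hop of 512 samples, so the streams have different frame counts;
# extract_features() truncates everything to the shortest stream before stacking.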


def extract_features(y, sr, n_mels=128, n_mfcc=13):
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels))
        breath_feature = extract_breath_features(y, sr)

        # Align the three feature streams to the shortest frame count.
        min_len = min(mfcc.shape[1], logspec.shape[1], len(breath_feature))
        mfcc = librosa.util.fix_length(mfcc, size=min_len, axis=1)
        logspec = librosa.util.fix_length(logspec, size=min_len, axis=1)
        breath_feature = librosa.util.fix_length(breath_feature, size=min_len)

        return np.vstack((mfcc, logspec, breath_feature))
    except Exception as e:
        st.error(f"Error processing audio: {e}")
        return None
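# The stacked matrix has 13 + 128 + 1 = 142 rows (MFCC, log-mel, breath),
# with one column per retained frame.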


def prepare_single_data(features, max_len=500):
    # Pad or truncate to a fixed 500 frames, then add batch and channel axes.
    features = librosa.util.fix_length(features, size=max_len, axis=1)
    return features[np.newaxis, ..., np.newaxis].astype(np.float32)
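# The float32 cast assumes the TFLite model takes float inputs; a fully
# quantized (int8) model would need input scaling/zero-point handling instead.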


def predict_audio(features):
    global inference_time
    start_time = time.time()
    prepared_features = prepare_single_data(features)
    interpreter.set_tensor(input_details[0]['index'], prepared_features)
    interpreter.invoke()
    prediction = interpreter.get_tensor(output_details[0]['index'])
    end_time = time.time()
    inference_time = end_time - start_time
    return np.argmax(prediction, axis=1)[0], prediction[0]
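# prediction[0] is a two-class score vector; index 0 is treated as "real" and
# index 1 as "fake" by the UI below.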


def plot_waveform(y, sr, start_time, end_time):
    # Trim to the selected window and plot amplitude against absolute time.
    start_sample, end_sample = int(start_time * sr), int(end_time * sr)
    y_trimmed = y[start_sample:end_sample]
    times = np.linspace(start_time, end_time, num=len(y_trimmed))
    df = pd.DataFrame({"Time (s)": times, "Amplitude": y_trimmed})
    st.plotly_chart(px.line(df, x="Time (s)", y="Amplitude", title="Waveform"), use_container_width=True)
    return y_trimmed, sr
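# Plotly draws one point per sample here (22,050 per second), which can get
# slow for long clips; decimating y_trimmed before plotting is a possible
# optimization.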


def visualize_features(features, duration):
    # The breath track is the last row of the stacked feature matrix.
    time_axis = np.linspace(0, duration, features.shape[1])
    df_breath = pd.DataFrame({"Time (s)": time_axis, "Breath Feature": features[-1]})
    st.plotly_chart(px.line(df_breath, x="Time (s)", y="Breath Feature", title="Breath Feature Over Time"), use_container_width=True)


def plot_mfcc_and_logspec(y, sr):
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Use power_to_db, matching extract_features(): melspectrogram returns power.
    logspec = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128))

    with st.sidebar:
        st.write("### MFCC and Log-Spectrogram")

        fig, ax = plt.subplots(figsize=(5, 3))
        img = librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=ax)
        fig.colorbar(img, ax=ax)
        ax.set_title("MFCC")
        st.pyplot(fig)

        fig, ax = plt.subplots(figsize=(5, 3))
        img = librosa.display.specshow(logspec, sr=sr, x_axis='time', ax=ax)
        fig.colorbar(img, ax=ax)
        ax.set_title("Log-Mel Spectrogram")
        st.pyplot(fig)


st.title('Audio Integrity Net')
st.subheader('Documentation to be added')
uploaded_file = st.file_uploader('Upload an audio file', type=['wav', 'mp3'])

if uploaded_file:
    # Save the upload under its real extension; convert MP3s with the helper
    # above so librosa always loads a genuine WAV file.
    suffix = '.mp3' if uploaded_file.name.lower().endswith('.mp3') else '.wav'
    temp_path = f'temp_audio{suffix}'
    with open(temp_path, 'wb') as f:
        f.write(uploaded_file.getbuffer())
    if suffix == '.mp3':
        temp_path = convert_mp3_to_wav(temp_path)

    y, sr = librosa.load(temp_path, sr=sample_rate)
    duration = librosa.get_duration(y=y, sr=sr)
    start_time, end_time = st.slider("Select time range", 0.0, duration, (0.0, duration))
    y_trimmed, sr = plot_waveform(y, sr, start_time, end_time)

    if st.sidebar.button("Show MFCC & Log-Spectrogram"):
        plot_mfcc_and_logspec(y_trimmed, sr)

    features = extract_features(y_trimmed, sr)
    if features is not None:
        st.success("Feature Extraction Completed!")
        visualize_features(features, end_time - start_time)
        prediction, probability = predict_audio(features)
        if prediction == 0:
            st.subheader('Predicted class is Real')
        else:
            st.subheader('Predicted class is Fake')
        st.write(f'Probability of being real: {probability[0] * 100:.2f}%')
        st.write(f'Probability of being fake: {probability[1] * 100:.2f}%')
        st.write(f"Inference Time: {inference_time:.6f} seconds")