File size: 3,103 Bytes
32ca76b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import torch

import numpy as np


def calc_ber(watermark_decoded_tensor, watermark_tensor, threshold=0.5):
    watermark_decoded_binary = watermark_decoded_tensor >= threshold
    watermark_binary = watermark_tensor >= threshold
    ber_tensor = 1 - (watermark_decoded_binary == watermark_binary).to(torch.float32).mean()
    return ber_tensor


def to_equal_length(original, signal_watermarked):
    if original.shape != signal_watermarked.shape:
        print("警告!输入内容长度不一致", len(original), len(signal_watermarked))
        min_length = min(len(original), len(signal_watermarked))
        original = original[0:min_length]
        signal_watermarked = signal_watermarked[0:min_length]
    assert original.shape == signal_watermarked.shape
    return original, signal_watermarked


def signal_noise_ratio(original, signal_watermarked):
    # 数值越高越好,最好的结果为无穷大
    original, signal_watermarked = to_equal_length(original, signal_watermarked)
    noise_strength = np.sum((original - signal_watermarked) ** 2)
    if noise_strength == 0:  # 说明原始信号并未改变
        return np.inf
    signal_strength = np.sum(original ** 2)
    ratio = signal_strength / noise_strength

    # np.log10(1) == 0
    # 当噪声比信号强度还高时,信噪比就是负的
    # 如果ratio是0,那么 np.log10(0) 就是负无穷 -inf
    # 这里限定一个最小值,以免出现负无穷情况
    ratio = max(1e-10, ratio)
    return 10 * np.log10(ratio)


def batch_signal_noise_ratio(original, signal_watermarked):
    signal = original.detach().cpu().numpy()
    signal_watermarked = signal_watermarked.detach().cpu().numpy()
    tmp_list = []
    for s, swm in zip(signal, signal_watermarked):
        out = signal_noise_ratio(s, swm)
        tmp_list.append(out)
    return np.mean(tmp_list)


def calc_bce_acc(predictions, ground_truth, threshold=0.5):
    assert predictions.shape == ground_truth.shape

    # 将预测值转换为类别标签
    predicted_labels = (predictions >= threshold).float()

    # 计算准确率
    accuracy = ((predicted_labels == ground_truth).float().mean().item())
    return accuracy


def resample_to16k(data, old_sr):
    # 对数据进行重采样
    new_fs = 16000
    new_data = data[::int(old_sr / new_fs)]
    return new_data


import pypesq


def pesq(signal1, signal2, sr):
    signal1, signal2 = to_equal_length(signal1, signal2)

    # Perceptual Evaluation of Speech Quality
    # [−0.5 to 4.5], PESQ>3.5 时音频质量较好,>4.0基本上就听不到了
    # 函数只支持16k或8k的输入,因此在输入前校验采样率。由于这个指标计算的是可感知性,因此这里改变采样率和水印鲁棒性是无关的
    if sr != 16000:
        signal1 = resample_to16k(signal1, sr)
        signal2 = resample_to16k(signal2, sr)

    try:
        pesq = pypesq.pesq(signal1, signal2, 16000)
        # 可能会有错误:ValueError: ref is all zeros, processing error!
    except Exception as e:
        pesq = 0
        print("pesq计算错误:", e)

    return pesq