Andreyalth committed on
Commit
40a86aa
1 Parent(s): 7ce535b

agregar archivos

Browse files
Files changed (2) hide show
  1. interfaz.py +76 -0
  2. requirements.txt +4 -0
interfaz.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from nemo.collections.asr.models import EncDecSpeakerLabelModel
4
+ import json
5
+
6
# Run inference on the GPU when CUDA is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Minimum rescaled similarity score for find_matches to report a speaker.
THRESHOLD = 0.6

# Pretrained speaker-verification checkpoint, loaded once at import time and
# moved to the selected device.
model_name = "nvidia/speakerverification_en_titanet_large"
model = EncDecSpeakerLabelModel.from_pretrained(model_name)
model = model.to(device)
12
+
13
def create_voice_print(audio):
    """Compute a unit-length voice print for an audio file.

    Args:
        audio: Path of the audio file to embed (the Gradio input is declared
            with ``type="filepath"``). A falsy value means nothing was given.

    Returns:
        A plain list of floats holding the L2-normalised embedding, or a JSON
        string of the form ``{"error": ...}`` when no audio was supplied.
    """
    if not audio:
        return json.dumps({"error": "no se proporciono un audio"})

    # Drop singleton dimensions so the embedding is a flat vector.
    embedding = model.get_embedding(audio).squeeze()

    # Scale to unit length so that prints are directly comparable.
    unit_embedding = embedding / torch.linalg.norm(embedding)

    # A tensor is not JSON-serialisable; return a plain list so the JSON
    # output can render it and the result can be pasted back as JSON text.
    return unit_embedding.tolist()
23
+
24
def compare_voice_print(X, Y):
    """Return the cosine similarity of two voice prints rescaled to [0, 1].

    Args:
        X: 1-D tensor holding the first voice print.
        Y: 1-D tensor holding the second voice print (same length and dtype
            as ``X``, as required by ``torch.dot``).

    Returns:
        A Python float: 1.0 for vectors pointing the same way, 0.5 for
        orthogonal vectors, 0.0 for opposite vectors. If either vector has
        zero norm the cosine is undefined and 0.0 is returned instead of NaN.
    """
    # Product of the two Euclidean norms.
    norm_product = (torch.dot(X, X) * torch.dot(Y, Y)) ** 0.5
    if norm_product.item() == 0:
        # Avoid 0/0 -> NaN for an all-zero voice print.
        return 0.0

    # Clamp away floating-point overshoot so the score stays inside [0, 1].
    cosine = torch.clamp(torch.dot(X, Y) / norm_product, -1.0, 1.0)

    # Map the cosine from [-1, 1] onto [0, 1].
    return ((cosine + 1) / 2).item()
29
+
30
# TODO: figure out how to run the voice print.
def find_matches(file, voice_print):
    """Find the stored speakers that best match a query voice print.

    The JSON file is expected to be an object whose ``"data"`` key holds a
    list of speaker records. Each record carries a ``"voice_print"`` entry
    that is either a JSON-encoded array (string) or an already-decoded list.

    Args:
        file: Path of the JSON file with the stored speakers.
        voice_print: JSON text of the query voice print (an array of numbers).

    Returns:
        A list with at most three ``{"speaker", "similarity_score"}`` dicts,
        best match first, containing only scores of at least ``THRESHOLD``.
        On bad input a JSON string of the form ``{"error": ...}`` is returned.
    """
    if not file:
        return json.dumps({"error": "No se proporcionó un archivo JSON"})

    try:
        # Context manager so the file handle is always closed.
        with open(file, encoding="utf-8") as handle:
            json_content = json.load(handle)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return json.dumps({"error": "El archivo JSON no es válido"})

    # The lookup below needs an object at the top level.
    if not isinstance(json_content, dict):
        return json.dumps({"error": "El archivo JSON no es válido"})

    data = json_content.get("data", [])

    # Convert the query to a float tensor. An empty text box or malformed
    # text is reported as an error instead of raising.
    try:
        query = torch.tensor(json.loads(voice_print), dtype=torch.float32)
    except (TypeError, ValueError, RuntimeError):
        return json.dumps({"error": "La huella de voz no es un JSON válido"})

    matches = []
    for speaker in data:
        stored = speaker['voice_print']
        if isinstance(stored, str):
            stored = json.loads(stored)
        # Same dtype as the query: torch.dot rejects mixed dtypes.
        speaker_voice_print = torch.tensor(stored, dtype=torch.float32)
        similarity_score = compare_voice_print(query, speaker_voice_print)
        if similarity_score >= THRESHOLD:
            matches.append({"speaker": speaker, "similarity_score": similarity_score})

    # Best matches first; keep only the top three.
    matches.sort(key=lambda match: match['similarity_score'], reverse=True)
    return matches[:3]
57
+
58
+
59
# First tab: takes an audio input (handed to the callback as a file path)
# and shows the result of create_voice_print as JSON.
voice_print_maker = gr.Interface(
    create_voice_print,
    [gr.Audio(type="filepath")],
    gr.JSON(),
)

# Second tab: takes a JSON file of stored speakers plus the query voice
# print as text, and shows the result of find_matches as JSON.
voice_prints_loader = gr.Interface(
    find_matches,
    [gr.File(type="filepath", label="Upload JSON file"), gr.TextArea()],
    gr.JSON(),
)

# Combine both interfaces into one tabbed app and start it.
demo = gr.TabbedInterface(
    interface_list=[voice_print_maker, voice_prints_loader],
    tab_names=["app", "loader"],
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.29.0
2
+ torch==2.4.0
3
+ nemo_toolkit==1.23.0
4
+ huggingface-hub==0.23.2