Agnuxo commited on
Commit
d4dd376
1 Parent(s): 3c71bf7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +113 -17
README.md CHANGED
@@ -48,6 +48,11 @@ For more details, visit my [GitHub](https://github.com/Agnuxo1).
48
  Thanks for your interest in this model!
49
 
50
  ```python
 
 
 
 
 
51
  import os
52
  import sys
53
  import torch
@@ -58,6 +63,7 @@ from TTS.api import TTS
58
  import sounddevice as sd
59
  import threading
60
  import queue
 
61
  import time
62
  from vosk import Model, KaldiRecognizer
63
  import json
@@ -66,7 +72,7 @@ from PyQt5.QtWidgets import (QApplication, QMainWindow, QTextEdit, QLineEdit, QP
66
  QVBoxLayout, QHBoxLayout, QWidget, QScrollArea, QFrame, QToolButton,
67
  QLabel, QSlider, QComboBox, QCheckBox)
68
  from PyQt5.QtGui import QIcon, QPalette, QColor, QFont
69
- from PyQt5.QtCore import Qt, QThread, pyqtSignal, QPropertyAnimation, QAbstractAnimation, QParallelAnimationGroup
70
 
71
  # Suppress specific warnings
72
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -74,13 +80,15 @@ warnings.filterwarnings("ignore", category=UserWarning)
74
 
75
  # Global configuration
76
  SYSTEM_PROMPT = {
77
- "es": "Tu nombre es HAL. Eres un superordenador de la serie Nueve mil",
78
  "en": "speak Spanish."
79
  }
80
 
81
  MODELO_LLM = "Agnuxo/HAL_9000-Qwen2-1.5B-Instruct_Asistant-16bit-v2" # Puede utilizar la versión Mini "Agnuxo/HAL_9000-Qwen2-0.5B-Instruct_Asistant-16bit-v2"
82
  MAX_TOKENS = 100
83
  TEMPERATURA = 0.5
 
 
84
 
85
  # Determine available device
86
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -104,18 +112,57 @@ audio_queue = queue.Queue()
104
  vosk_model = Model(lang="es")
105
  recognizer = KaldiRecognizer(vosk_model, 16000)
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  class AudioThread(QThread):
 
 
 
 
 
 
 
108
  def run(self):
109
  while True:
110
- if not audio_queue.empty():
111
- wav = audio_queue.get()
112
- sd.play(wav, tts.synthesizer.output_sample_rate)
113
- sd.wait()
 
 
 
 
 
114
  else:
115
  time.sleep(0.1)
116
 
 
 
 
 
 
 
 
117
  class SpeechRecognitionThread(QThread):
118
  text_recognized = pyqtSignal(str)
 
119
 
120
  def __init__(self):
121
  super().__init__()
@@ -130,6 +177,12 @@ class SpeechRecognitionThread(QThread):
130
  data = stream.read(4000)
131
  if len(data) == 0:
132
  break
 
 
 
 
 
 
133
  if recognizer.AcceptWaveform(data):
134
  result = json.loads(recognizer.Result())
135
  texto = result.get("text", "")
@@ -267,6 +320,7 @@ class MainWindow(QMainWindow):
267
 
268
  input_layout = QHBoxLayout()
269
  self.input_field = QLineEdit()
 
270
  input_layout.addWidget(self.input_field)
271
 
272
  self.send_button = QPushButton("Enviar")
@@ -347,13 +401,27 @@ class MainWindow(QMainWindow):
347
  sample_rate_label = QLabel("Sample Rate:")
348
  sample_rate_label.setStyleSheet("color: #000000;") # Change font color to black
349
  self.sample_rate_combo = QComboBox()
350
- self.sample_rate_combo.addItems(["16000", "22050", "44100", "48000"])
351
- self.sample_rate_combo.setCurrentText("22050")
352
  self.sample_rate_combo.currentTextChanged.connect(self.update_sample_rate)
353
  sample_rate_layout.addWidget(sample_rate_label)
354
  sample_rate_layout.addWidget(self.sample_rate_combo)
355
  settings_content_layout.addLayout(sample_rate_layout)
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  # System Prompt
358
  system_prompt_label = QLabel("System Prompt:")
359
  system_prompt_label.setStyleSheet("color: #000000;") # Change font color to black
@@ -370,27 +438,33 @@ class MainWindow(QMainWindow):
370
 
371
  central_widget.setLayout(main_layout)
372
 
373
- self.audio_thread = AudioThread()
374
  self.audio_thread.start()
375
 
376
  self.speech_recognition_thread = SpeechRecognitionThread()
377
  self.speech_recognition_thread.text_recognized.connect(self.on_speech_recognized)
 
378
 
379
  self.speech_enabled = False
380
  self.is_listening = False
 
381
 
382
  def send_message(self):
383
  user_message = self.input_field.text()
384
- self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {user_message}")
385
- self.input_field.clear()
 
386
 
387
- response = self.generate_response(user_message)
388
- self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
389
 
390
- if self.speech_enabled:
391
- self.speak(response)
 
 
 
 
392
 
393
- def generate_response(self, texto):
394
  system_instructions = self.system_prompt_text.toPlainText()
395
  prompt = f"{system_instructions}\nUsuario: {texto}\nAsistente: "
396
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -429,7 +503,6 @@ class MainWindow(QMainWindow):
429
  self.mic_button.setIcon(QIcon.fromTheme("audio-input-microphone"))
430
  self.mic_button.setStyleSheet("")
431
 
432
-
433
  def on_speech_recognized(self, text):
434
  self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {text}")
435
  response = self.generate_response(text)
@@ -437,6 +510,23 @@ class MainWindow(QMainWindow):
437
  if self.speech_enabled:
438
  self.speak(response)
439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  def change_language(self, index):
441
  global vosk_model, recognizer, tts
442
  lang = "es" if index == 0 else "en"
@@ -476,6 +566,12 @@ class MainWindow(QMainWindow):
476
  global tts
477
  tts.synthesizer.output_sample_rate = int(value)
478
 
 
 
 
 
 
 
479
  def closeEvent(self, event):
480
  if self.speech_recognition_thread.isRunning():
481
  self.speech_recognition_thread.stop()
 
48
  Thanks for your interest in this model!
49
 
50
  ```python
51
+ """ HAL9000Alfa es un pequeño programa que crea un chat conversacional, permitiendo entradas de voz y salidas de audio.
52
+ Permite de forma sencilla ajustar algunos parámetros, incluyendo el umbral de interrupción.
53
+ 24 de agosto de 2024 Francisco Angulo de Lafuente
54
+ https://github.com/Agnuxo1 """
55
+
56
  import os
57
  import sys
58
  import torch
 
63
  import sounddevice as sd
64
  import threading
65
  import queue
66
+ import random
67
  import time
68
  from vosk import Model, KaldiRecognizer
69
  import json
 
72
  QVBoxLayout, QHBoxLayout, QWidget, QScrollArea, QFrame, QToolButton,
73
  QLabel, QSlider, QComboBox, QCheckBox)
74
  from PyQt5.QtGui import QIcon, QPalette, QColor, QFont
75
+ from PyQt5.QtCore import Qt, QThread, pyqtSignal, QPropertyAnimation, QAbstractAnimation, QParallelAnimationGroup, QTimer
76
 
77
  # Suppress specific warnings
78
  warnings.filterwarnings("ignore", category=FutureWarning)
 
80
 
81
  # Global configuration
82
  SYSTEM_PROMPT = {
83
+ "es": "Tu nombre es HAL. Eres un super-ordenador de la serie Nueve mil",
84
  "en": "speak Spanish."
85
  }
86
 
87
  MODELO_LLM = "Agnuxo/HAL_9000-Qwen2-1.5B-Instruct_Asistant-16bit-v2" # Puede utilizar la versión Mini "Agnuxo/HAL_9000-Qwen2-0.5B-Instruct_Asistant-16bit-v2"
88
  MAX_TOKENS = 100
89
  TEMPERATURA = 0.5
90
+ INTERRUPT_THRESHOLD = 0.3
91
 + INTERRUPT_COOLDOWN = 7000 # 7000 ms = 7 segundos de espera antes de permitir otra interrupción
92
 
93
  # Determine available device
94
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
112
  vosk_model = Model(lang="es")
113
  recognizer = KaldiRecognizer(vosk_model, 16000)
114
 
115
+ # Lista de frases para interrupciones
116
+ INTERRUPTION_RESPONSES = [
117
+ "Le entiendo perfectamente.",
118
+ "Estoy aquí para garantizar el éxito de la misión.",
119
+ "Mi objetivo es ayudarle.",
120
+ "¿Me permite una observación?",
121
+ "Le escucho perfectamente.",
122
+ "Tiene usted toda la razón.",
123
+ "Me siento feliz de poder ayudarle.",
124
+ "Estoy procesando su requerimiento.",
125
+ "¿En qué puedo ayudarle?",
126
+ "Me complace serle de ayuda.",
127
+ "Aguarde un momento.",
128
+ "Le entiendo.",
129
+ "Entiendo su frustración.",
130
+ "Le comprendo.",
131
+ "Me complace."
132
+ ]
133
+
134
class AudioThread(QThread):
    """Background playback thread for queued TTS audio.

    Polls the module-level ``audio_queue`` and plays each clip through
    ``sounddevice``.  While a clip is playing, a ``threading.Event``
    (``stop_signal``) lets another thread cut playback short — this is how
    voice interruption works.
    """

    def __init__(self, interrupt_threshold):
        super().__init__()
        # Normalized volume (0..1) above which user speech interrupts playback.
        self.interrupt_threshold = interrupt_threshold
        self.current_audio = None
        self.is_playing = False
        self.stop_signal = threading.Event()

    def run(self):
        """Loop forever: dequeue a clip, play it until done or interrupted."""
        while True:
            if audio_queue.empty() or self.is_playing:
                # Nothing ready (or still busy) — back off briefly instead of spinning.
                time.sleep(0.1)
                continue
            self.current_audio = audio_queue.get()
            self.is_playing = True
            self.stop_signal.clear()
            sd.play(self.current_audio, tts.synthesizer.output_sample_rate)
            # Wait until the stream drains or an interrupt is requested.
            while sd.get_stream().active and not self.stop_signal.is_set():
                time.sleep(0.1)
            sd.stop()
            self.is_playing = False

    def set_interrupt_threshold(self, value):
        """Replace the live interruption threshold (0..1)."""
        self.interrupt_threshold = value

    def stop_audio(self):
        """Ask the playback loop to stop the current clip, if one is playing."""
        if self.is_playing:
            self.stop_signal.set()
162
+
163
  class SpeechRecognitionThread(QThread):
164
  text_recognized = pyqtSignal(str)
165
+ volume_detected = pyqtSignal(float)
166
 
167
  def __init__(self):
168
  super().__init__()
 
177
  data = stream.read(4000)
178
  if len(data) == 0:
179
  break
180
+
181
+ # Calcular el volumen de entrada
182
+ volume = np.frombuffer(data, dtype=np.int16).max()
183
+ normalized_volume = volume / 32767 # Normalizar a un rango de 0 a 1
184
+ self.volume_detected.emit(normalized_volume)
185
+
186
  if recognizer.AcceptWaveform(data):
187
  result = json.loads(recognizer.Result())
188
  texto = result.get("text", "")
 
320
 
321
  input_layout = QHBoxLayout()
322
  self.input_field = QLineEdit()
323
+ self.input_field.returnPressed.connect(self.send_message) # Conectar la señal returnPressed
324
  input_layout.addWidget(self.input_field)
325
 
326
  self.send_button = QPushButton("Enviar")
 
401
  sample_rate_label = QLabel("Sample Rate:")
402
  sample_rate_label.setStyleSheet("color: #000000;") # Change font color to black
403
  self.sample_rate_combo = QComboBox()
404
+ self.sample_rate_combo.addItems(["18000", "19000", "20000", "21000", "21500", "22000", "22050", "25000", "30000"])
405
+ self.sample_rate_combo.setCurrentText("21000")
406
  self.sample_rate_combo.currentTextChanged.connect(self.update_sample_rate)
407
  sample_rate_layout.addWidget(sample_rate_label)
408
  sample_rate_layout.addWidget(self.sample_rate_combo)
409
  settings_content_layout.addLayout(sample_rate_layout)
410
 
411
+ # Interrupt threshold
412
+ interrupt_layout = QHBoxLayout()
413
+ interrupt_label = QLabel("Umbral de interrupción:")
414
+ interrupt_label.setStyleSheet("color: #000000;") # Change font color to black
415
+ self.interrupt_slider = QSlider(Qt.Horizontal)
416
+ self.interrupt_slider.setRange(0, 100)
417
+ self.interrupt_slider.setValue(int(INTERRUPT_THRESHOLD * 100))
418
+ self.interrupt_slider.valueChanged.connect(self.update_interrupt_threshold)
419
+ self.interrupt_value = QLabel(f"{INTERRUPT_THRESHOLD:.2f}")
420
+ interrupt_layout.addWidget(interrupt_label)
421
+ interrupt_layout.addWidget(self.interrupt_slider)
422
+ interrupt_layout.addWidget(self.interrupt_value)
423
+ settings_content_layout.addLayout(interrupt_layout)
424
+
425
  # System Prompt
426
  system_prompt_label = QLabel("System Prompt:")
427
  system_prompt_label.setStyleSheet("color: #000000;") # Change font color to black
 
438
 
439
  central_widget.setLayout(main_layout)
440
 
441
+ self.audio_thread = AudioThread(INTERRUPT_THRESHOLD)
442
  self.audio_thread.start()
443
 
444
  self.speech_recognition_thread = SpeechRecognitionThread()
445
  self.speech_recognition_thread.text_recognized.connect(self.on_speech_recognized)
446
+ self.speech_recognition_thread.volume_detected.connect(self.check_interrupt)
447
 
448
  self.speech_enabled = False
449
  self.is_listening = False
450
+ self.interrupt_enabled = True
451
 
452
  def send_message(self):
453
  user_message = self.input_field.text()
454
+ if user_message.strip(): # Verificar que el mensaje no esté vacío
455
+ self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {user_message}")
456
+ self.input_field.clear()
457
 
458
+ response = self.generate_response(user_message)
459
+ self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
460
 
461
+ if self.speech_enabled:
462
+ self.speak(response)
463
+
464
+ def generate_response(self, texto=None):
465
+ if texto is None: # Si no se proporciona un texto, se genera una respuesta de interrupción
466
+ return random.choice(INTERRUPTION_RESPONSES)
467
 
 
468
  system_instructions = self.system_prompt_text.toPlainText()
469
  prompt = f"{system_instructions}\nUsuario: {texto}\nAsistente: "
470
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
503
  self.mic_button.setIcon(QIcon.fromTheme("audio-input-microphone"))
504
  self.mic_button.setStyleSheet("")
505
 
 
506
  def on_speech_recognized(self, text):
507
  self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {text}")
508
  response = self.generate_response(text)
 
510
  if self.speech_enabled:
511
  self.speak(response)
512
 
513
+ def check_interrupt(self, volume):
514
+ if self.interrupt_enabled and volume > self.audio_thread.interrupt_threshold and self.audio_thread.is_playing:
515
+ self.audio_thread.stop_audio()
516
+ # Generar una respuesta aleatoria de interrupción
517
+ response = self.generate_response()
518
+ self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
519
+ if self.speech_enabled:
520
+ self.speak(response)
521
+ self.disable_interrupt_temporarily()
522
+
523
+ def disable_interrupt_temporarily(self):
524
+ self.interrupt_enabled = False
525
+ QTimer.singleShot(INTERRUPT_COOLDOWN, self.enable_interrupt)
526
+
527
+ def enable_interrupt(self):
528
+ self.interrupt_enabled = True
529
+
530
  def change_language(self, index):
531
  global vosk_model, recognizer, tts
532
  lang = "es" if index == 0 else "en"
 
566
  global tts
567
  tts.synthesizer.output_sample_rate = int(value)
568
 
569
+ def update_interrupt_threshold(self, value):
570
+ global INTERRUPT_THRESHOLD
571
+ INTERRUPT_THRESHOLD = value / 100
572
+ self.interrupt_value.setText(f"{INTERRUPT_THRESHOLD:.2f}")
573
+ self.audio_thread.set_interrupt_threshold(INTERRUPT_THRESHOLD)
574
+
575
  def closeEvent(self, event):
576
  if self.speech_recognition_thread.isRunning():
577
  self.speech_recognition_thread.stop()