File size: 1,648 Bytes
e112632 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
\
import re, io, os, numpy as np, soundfile as sf
from pydub import AudioSegment
DEFAULT_SAMPLE_RATE = int(os.getenv("SAMPLE_RATE", "16000"))
MAX_SEC = 2.0
N_SAMPLES = int(DEFAULT_SAMPLE_RATE*MAX_SEC)
MACHINE_KEYWORDS = [
"pound key","press pound","record","please leave","leave a message",
"can't come","can't get","forwarded","sorry i miss","sorry i missed",
"we are not","not available","we're unable","we are unable",
"can't take","can't answer","voice mail","voicemail","leave your name",
"after the tone","at the tone","beep"
]
HUMAN_KEYWORDS = [
"speaking","this is","how can i help","yeah","yes","okay","just a sec"
]
IGNORE_WORDS = ["hi","hello"] # ignored globally
def normalize_text(t: str) -> str:
if not t:
return ""
t = t.lower().strip()
# remove ignore words as isolated tokens
for w in IGNORE_WORDS:
t = re.sub(rf"\b{re.escape(w)}\b", " ", t)
t = re.sub(r"\s+", " ", t)
return t.strip()
def keyword_hits(text: str, vocab) -> list[str]:
hits = []
for kw in vocab:
if kw in text:
hits.append(kw)
return hits
def wav_bytes_to_float32(samples_bytes: bytes, sample_rate: int = DEFAULT_SAMPLE_RATE) -> np.ndarray:
"""Decode linear PCM16 bytes to float32 [-1,1]."""
arr = np.frombuffer(samples_bytes, dtype=np.int16).astype(np.float32) / 32768.0
return arr
def ensure_2s(buf: np.ndarray, sample_rate: int = DEFAULT_SAMPLE_RATE) -> np.ndarray:
need = int(sample_rate*MAX_SEC)
if buf.shape[0] >= need:
return buf[:need]
out = np.zeros((need,), dtype=np.float32)
out[:buf.shape[0]] = buf
return out
|