|
\ |
|
import re, io, os, numpy as np, soundfile as sf |
|
from pydub import AudioSegment |
|
|
|
DEFAULT_SAMPLE_RATE = int(os.getenv("SAMPLE_RATE", "16000")) |
|
MAX_SEC = 2.0 |
|
N_SAMPLES = int(DEFAULT_SAMPLE_RATE*MAX_SEC) |
|
|
|
MACHINE_KEYWORDS = [ |
|
"pound key","press pound","record","please leave","leave a message", |
|
"can't come","can't get","forwarded","sorry i miss","sorry i missed", |
|
"we are not","not available","we're unable","we are unable", |
|
"can't take","can't answer","voice mail","voicemail","leave your name", |
|
"after the tone","at the tone","beep" |
|
] |
|
HUMAN_KEYWORDS = [ |
|
"speaking","this is","how can i help","yeah","yes","okay","just a sec" |
|
] |
|
IGNORE_WORDS = ["hi","hello"] |
|
|
|
def normalize_text(t: str) -> str: |
|
if not t: |
|
return "" |
|
t = t.lower().strip() |
|
|
|
for w in IGNORE_WORDS: |
|
t = re.sub(rf"\b{re.escape(w)}\b", " ", t) |
|
t = re.sub(r"\s+", " ", t) |
|
return t.strip() |
|
|
|
def keyword_hits(text: str, vocab) -> list[str]: |
|
hits = [] |
|
for kw in vocab: |
|
if kw in text: |
|
hits.append(kw) |
|
return hits |
|
|
|
def wav_bytes_to_float32(samples_bytes: bytes, sample_rate: int = DEFAULT_SAMPLE_RATE) -> np.ndarray: |
|
"""Decode linear PCM16 bytes to float32 [-1,1].""" |
|
arr = np.frombuffer(samples_bytes, dtype=np.int16).astype(np.float32) / 32768.0 |
|
return arr |
|
|
|
def ensure_2s(buf: np.ndarray, sample_rate: int = DEFAULT_SAMPLE_RATE) -> np.ndarray: |
|
need = int(sample_rate*MAX_SEC) |
|
if buf.shape[0] >= need: |
|
return buf[:need] |
|
out = np.zeros((need,), dtype=np.float32) |
|
out[:buf.shape[0]] = buf |
|
return out |
|
|