File size: 1,648 Bytes
e112632
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
\
import re, io, os, numpy as np, soundfile as sf
from pydub import AudioSegment

# Sample rate in Hz; the SAMPLE_RATE environment variable overrides the
# 16000 default. A non-integer value raises ValueError at import time.
DEFAULT_SAMPLE_RATE = int(os.environ.get("SAMPLE_RATE", "16000"))
# Clip length in seconds that ensure_2s pads or truncates buffers to.
MAX_SEC = 2.0
# Sample count of one MAX_SEC clip at the default rate.
N_SAMPLES = int(MAX_SEC * DEFAULT_SAMPLE_RATE)

# Lower-case phrases typical of answering-machine / voicemail greetings.
# Presumably matched as substrings via keyword_hits — verify against callers.
MACHINE_KEYWORDS = [
    "pound key",
    "press pound",
    "record",
    "please leave",
    "leave a message",
    "can't come",
    "can't get",
    "forwarded",
    "sorry i miss",
    "sorry i missed",
    "we are not",
    "not available",
    "we're unable",
    "we are unable",
    "can't take",
    "can't answer",
    "voice mail",
    "voicemail",
    "leave your name",
    "after the tone",
    "at the tone",
    "beep",
]
# Lower-case phrases typical of a live person answering.
HUMAN_KEYWORDS = [
    "speaking",
    "this is",
    "how can i help",
    "yeah",
    "yes",
    "okay",
    "just a sec",
]
IGNORE_WORDS = ["hi", "hello"]  # greetings stripped from every normalized text

def normalize_text(t: str) -> str:
    """Return *t* lower-cased, without IGNORE_WORDS, with whitespace collapsed.

    Any falsy input (empty string, None) yields "". Ignore words are removed
    only where they stand as whole words, so "this" keeps its inner "hi".
    Punctuation is left untouched.
    """
    if not t:
        return ""
    cleaned = t.lower().strip()
    for word in IGNORE_WORDS:
        # \b on both sides restricts the match to isolated tokens.
        whole_word = rf"\b{re.escape(word)}\b"
        cleaned = re.sub(whole_word, " ", cleaned)
    # Squash the gaps left by removed words, then trim the ends.
    return re.sub(r"\s+", " ", cleaned).strip()

def keyword_hits(text: str, vocab) -> list[str]:
    """Collect every entry of *vocab* that occurs inside *text*.

    Matching is a plain, case-sensitive substring test. Hits are returned in
    the iteration order of *vocab*, and repeated vocab entries yield repeated
    hits.
    """
    return [phrase for phrase in vocab if phrase in text]

def wav_bytes_to_float32(samples_bytes: bytes, sample_rate: int = DEFAULT_SAMPLE_RATE) -> np.ndarray:
    """Decode raw linear PCM16 bytes to float32 samples in [-1, 1).

    The buffer is read as headerless, little-endian signed 16-bit samples
    (the byte order used by WAV data). No container header is parsed: any
    header bytes present would be decoded as audio.
    NOTE(review): assumes callers pass little-endian PCM — confirm for
    non-WAV sources.

    Args:
        samples_bytes: Raw PCM16 payload; its length must be a multiple of 2.
        sample_rate: Not used by the decoding; kept so existing call sites
            that pass it keep working.

    Returns:
        A 1-D float32 array with one element per 16-bit sample.

    Raises:
        ValueError: From ``np.frombuffer`` when the byte length is odd.
    """
    # "<i2" pins little-endian int16 so the result does not depend on the
    # host's native byte order (plain np.int16 would).
    pcm = np.frombuffer(samples_bytes, dtype="<i2")
    # Divide by 2**15 so the int16 range maps onto [-1, 1).
    return pcm.astype(np.float32) / 32768.0

def ensure_2s(buf: np.ndarray, sample_rate: int = DEFAULT_SAMPLE_RATE) -> np.ndarray:
    """Pad or truncate *buf* to exactly ``int(sample_rate * MAX_SEC)`` samples.

    Short input is copied into the front of a new zero-filled float32 array.
    Input that is already long enough is returned as a leading slice of
    *buf* itself, so it keeps *buf*'s dtype and is not a fresh array.
    """
    target_len = int(sample_rate * MAX_SEC)
    have = buf.shape[0]
    if have < target_len:
        # Too short: right-pad with silence.
        padded = np.zeros((target_len,), dtype=np.float32)
        padded[:have] = buf
        return padded
    return buf[:target_len]