saim1309 committed on
Commit
7f6935c
·
verified ·
1 Parent(s): 316486d

Delete utils.py

Browse files
Files changed (1) hide show
  1. utils.py +0 -42
utils.py DELETED
@@ -1,42 +0,0 @@
1
- import openai
2
- import numpy as np
3
- import re
4
- from typing import List, Tuple
5
- from config import EMBED_MODEL
6
-
7
def get_embedding(text: str) -> List[float]:
    """Return the embedding vector for ``text``.

    Newlines in ``text`` are replaced by spaces and surrounding whitespace
    is stripped before the request is made. The cleaned text is sent as a
    single-item batch to ``openai.embeddings.create`` using the model named
    by ``EMBED_MODEL``, and the embedding of that one item is returned.
    """
    flattened = text.replace("\n", " ").strip()
    result = openai.embeddings.create(model=EMBED_MODEL, input=[flattened])
    # Exactly one input was submitted, so the first data item is the answer.
    first_item = result.data[0]
    return first_item.embedding
12
-
13
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Calculate the cosine similarity between two vectors.

    Args:
        a: First vector, as a sequence of numbers.
        b: Second vector, same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b`` as a builtin
        ``float``. Returns ``0.0`` when either vector has zero norm (which
        includes empty vectors), since the similarity is undefined there.
    """
    vec_a = np.asarray(a, dtype=float)
    vec_b = np.asarray(b, dtype=float)

    # Compute each norm once; they are needed for both the guard and the
    # final division.
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    if norm_a == 0 or norm_b == 0:
        return 0.0

    # Convert the NumPy scalar so both return paths yield a builtin float.
    return float(np.dot(vec_a, vec_b) / (norm_a * norm_b))
20
-
21
def clean_time(time_str: str) -> str:
    """Normalize a 12-hour clock time found in ``time_str``.

    The first occurrence of an hour, optional minutes and an AM/PM marker
    (case-insensitive) is reformatted as ``"H:MM AM"`` / ``"H:MM PM"``.
    Minutes may follow a colon (``"10:30pm"``) or be written without one as
    exactly two digits (``"930am"``, ``"1230 PM"``). Missing minutes default
    to ``"00"``.

    Args:
        time_str: Raw text that may contain a time.

    Returns:
        The normalized time, ``""`` for empty/``None`` input, or the
        stripped input when no time with an AM/PM marker is found.
    """
    if not time_str:
        return ""

    # Without a colon the minutes must be exactly two digits. This lets the
    # regex backtrack so "930am" is read as 9:30 rather than letting the
    # greedy hour group take "93" and leave "0" as the minutes.
    time_match = re.search(
        r'(\d{1,2})(?::(\d{0,2})|(\d{2}))?\s*(AM|PM)',
        time_str,
        re.IGNORECASE,
    )
    if time_match:
        hour = time_match.group(1)
        # group(2): minutes after a colon; group(3): colon-less minutes.
        minute = time_match.group(2) or time_match.group(3) or "00"
        ampm = time_match.group(4).upper()
        return f"{hour}:{minute} {ampm}"

    return time_str.strip()
34
-
35
def find_top_k_matches(user_embedding, dataset, k=3):
    """Find the top ``k`` entries of ``dataset`` most similar to an embedding.

    Args:
        user_embedding: Query vector to compare against each entry.
        dataset: Iterable of ``(entry_id, text, embedding)`` triples.
        k: Maximum number of matches to return.

    Returns:
        A list of at most ``k`` ``(score, entry_id, text)`` tuples ordered by
        descending cosine similarity. Entries with equal scores keep their
        relative order from ``dataset``.
    """
    scored = [
        (cosine_similarity(user_embedding, emb), entry_id, text)
        for entry_id, text, emb in dataset
    ]
    # Sort on the score alone: comparing whole tuples would fall through to
    # entry_id/text on ties and raise TypeError for non-orderable ids.
    scored.sort(key=lambda item: item[0], reverse=True)
    return scored[:k]