WillyCodesInit committed on
Commit
7318709
·
verified ·
1 Parent(s): a3c66ab

Create embedding.py

Browse files
Files changed (1) hide show
  1. src/embedding.py +20 -0
src/embedding.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # embedding.py
2
+ import os
3
+ import numpy as np
4
+ import pandas as pd
5
+ import faiss
6
+ from sentence_transformers import SentenceTransformer
7
+
8
+ # --- Load data ---
9
def load_data():
    """Read the question/answer training pairs shipped next to this module.

    The CSV is looked up as ``train_data.csv`` in the directory containing
    this source file, not in the current working directory.

    Returns:
        A ``(questions, answers)`` tuple of plain Python lists taken from the
        ``question`` and ``answer`` columns of the CSV.
    """
    module_dir = os.path.dirname(__file__)
    frame = pd.read_csv(os.path.join(module_dir, 'train_data.csv'))
    questions = frame['question'].tolist()
    answers = frame['answer'].tolist()
    return questions, answers
13
+
14
+ # --- Embedding model and FAISS index ---
15
def setup_embeddings(answers):
    """Embed the answers and load the resulting vectors into a FAISS index.

    Args:
        answers: Non-empty sequence of answer strings to encode.

    Returns:
        A ``(embedder, index)`` tuple: the ``SentenceTransformer`` model used
        for encoding, and a ``faiss.IndexFlatL2`` populated with the answer
        embeddings.

    Raises:
        ValueError: If ``answers`` is empty.
    """
    # Fail fast with a clear message: without this guard an empty input only
    # surfaces later as an IndexError on ``shape[1]``, after the model has
    # already been loaded.
    if len(answers) == 0:
        raise ValueError("answers must contain at least one entry")

    embedder = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
    answer_embeddings = embedder.encode(answers, show_progress_bar=True)

    # FAISS expects a C-contiguous float32 matrix. ascontiguousarray enforces
    # that and avoids a copy when the embeddings already have that layout.
    vectors = np.ascontiguousarray(answer_embeddings, dtype=np.float32)

    # Index dimensionality is taken from the embedding width.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return embedder, index