File size: 1,633 Bytes
903b444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Global configuration — paths, models, metadata, and search settings
from pathlib import Path

# --- Directories & files ---
# Project root (kis_project_v1.1/): the second ancestor of this file's
# resolved absolute path.
ROOT = Path(__file__).resolve().parents[1]

# All remaining paths are derived from <ROOT>/data.
DATA_DIR = ROOT.joinpath("data")
SUBS_DIR = DATA_DIR.joinpath("subtitles")
META_CSV = DATA_DIR.joinpath("metadata.csv")

# The FAISS index file sits inside the embeddings directory.
INDEX_DIR = DATA_DIR.joinpath("embeddings")
FAISS_PATH = INDEX_DIR.joinpath("faiss.index")

# --- Models & params ---
# NOTE(review): presumably the number of subtitle lines grouped into one
# chunk — confirm against the code that consumes it.
LINES_PER_CHUNK: int = 40

# Model identifier strings. EMBEDDING_MODEL is also exposed through
# SEARCH_CONFIG["embedding_model"].
EMBEDDING_MODEL: str = "all-MiniLM-L6-v2"
SUMMARY_MODEL: str = "sshleifer/distilbart-cnn-12-6"

# --- Unified video metadata ---
# Topic name -> {"id": ..., "title": ...} for each known video.
# Each row below is (topic, video id, video title); insertion order follows
# the row order.
# NOTE(review): the ids look like YouTube video ids — confirm with the consumer.
VIDEO_METADATA = {
    topic: {"id": video_id, "title": video_title}
    for topic, video_id, video_title in (
        (
            "artificial intelligence",
            "SSE4M0gcmvE",
            "Introduction to Artificial Intelligence | What Is AI? | Simplilearn",
        ),
        (
            "machine learning",
            "ukzFI9rgwfU",
            "Machine Learning | What Is Machine Learning? | Simplilearn",
        ),
        (
            "deep learning",
            "FbxTVRfQFuI",
            "Deep Learning Explained | Neural Networks | EdX",
        ),
    )
}

# --- Abbreviations for app suggestion logic ---
# Lowercase abbreviation -> the full term it expands to.
ABBREVIATION_MAP = {
    # Topic-level abbreviations
    "ml": "machine learning",
    "ai": "artificial intelligence",
    "dl": "deep learning",
    # Neural-network variants
    "nn": "neural network",
    "ann": "artificial neural network",
    "cnn": "convolutional neural network",
    "rnn": "recurrent neural network",
    # Other terms
    "svm": "support vector machine",
    "knn": "k-nearest neighbors",
    "lr": "logistic regression",
    "gd": "gradient descent",
    "nlp": "natural language processing",
}

# --- Search settings ---
# Search settings bundled into one mapping.
# "embedding_model" mirrors the module-level EMBEDDING_MODEL constant.
# NOTE(review): "faiss_top_k" is presumably the number of candidates fetched
# from the FAISS index, and "results_per_page" the page size shown to the
# user — confirm against the search code.
SEARCH_CONFIG = dict(
    embedding_model=EMBEDDING_MODEL,
    faiss_top_k=100,
    results_per_page=5,
)