NIKKI77 committed on
Commit 903b444 · 0 Parent(s)

Deploy: GPU-ready HF Space (Docker)
Dockerfile ADDED
@@ -0,0 +1,25 @@
+ FROM python:3.12-slim
+
+ ENV DEBIAN_FRONTEND=noninteractive PIP_NO_CACHE_DIR=1
+ WORKDIR /app
+ ENV PYTHONPATH=/app/backend:$PYTHONPATH
+
+ COPY . .
+
+ # Install GPU-ready deps
+ RUN pip install -r requirements.txt
+
+ # Preload spaCy + NLTK data so runtime doesn't download
+ RUN python -m spacy download en_core_web_sm
+ RUN python - <<'PY'
+ import nltk
+ nltk.download('punkt')
+ nltk.download('wordnet')
+ nltk.download('omw-1.4')
+ PY
+
+ # HF Spaces uses port 7860
+ EXPOSE 7860
+
+ # Single worker + a few threads = nicer on GPU VRAM
+ CMD ["gunicorn","-w","1","-k","gthread","--threads","4","-b","0.0.0.0:7860","backend.app:app"]
backend/__pycache__/autocomplete.cpython-313.pyc ADDED
Binary file (3.44 kB)

backend/__pycache__/config.cpython-313.pyc ADDED
Binary file (1.55 kB)

backend/__pycache__/nlp_summary.cpython-313.pyc ADDED
Binary file (1.76 kB)

backend/__pycache__/punctuation.cpython-313.pyc ADDED
Binary file (1.98 kB)

backend/__pycache__/semantic_search.cpython-313.pyc ADDED
Binary file (7.56 kB)
 
backend/app.py ADDED
@@ -0,0 +1,214 @@
+ # Flask app for Subtitle KIS — main routes + search flow
+ import os
+ import re
+ import json as flask_json
+ from flask import Flask, render_template, request, jsonify
+ from markupsafe import escape, Markup
+ from nltk.corpus import wordnet
+ from nltk.stem import WordNetLemmatizer
+ from semantic_search import search_query
+ from nlp_summary import summarize_text
+ from autocomplete import get_suggestions
+ from config import ABBREVIATION_MAP, VIDEO_METADATA, SEARCH_CONFIG
+
+ # App setup
+ template_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'templates')
+ app = Flask(__name__, template_folder=template_dir)
+
+ # Security headers: Content Security Policy
+ @app.after_request
+ def apply_csp(response):
+     response.headers["Content-Security-Policy"] = (
+         "default-src 'self'; "
+         "img-src 'self' https://img.youtube.com data:; "
+         "script-src 'self' 'unsafe-inline'; "
+         "style-src 'self' 'unsafe-inline';"
+     )
+     return response
+
+ # Route: Home page
+ @app.route("/")
+ def index():
+     return render_template("index.html")
+
+ # Template filter: convert HH:MM:SS to seconds
+ @app.template_filter("jump_time")
+ def jump_time(timestamp):
+     try:
+         h, m, s = timestamp.split(':')
+         total = int(h) * 3600 + int(m) * 60 + int(float(s))
+         return max(total - 2, 0)
+     except (ValueError, AttributeError):
+         return 0
+
+ # NLP helpers: lemmatizer + synonym expansion
+ lemmatizer = WordNetLemmatizer()
+
+ def get_synonyms(word):
+     """Return a set of synonyms for a single word."""
+     synonyms = set()
+     for syn in wordnet.synsets(word):
+         for lemma in syn.lemmas():
+             synonyms.add(lemma.name().replace("_", " "))
+     return synonyms
+
+ # Highlighting helpers
+ def highlight_keywords(text, keyword, semantic_mode=False):
+     """
+     Highlight exact matches always.
+     In semantic mode, also highlight synonyms and lemmas.
+     """
+     safe_text = escape(text)
+
+     # Short keywords get word-boundary guards to avoid highlighting substrings
+     if len(keyword) <= 3:
+         pattern = re.compile(rf"(?<!\w){re.escape(keyword)}(?!\w)", re.IGNORECASE)
+     else:
+         pattern = re.compile(re.escape(keyword), re.IGNORECASE)
+
+     if pattern.search(safe_text):
+         return pattern.sub(lambda m: f"<mark>{m.group(0)}</mark>", safe_text)
+
+     # Semantic mode: fall back to highlighting lemmas and WordNet synonyms
+     if semantic_mode:
+         words = keyword.split()
+         for w in words:
+             lemma = lemmatizer.lemmatize(w.lower())
+             candidates = {lemma} | get_synonyms(w)
+             for cand in candidates:
+                 if len(cand) <= 3:
+                     syn_pattern = re.compile(rf"(?<!\w){re.escape(cand)}(?!\w)", re.IGNORECASE)
+                 else:
+                     syn_pattern = re.compile(rf"\b{re.escape(cand)}\b", re.IGNORECASE)
+                 if syn_pattern.search(safe_text):
+                     safe_text = syn_pattern.sub(lambda m: f"<mark>{m.group(0)}</mark>", safe_text)
+         return safe_text
+
+     return safe_text
+
+ # Core search orchestration
+ def perform_search(query, start=0, shown=0, previous_results=None, semantic_mode=False):
+     """Shared search logic for both HTML and JSON endpoints."""
+     if previous_results is None:
+         previous_results = []
+
+     raw_results, _ = search_query(query, offset=0, top_k=1000, semantic_mode=semantic_mode)
+
+     # Keyword mode: keep only results containing the literal query
+     if not semantic_mode:
+         raw_results = [r for r in raw_results if re.search(re.escape(query), r["text"], re.IGNORECASE)]
+
+     page_size = SEARCH_CONFIG.get("results_per_page", 5)
+     paged_results = raw_results[start:start + page_size]
+
+     new_results = []
+     for idx, r in enumerate(paged_results):
+         vid_id = r.get("video_id")
+         friendly_key = next((k for k, v in VIDEO_METADATA.items() if v["id"] == vid_id), None)
+         r["video_title"] = VIDEO_METADATA.get(friendly_key, {}).get("title", "Unknown Title")
+
+         # Summarize the hit together with its neighbouring chunks for context
+         context_chunks = []
+         if idx > 0:
+             context_chunks.append(paged_results[idx - 1]["summary_input"])
+         context_chunks.append(r["summary_input"])
+         if idx + 1 < len(paged_results):
+             context_chunks.append(paged_results[idx + 1]["summary_input"])
+
+         summary = summarize_text(" ".join(context_chunks), query=query)
+
+         highlighted_before = highlight_keywords(r["context_before"], query, semantic_mode)
+         highlighted_match = highlight_keywords(r["text"], query, semantic_mode)
+         highlighted_after = highlight_keywords(r["context_after"], query, semantic_mode)
+
+         r["highlighted_block"] = Markup(f"{highlighted_before}\n{highlighted_match}\n{highlighted_after}")
+         r["summary"] = summary
+         new_results.append(r)
+
+     combined_results = previous_results + new_results
+     shown += len(new_results)
+
+     return combined_results, len(raw_results), shown, start + len(new_results)
+
+ # HTML endpoint
+ @app.route("/search", methods=["POST"])
+ def search():
+     query = request.form.get("query", "").strip()
+     if not query:
+         return render_template("index.html", error="Please enter a search query.")
+
+     semantic_mode = request.form.get("semantic") == "true"
+     start = int(request.form.get("start", 0))
+     try:
+         previous_results = flask_json.loads(request.form.get("previous_results", "[]"))
+     except ValueError:
+         previous_results = []
+
+     for r in previous_results:
+         if isinstance(r, dict) and "highlighted_block" in r:
+             r["highlighted_block"] = Markup(r["highlighted_block"])
+
+     shown = int(request.form.get("shown", len(previous_results)))
+
+     combined_results, total_matches, shown, next_start = perform_search(
+         query, start, shown, previous_results, semantic_mode
+     )
+
+     # Abbreviation suggestion (e.g. "ml" <-> "machine learning")
+     suggestion_term = ""
+     lower_query = query.lower()
+     if lower_query in ABBREVIATION_MAP:
+         suggestion_term = ABBREVIATION_MAP[lower_query]
+     elif lower_query in ABBREVIATION_MAP.values():
+         for abbr, full in ABBREVIATION_MAP.items():
+             if full == lower_query:
+                 suggestion_term = abbr
+                 break
+
+     return render_template(
+         "results.html",
+         query=query,
+         results=combined_results,
+         shown=shown,
+         start=next_start,
+         total_matches=total_matches,
+         previous_results=combined_results,
+         suggestion_term=suggestion_term,
+         semantic=semantic_mode
+     )
+
+ # JSON API endpoint
+ @app.route("/api/search", methods=["POST"])
+ def api_search():
+     data = request.get_json(force=True)
+     query = data.get("query", "").strip()
+     semantic_mode = bool(data.get("semantic", False))
+     start = int(data.get("start", 0))
+     shown = int(data.get("shown", 0))
+     previous_results = data.get("previous_results", [])
+
+     combined_results, total_matches, shown, next_start = perform_search(
+         query, start, shown, previous_results, semantic_mode
+     )
+
+     rendered_cards = [
+         render_template("_result_card.html", result=r, query=query, semantic=semantic_mode)
+         for r in combined_results[-SEARCH_CONFIG.get("results_per_page", 5):]
+     ]
+
+     return jsonify({
+         "html": rendered_cards,
+         "shown": shown,
+         "total_matches": total_matches,
+         "next_start": next_start,
+         "has_more": next_start < total_matches
+     })
+
+ # Autocomplete endpoint
+ @app.route("/autocomplete", methods=["GET"])
+ def autocomplete():
+     term = request.args.get("term", "")
+     return flask_json.dumps(get_suggestions(term))
+
+ if __name__ == "__main__":
+     app.run(debug=True)
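For a quick smoke test of the JSON endpoint above, a hypothetical client call (not part of the commit; assumes the Space is serving on localhost:7860 and the requests package is available — field names come from api_search):

    # Hypothetical client sketch for /api/search
    import requests

    resp = requests.post(
        "http://localhost:7860/api/search",
        json={"query": "machine learning", "semantic": True, "start": 0, "shown": 0},
    )
    payload = resp.json()
    print(payload["total_matches"], payload["next_start"], payload["has_more"])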
backend/autocomplete.py ADDED
@@ -0,0 +1,51 @@
+ # Autocomplete backend — builds, loads, and queries bigram index
+
+ import os
+ import pickle
+ from collections import Counter
+
+ # Paths: where bigrams.pkl is stored
+ BIGRAMS_PATH = os.path.join(os.path.dirname(__file__), "../data/bigrams.pkl")
+
+ # Global cache (lazy-loaded bigram counts)
+ _bigram_counts = None
+
+ # Build bigrams index from subtitle blocks
+ def build_bigrams_index(blocks: list[dict], out_path: str = BIGRAMS_PATH, min_count: int = 2):
+     """
+     Build a bigram frequency file from preprocessed blocks and save to disk.
+     We use a simple whitespace tokenizer and generate bigrams via zip().
+     """
+     all_text = " ".join((b.get("text") or "").lower() for b in blocks)
+     tokens = all_text.split()
+     bigrams = [" ".join(pair) for pair in zip(tokens, tokens[1:])]
+
+     counts = Counter(bigrams)
+     if min_count > 1:
+         counts = Counter({k: v for k, v in counts.items() if v >= min_count})
+
+     os.makedirs(os.path.dirname(out_path), exist_ok=True)
+     with open(out_path, "wb") as f:
+         pickle.dump(counts, f)
+
+ # Lazy loader for bigrams.pkl into memory
+ def load_bigrams():
+     """Load precomputed bigrams from disk."""
+     global _bigram_counts
+     if _bigram_counts is None:
+         if os.path.exists(BIGRAMS_PATH):
+             with open(BIGRAMS_PATH, "rb") as f:
+                 _bigram_counts = pickle.load(f)
+         else:
+             _bigram_counts = Counter()
+
+ # Suggestion function
+ def get_suggestions(term: str):
+     """Return top 10 bigram suggestions starting with the given term."""
+     if not term or not term.strip():
+         return []
+     load_bigrams()
+     term = term.lower().strip()
+     matches = [bg for bg in _bigram_counts if bg.startswith(term)]
+     matches.sort(key=lambda x: (-_bigram_counts[x], x))
+     return matches[:10]
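A minimal round trip for the bigram index, under the assumption that a throwaway output path is fine for a test (note that get_suggestions always reads from BIGRAMS_PATH, so this sketch only illustrates the call shapes):

    # Hypothetical sketch — toy blocks, throwaway output path
    from autocomplete import build_bigrams_index, get_suggestions

    blocks = [{"text": "machine learning helps machine learning systems learn"}]
    build_bigrams_index(blocks, out_path="/tmp/bigrams.pkl", min_count=1)
    print(get_suggestions("machine"))  # reads BIGRAMS_PATH; e.g. ['machine learning']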
backend/clean_subtitles.py ADDED
@@ -0,0 +1,99 @@
+ # Subtitle cleaning + preparation — reads .vtt, cleans text, chunks, punctuates, and outputs blocks
+ import os
+ import re
+ import json
+ from pathlib import Path
+ import webvtt
+ import pandas as pd
+ from punctuation import punctuate_text
+ from config import SUBS_DIR, META_CSV, VIDEO_METADATA, LINES_PER_CHUNK
+
+ # Helpers
+ def clean_text(text: str) -> str:
+     """Lowercase, strip tags/brackets, keep basic punctuation, collapse spaces."""
+     text = text.lower()
+     text = re.sub(r'<.*?>', '', text)
+     text = re.sub(r'\[.*?\]', '', text)
+     text = re.sub(r"[^a-z0-9.,!?;:'\"()\-\s]", '', text)
+     text = re.sub(r'\s+', ' ', text).strip()
+     return text
+
+ def vtt_time_to_seconds(t: str) -> float:
+     """Convert WebVTT time ('HH:MM:SS.mmm') to seconds."""
+     h, m, s = t.split(':')
+     return int(h) * 3600 + int(m) * 60 + float(s)
+
+ # Main
+ def load_and_prepare_subtitles(folder_path: str | os.PathLike, lines_per_chunk: int = LINES_PER_CHUNK):
+     """
+     Reads .vtt files, cleans captions, chunks by N lines,
+     punctuates the WHOLE chunk once, and returns blocks suitable for indexing.
+     """
+     folder_path = os.fspath(folder_path)
+     subtitle_blocks = []
+
+     for filename in os.listdir(folder_path):
+         if not filename.endswith(".vtt"):
+             continue
+
+         stem = Path(filename).stem.strip().lower()
+         meta = VIDEO_METADATA.get(stem)
+         real_video_id = meta["id"] if meta else None
+         if not real_video_id:
+             continue
+
+         filepath = os.path.join(folder_path, filename)
+         raw_lines = []
+
+         # Collect cleaned, unpunctuated lines with original timestamps
+         for caption in webvtt.read(filepath):
+             cleaned = clean_text(caption.text)
+             if cleaned:
+                 raw_lines.append({
+                     "timestamp": caption.start,
+                     "start_sec": vtt_time_to_seconds(caption.start),
+                     "end_sec": vtt_time_to_seconds(caption.end),
+                     "text": cleaned,
+                     "video_id": real_video_id,
+                 })
+
+         if not raw_lines:
+             continue
+
+         # Chunk by N lines, then punctuate per chunk
+         for i in range(0, len(raw_lines), lines_per_chunk):
+             chunk_lines = raw_lines[i:i + lines_per_chunk]
+             chunk_raw_text = ""
+             for line in chunk_lines:
+                 text = line["text"].strip()
+                 if not text:
+                     continue
+                 if chunk_raw_text and chunk_raw_text[-1].isalpha() and text[0].isalpha():
+                     chunk_raw_text += " " + text
+                 else:
+                     if chunk_raw_text:
+                         chunk_raw_text += " "
+                     chunk_raw_text += text
+
+             # Punctuate chunk text
+             chunk_text = punctuate_text(chunk_raw_text) or chunk_raw_text
+
+             chunk_start = chunk_lines[0]["start_sec"]
+             chunk_end = chunk_lines[-1]["end_sec"]
+
+             subtitle_blocks.append({
+                 "text": chunk_text.strip(),
+                 "video_id": real_video_id,
+                 "timestamp": chunk_lines[0]["timestamp"],
+                 "lines": json.dumps(chunk_lines),
+                 "chunk_start": chunk_start,
+                 "chunk_end": chunk_end,
+             })
+
+     return subtitle_blocks
+
+ # Process all subs and save META_CSV
+ if __name__ == "__main__":
+     os.makedirs(os.path.dirname(META_CSV), exist_ok=True)
+     blocks = load_and_prepare_subtitles(SUBS_DIR, lines_per_chunk=LINES_PER_CHUNK)
+     pd.DataFrame(blocks).to_csv(META_CSV, index=False)
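As a sanity check on the time conversion above (the cue time is an arbitrary example):

    from clean_subtitles import vtt_time_to_seconds
    print(vtt_time_to_seconds("00:01:04.239"))  # 1*60 + 4.239 = 64.239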
backend/config.py ADDED
@@ -0,0 +1,54 @@
+ # Global configuration — paths, models, metadata, and search settings
+ from pathlib import Path
+
+ # --- Directories & files ---
+ ROOT = Path(__file__).resolve().parents[1]  # project root (kis_project_v1.1/)
+ DATA_DIR = ROOT / "data"
+ SUBS_DIR = DATA_DIR / "subtitles"
+ META_CSV = DATA_DIR / "metadata.csv"
+ INDEX_DIR = DATA_DIR / "embeddings"
+ FAISS_PATH = INDEX_DIR / "faiss.index"
+
+ # --- Models & params ---
+ EMBEDDING_MODEL = "all-MiniLM-L6-v2"
+ SUMMARY_MODEL = "sshleifer/distilbart-cnn-12-6"
+ LINES_PER_CHUNK = 40
+
+ # --- Unified video metadata ---
+ VIDEO_METADATA = {
+     "artificial intelligence": {
+         "id": "SSE4M0gcmvE",
+         "title": "Introduction to Artificial Intelligence | What Is AI? | Simplilearn"
+     },
+     "machine learning": {
+         "id": "ukzFI9rgwfU",
+         "title": "Machine Learning | What Is Machine Learning? | Simplilearn"
+     },
+     "deep learning": {
+         "id": "FbxTVRfQFuI",
+         "title": "Deep Learning Explained | Neural Networks | EdX"
+     }
+ }
+
+ # --- Abbreviations for app suggestion logic ---
+ ABBREVIATION_MAP = {
+     "ml": "machine learning",
+     "ai": "artificial intelligence",
+     "dl": "deep learning",
+     "nn": "neural network",
+     "ann": "artificial neural network",
+     "cnn": "convolutional neural network",
+     "rnn": "recurrent neural network",
+     "svm": "support vector machine",
+     "knn": "k-nearest neighbors",
+     "lr": "logistic regression",
+     "gd": "gradient descent",
+     "nlp": "natural language processing"
+ }
+
+ # --- Search settings ---
+ SEARCH_CONFIG = {
+     "embedding_model": EMBEDDING_MODEL,
+     "faiss_top_k": 100,
+     "results_per_page": 5
+ }
backend/embed_index.py ADDED
@@ -0,0 +1,58 @@
+ # Embedding + autocomplete index builder — creates FAISS vector index and bigram index
+ import os
+ import numpy as np
+ import pandas as pd
+ import faiss
+ from sentence_transformers import SentenceTransformer
+ from config import (
+     META_CSV,
+     INDEX_DIR,
+     FAISS_PATH,
+     EMBEDDING_MODEL,
+     VIDEO_METADATA,
+ )
+
+ # Autocomplete index builder
+ from autocomplete import build_bigrams_index, BIGRAMS_PATH
+
+ # Build FAISS embedding index + bigram autocomplete index
+ def build_embedding_index(subtitle_blocks: list[dict]):
+     texts = [(s.get("text") or "") for s in subtitle_blocks]
+     if not texts:
+         raise ValueError("No texts found in subtitle blocks. Did you generate metadata.csv?")
+
+     model = SentenceTransformer(EMBEDDING_MODEL)
+     vectors = model.encode(texts, show_progress_bar=True, convert_to_numpy=True)
+     vectors = np.asarray(vectors, dtype=np.float32)
+
+     index = faiss.IndexFlatL2(vectors.shape[1])
+     index.add(vectors)
+
+     os.makedirs(INDEX_DIR, exist_ok=True)
+     faiss.write_index(index, os.fspath(FAISS_PATH))
+
+     # Build bigrams for autocomplete
+     build_bigrams_index(subtitle_blocks, out_path=BIGRAMS_PATH, min_count=2)
+
+ # Load subtitle blocks from CSV and attach video titles
+ def load_blocks_from_csv(csv_path) -> list[dict]:
+     df = pd.read_csv(csv_path)
+     records = df.to_dict("records")
+     for r in records:
+         vid = r.get("video_id")
+         friendly_key = next((k for k, v in VIDEO_METADATA.items() if v["id"] == vid), None)
+         if friendly_key:
+             r["video_title"] = VIDEO_METADATA[friendly_key]["title"]
+         else:
+             r["video_title"] = "Unknown Video"
+     return records
+
+ # Build FAISS + autocomplete indexes
+ if __name__ == "__main__":
+     if not META_CSV.exists():
+         raise FileNotFoundError(
+             f"metadata.csv not found at {META_CSV}. Run clean_subtitles.py first to generate it."
+         )
+     blocks = load_blocks_from_csv(META_CSV)
+     build_embedding_index(blocks)
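A hedged smoke test for the index this script writes (paths and model name come from config; the query string is arbitrary and this sketch is not part of the commit):

    # Hypothetical sketch — query the freshly built IndexFlatL2
    import faiss
    from sentence_transformers import SentenceTransformer
    from config import FAISS_PATH, EMBEDDING_MODEL

    index = faiss.read_index(str(FAISS_PATH))
    model = SentenceTransformer(EMBEDDING_MODEL)
    vec = model.encode(["what is deep learning"], convert_to_numpy=True).astype("float32")
    D, I = index.search(vec, 3)  # L2 distances + row ids into metadata.csv
    print(I[0], D[0])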
backend/nlp_summary.py ADDED
@@ -0,0 +1,53 @@
+ # Summarization helper — DistilBART model with punctuation pre-processing
+
+ from transformers import pipeline
+ import torch
+ import logging
+ from punctuation import punctuate_text
+
+ # Load summarization model (GPU if available, else CPU)
+ device = 0 if torch.cuda.is_available() else -1
+ summarizer = pipeline(
+     "summarization",
+     model="sshleifer/distilbart-cnn-12-6",
+     device=device
+ )
+
+ # Summarize text
+ def summarize_text(content: str, query: str = "") -> str:
+     """
+     Summarize already punctuated content, optionally focusing on a query.
+     """
+     if not content.strip():
+         return ""
+
+     # Ensure content is punctuated before summarizing
+     content = punctuate_text(content)
+
+     # Build summarization input
+     if query:
+         input_text = f"Summarize the following text focusing on '{query}': {content}"
+     else:
+         input_text = content
+
+     try:
+         # Character-based truncation to keep the input under the model's token limit
+         max_input_chars = 3000
+         if len(input_text) > max_input_chars:
+             input_text = input_text[:max_input_chars] + " [...]"
+
+         summary = summarizer(
+             input_text,
+             max_length=150,
+             min_length=30,
+             do_sample=True,
+             top_k=50,
+             top_p=0.95,
+             temperature=0.9
+         )[0]["summary_text"]
+
+         return summary.strip()
+
+     except Exception as e:
+         logging.error(f"⚠️ Summarization failed: {str(e)}")
+         return content[:200] + " [...]" if len(content) > 200 else content
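A hypothetical call (sampling is enabled, so the summary text varies run to run; the input string is an arbitrary example):

    from nlp_summary import summarize_text
    text = "machine learning provides systems the ability to learn from data without being explicitly programmed"
    print(summarize_text(text, query="machine learning"))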
backend/punctuation.py ADDED
@@ -0,0 +1,57 @@
+ # Punctuation restoration — loads Oliver Guhr’s model and restores punctuation in raw text
+
+ import torch
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+
+ # Model
+ MODEL_NAME = "oliverguhr/fullstop-punctuation-multilang-large"
+ DEVICE = 0 if torch.cuda.is_available() else -1
+
+ print(f"Loading punctuation model ({MODEL_NAME}) on {'GPU' if DEVICE == 0 else 'CPU'}...")
+
+ # Load tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
+
+ # Pipeline for token classification
+ punctuation_pipeline = pipeline(
+     "token-classification",
+     model=model,
+     tokenizer=tokenizer,
+     device=DEVICE,
+     aggregation_strategy="simple"
+ )
+
+ # Main function
+ def punctuate_text(text: str) -> str:
+     """
+     Restores punctuation in the given text using Oliver Guhr's model.
+     Returns the punctuated text.
+     """
+     if not text.strip():
+         return text
+
+     try:
+         results = punctuation_pipeline(text)
+
+         punctuated_text = ""
+         for item in results:
+             word = item['word'].replace("▁", " ")
+             label = item['entity_group']
+
+             # Map predicted labels to punctuation marks
+             if label == "COMMA":
+                 punctuated_text += word + ","
+             elif label == "PERIOD":
+                 punctuated_text += word + "."
+             elif label == "QUESTION":
+                 punctuated_text += word + "?"
+             else:
+                 punctuated_text += word
+
+         # Clean spacing
+         return " ".join(punctuated_text.split())
+
+     except Exception as e:
+         print(f"[punctuate_text] Error: {e}")
+         return text
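A hypothetical call (the exact punctuation depends on the model's predictions, so the commented output is illustrative only):

    from punctuation import punctuate_text
    print(punctuate_text("hello how are you today i am fine"))
    # e.g. "hello, how are you today? i am fine."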
backend/semantic_search.py ADDED
@@ -0,0 +1,142 @@
+ # Search engine — supports semantic search (SBERT + FAISS) and keyword search (BM25)
+
+ import os
+ import json
+ import re
+ import numpy as np
+ import pandas as pd
+ import faiss
+ import torch  # ✅ for GPU/CPU auto-detect
+ from sentence_transformers import SentenceTransformer
+ from config import VIDEO_METADATA, SEARCH_CONFIG
+
+ # For BM25 keyword ranking
+ from rank_bm25 import BM25Okapi
+ import nltk
+ # ❌ no downloads at import-time in production; ensure 'punkt' is installed in the image
+ from nltk.tokenize import word_tokenize
+
+ # ✅ Auto-select device (GPU on server, CPU locally)
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Paths
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ INDEX_PATH = os.path.join(BASE_DIR, "../data/embeddings/faiss.index")
+ METADATA_PATH = os.path.join(BASE_DIR, "../data/metadata.csv")
+
+ # Load model + indexes
+ MODEL_NAME = SEARCH_CONFIG.get("embedding_model", "all-MiniLM-L6-v2")
+ model = SentenceTransformer(MODEL_NAME, device=DEVICE)  # ✅ now uses GPU if available
+ faiss_index = faiss.read_index(INDEX_PATH)
+ metadata_df = pd.read_csv(METADATA_PATH)
+
+ # Build BM25 index over individual subtitle lines
+ bm25_corpus = []
+ bm25_metadata = []
+
+ for _, row in metadata_df.iterrows():
+     lines_raw = json.loads(row["lines"]) if isinstance(row["lines"], str) else row["lines"]
+     if not lines_raw:
+         continue
+     for i, line in enumerate(lines_raw):
+         bm25_corpus.append(word_tokenize(line["text"].lower()))
+         bm25_metadata.append({
+             "text": line["text"].strip(),
+             "timestamp": line["timestamp"],
+             "video_id": line["video_id"],
+             "context_before": lines_raw[i - 1]["text"].strip() if i > 0 else "",
+             "context_after": lines_raw[i + 1]["text"].strip() if i + 1 < len(lines_raw) else "",
+             "summary_input": row["text"]
+         })
+
+ bm25_index = BM25Okapi(bm25_corpus)
+
+ # Search function
+ def search_query(query, offset=0, top_k=SEARCH_CONFIG.get("results_per_page", 5), semantic_mode=True):
+     """
+     Search:
+     - Semantic mode → SBERT + FAISS + similarity threshold.
+     - Keyword mode → BM25 ranking over all subtitle lines.
+     """
+     if semantic_mode:
+         query_vector = model.encode([query])
+         faiss_top_k = SEARCH_CONFIG.get("faiss_top_k", 100)
+         semantic_threshold = SEARCH_CONFIG.get("semantic_threshold", 0.40)
+         semantic_top_n = SEARCH_CONFIG.get("semantic_top_n", 4)
+
+         # Semantic search with FAISS (D = L2 distances, I = row ids)
+         D, I = faiss_index.search(np.array(query_vector), faiss_top_k)
+
+         all_hits_with_scores = []
+         for idx, score in zip(I[0], D[0]):
+             current = metadata_df.iloc[idx]
+             lines_raw = json.loads(current["lines"]) if isinstance(current["lines"], str) else current["lines"]
+
+             if not lines_raw:
+                 continue
+
+             # Encode all lines in this chunk and rank them by cosine similarity
+             line_texts = [line["text"] for line in lines_raw]
+             line_vectors = model.encode(line_texts)
+             query_vec = query_vector[0]
+             similarities = np.dot(line_vectors, query_vec) / (
+                 np.linalg.norm(line_vectors, axis=1) * np.linalg.norm(query_vec)
+             )
+
+             line_indices = [i for i, sim in enumerate(similarities) if sim >= semantic_threshold]
+             line_indices.sort(key=lambda i: similarities[i], reverse=True)
+             line_indices = line_indices[:semantic_top_n]
+
+             for i in line_indices:
+                 match_text = lines_raw[i]["text"]
+                 match_time = lines_raw[i]["timestamp"]
+                 video_id = lines_raw[i]["video_id"]
+                 # Exact matches get a small distance reduction so they rank earlier
+                 if re.search(re.escape(query), match_text, re.IGNORECASE):
+                     score -= 0.05
+
+                 friendly_key = next((k for k, v in VIDEO_METADATA.items() if v["id"] == video_id), None)
+                 video_title = VIDEO_METADATA[friendly_key]["title"] if friendly_key else "Unknown Video"
+
+                 before = lines_raw[i - 1]["text"] if i > 0 else ""
+                 after = lines_raw[i + 1]["text"] if i + 1 < len(lines_raw) else ""
+                 summary_block = current["text"]
+
+                 all_hits_with_scores.append((
+                     score,
+                     {
+                         "text": match_text.strip(),
+                         "context_before": before.strip(),
+                         "context_after": after.strip(),
+                         "summary_input": summary_block,
+                         "timestamp": match_time,
+                         "video_id": video_id,
+                         "video_title": video_title
+                     }
+                 ))
+
+         # Ascending sort: smaller L2 distance = better match
+         all_hits_with_scores.sort(key=lambda x: x[0])
+         sorted_hits = [hit for _, hit in all_hits_with_scores]
+         return sorted_hits[offset:offset + top_k], len(sorted_hits)
+
+     else:
+         # Keyword mode: BM25
+         tokenized_query = word_tokenize(query.lower())
+         scores = bm25_index.get_scores(tokenized_query)
+         sorted_indices = np.argsort(scores)[::-1]
+
+         all_hits_with_scores = []
+         for idx in sorted_indices:
+             if scores[idx] <= 0:
+                 continue
+
+             r = bm25_metadata[idx]
+             video_id = r["video_id"]
+             friendly_key = next((k for k, v in VIDEO_METADATA.items() if v["id"] == video_id), None)
+             video_title = VIDEO_METADATA[friendly_key]["title"] if friendly_key else "Unknown Video"
+             r["video_title"] = video_title
+
+             all_hits_with_scores.append((scores[idx], r))
+
+         all_hits_with_scores.sort(key=lambda x: x[0], reverse=True)
+         sorted_hits = [hit for _, hit in all_hits_with_scores]
+         return sorted_hits[offset:offset + top_k], len(sorted_hits)
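Both modes can be exercised with a minimal sketch (run with backend/ on PYTHONPATH so the module-level index load succeeds; the queries are arbitrary examples, not part of the commit):

    # Hypothetical sketch — importing the module loads the FAISS + BM25 indexes
    from semantic_search import search_query

    hits, total = search_query("neural network", offset=0, top_k=5, semantic_mode=True)
    for h in hits:
        print(h["timestamp"], h["video_title"], "-", h["text"][:60])

    kw_hits, kw_total = search_query("gradient descent", semantic_mode=False)
    print(kw_total, "keyword matches")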
data/bigrams.pkl ADDED
Binary file (12.9 kB)

data/embeddings/faiss.index ADDED
Binary file (63 kB)

data/metadata.csv ADDED
The diff for this file is too large to render.
 
data/subtitles/Artificial Intelligence.vtt ADDED
@@ -0,0 +1,1440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ WEBVTT - Subtitles by: DownloadYoutubeSubtitles.com
2
+
3
+ 00:00:02.590 --> 00:00:12.400
4
+ [Music]
5
+
6
+ 00:00:10.160 --> 00:00:16.200
7
+ introduction of artificial intelligence
8
+
9
+ 00:00:12.400 --> 00:00:16.200
10
+ and machine learning
11
+
12
+ 00:00:16.640 --> 00:00:21.680
13
+ by the end of this lesson you will be
14
+
15
+ 00:00:18.480 --> 00:00:23.519
16
+ able to define artificial intelligence
17
+
18
+ 00:00:21.680 --> 00:00:26.880
19
+ describe the relationship between
20
+
21
+ 00:00:23.519 --> 00:00:29.439
22
+ artificial intelligence and data science
23
+
24
+ 00:00:26.880 --> 00:00:31.119
25
+ define machine learning
26
+
27
+ 00:00:29.439 --> 00:00:33.440
28
+ describe the relationship between
29
+
30
+ 00:00:31.119 --> 00:00:35.760
31
+ machine learning artificial intelligence
32
+
33
+ 00:00:33.440 --> 00:00:37.760
34
+ and data science
35
+
36
+ 00:00:35.760 --> 00:00:38.960
37
+ describe different machine learning
38
+
39
+ 00:00:37.760 --> 00:00:41.200
40
+ approaches
41
+
42
+ 00:00:38.960 --> 00:00:43.360
43
+ identify the applications of machine
44
+
45
+ 00:00:41.200 --> 00:00:45.280
46
+ learning
47
+
48
+ 00:00:43.360 --> 00:00:48.480
49
+ let's understand how the field of
50
+
51
+ 00:00:45.280 --> 00:00:50.640
52
+ artificial intelligence emerged
53
+
54
+ 00:00:48.480 --> 00:00:52.640
55
+ let's first understand the reason behind
56
+
57
+ 00:00:50.640 --> 00:00:54.719
58
+ the emergence of a.i
59
+
60
+ 00:00:52.640 --> 00:00:57.039
61
+ data economy is one of the factors
62
+
63
+ 00:00:54.719 --> 00:00:59.359
64
+ behind the emergence of ai
65
+
66
+ 00:00:57.039 --> 00:01:01.920
67
+ it refers to how much data has grown
68
+
69
+ 00:00:59.359 --> 00:01:04.239
70
+ over the past few years and how much
71
+
72
+ 00:01:01.920 --> 00:01:06.159
73
+ more it can grow in the coming years
74
+
75
+ 00:01:04.239 --> 00:01:08.320
76
+ when you look at this graph you can
77
+
78
+ 00:01:06.159 --> 00:01:10.000
79
+ clearly understand how the volume of
80
+
81
+ 00:01:08.320 --> 00:01:12.720
82
+ data has grown
83
+
84
+ 00:01:10.000 --> 00:01:15.840
85
+ you can see that since 2009 the data
86
+
87
+ 00:01:12.720 --> 00:01:18.000
88
+ volume has increased by 44 times with
89
+
90
+ 00:01:15.840 --> 00:01:20.560
91
+ the help of social websites
92
+
93
+ 00:01:18.000 --> 00:01:23.119
94
+ the explosion of data has given rise to
95
+
96
+ 00:01:20.560 --> 00:01:25.360
97
+ a new economy and there is a constant
98
+
99
+ 00:01:23.119 --> 00:01:29.040
100
+ battle for ownership of data between
101
+
102
+ 00:01:25.360 --> 00:01:31.119
103
+ companies to derive benefits from it
104
+
105
+ 00:01:29.040 --> 00:01:33.680
106
+ now that you know that data has grown at
107
+
108
+ 00:01:31.119 --> 00:01:36.000
109
+ a rapid pace in the past few years and
110
+
111
+ 00:01:33.680 --> 00:01:38.560
112
+ is going to continue to grow
113
+
114
+ 00:01:36.000 --> 00:01:40.960
115
+ let's understand the need for ai
116
+
117
+ 00:01:38.560 --> 00:01:44.000
118
+ as you know the increase in data volume
119
+
120
+ 00:01:40.960 --> 00:01:46.320
121
+ has given rise to big data which helps
122
+
123
+ 00:01:44.000 --> 00:01:49.520
124
+ manage huge amounts of data
125
+
126
+ 00:01:46.320 --> 00:01:52.000
127
+ data science helps analyze that data so
128
+
129
+ 00:01:49.520 --> 00:01:54.640
130
+ the science associated with data is
131
+
132
+ 00:01:52.000 --> 00:01:56.720
133
+ going toward a new paradigm
134
+
135
+ 00:01:54.640 --> 00:01:59.600
136
+ where one can teach machines to learn
137
+
138
+ 00:01:56.720 --> 00:02:01.840
139
+ from data and drive a variety of useful
140
+
141
+ 00:01:59.600 --> 00:02:04.159
142
+ insights giving rise to artificial
143
+
144
+ 00:02:01.840 --> 00:02:06.840
145
+ intelligence
146
+
147
+ 00:02:04.159 --> 00:02:09.360
148
+ now you may ask what is artificial
149
+
150
+ 00:02:06.840 --> 00:02:11.680
151
+ intelligence artificial intelligence
152
+
153
+ 00:02:09.360 --> 00:02:14.640
154
+ refers to the intelligence displayed by
155
+
156
+ 00:02:11.680 --> 00:02:15.840
157
+ machines that simulates human and animal
158
+
159
+ 00:02:14.640 --> 00:02:18.319
160
+ intelligence
161
+
162
+ 00:02:15.840 --> 00:02:20.400
163
+ it involves intelligence agents
164
+
165
+ 00:02:18.319 --> 00:02:23.120
166
+ the autonomous entities that perceive
167
+
168
+ 00:02:20.400 --> 00:02:25.360
169
+ their environment and take actions that
170
+
171
+ 00:02:23.120 --> 00:02:26.800
172
+ maximize their chances of success at a
173
+
174
+ 00:02:25.360 --> 00:02:28.959
175
+ given goal
176
+
177
+ 00:02:26.800 --> 00:02:31.280
178
+ artificial intelligence is a technique
179
+
180
+ 00:02:28.959 --> 00:02:33.840
181
+ that enables computers to mimic human
182
+
183
+ 00:02:31.280 --> 00:02:36.560
184
+ intelligence using logic
185
+
186
+ 00:02:33.840 --> 00:02:38.400
187
+ it is a program that can sense reason
188
+
189
+ 00:02:36.560 --> 00:02:40.319
190
+ and act
191
+
192
+ 00:02:38.400 --> 00:02:43.200
193
+ let's look at some of the areas where
194
+
195
+ 00:02:40.319 --> 00:02:45.680
196
+ artificial intelligence is used
197
+
198
+ 00:02:43.200 --> 00:02:47.519
199
+ artificial intelligence is redefining
200
+
201
+ 00:02:45.680 --> 00:02:50.480
202
+ industries by providing greater
203
+
204
+ 00:02:47.519 --> 00:02:51.760
205
+ personalization to users and automating
206
+
207
+ 00:02:50.480 --> 00:02:54.000
208
+ processes
209
+
210
+ 00:02:51.760 --> 00:02:57.519
211
+ one example of artificial intelligence
212
+
213
+ 00:02:54.000 --> 00:02:59.360
214
+ in practice is self-driving cars
215
+
216
+ 00:02:57.519 --> 00:03:02.400
217
+ self-driving cars are computer
218
+
219
+ 00:02:59.360 --> 00:03:04.640
220
+ controlled cars that drive themselves
221
+
222
+ 00:03:02.400 --> 00:03:06.800
223
+ in these cars human drivers are never
224
+
225
+ 00:03:04.640 --> 00:03:08.480
226
+ required to take control to safely
227
+
228
+ 00:03:06.800 --> 00:03:11.120
229
+ operate the vehicle
230
+
231
+ 00:03:08.480 --> 00:03:13.599
232
+ these cars are also known as autonomous
233
+
234
+ 00:03:11.120 --> 00:03:16.560
235
+ or driverless cars
236
+
237
+ 00:03:13.599 --> 00:03:19.200
238
+ let's see how apple uses ai
239
+
240
+ 00:03:16.560 --> 00:03:20.879
241
+ iphone users can experience the power of
242
+
243
+ 00:03:19.200 --> 00:03:23.040
244
+ siri the voice
245
+
246
+ 00:03:20.879 --> 00:03:24.800
247
+ it simplifies navigating through your
248
+
249
+ 00:03:23.040 --> 00:03:27.440
250
+ iphone as it listens to your voice
251
+
252
+ 00:03:24.800 --> 00:03:29.599
253
+ commands to perform tasks
254
+
255
+ 00:03:27.440 --> 00:03:32.879
256
+ for instance you can ask siri to call
257
+
258
+ 00:03:29.599 --> 00:03:36.080
259
+ your friend or to play music siri is fun
260
+
261
+ 00:03:32.879 --> 00:03:38.480
262
+ and is extremely convenient to use
263
+
264
+ 00:03:36.080 --> 00:03:40.560
265
+ another example is google's alphago
266
+
267
+ 00:03:38.480 --> 00:03:42.480
268
+ which is a computer program that plays
269
+
270
+ 00:03:40.560 --> 00:03:44.879
271
+ the board game go
272
+
273
+ 00:03:42.480 --> 00:03:47.599
274
+ it is the first computer program to
275
+
276
+ 00:03:44.879 --> 00:03:49.840
277
+ defeat a world champion at the ancient
278
+
279
+ 00:03:47.599 --> 00:03:52.560
280
+ chinese game of go
281
+
282
+ 00:03:49.840 --> 00:03:54.640
283
+ amazon echo is another product it's a
284
+
285
+ 00:03:52.560 --> 00:03:57.120
286
+ home control chatbot device that
287
+
288
+ 00:03:54.640 --> 00:03:59.680
289
+ responds to humans according to what
290
+
291
+ 00:03:57.120 --> 00:04:02.080
292
+ they are saying it responds by playing
293
+
294
+ 00:03:59.680 --> 00:04:04.080
295
+ music movies and more
296
+
297
+ 00:04:02.080 --> 00:04:06.239
298
+ if you've got compatible smart home
299
+
300
+ 00:04:04.080 --> 00:04:09.120
301
+ devices you can tell echo to dim the
302
+
303
+ 00:04:06.239 --> 00:04:11.439
304
+ lights or turn appliances on or off you
305
+
306
+ 00:04:09.120 --> 00:04:14.879
307
+ can use ai and chess and here is an
308
+
309
+ 00:04:11.439 --> 00:04:16.959
310
+ example of a concierge robot from ibm
311
+
312
+ 00:04:14.879 --> 00:04:20.079
313
+ called ibm watson
314
+
315
+ 00:04:16.959 --> 00:04:22.320
316
+ the ibm watson ai has typically been in
317
+
318
+ 00:04:20.079 --> 00:04:26.000
319
+ the headlines for composing music
320
+
321
+ 00:04:22.320 --> 00:04:28.320
322
+ playing chess and even cooking food
323
+
324
+ 00:04:26.000 --> 00:04:30.560
325
+ let's move ahead and look at some sci-fi
326
+
327
+ 00:04:28.320 --> 00:04:32.000
328
+ movies with the concept of artificial
329
+
330
+ 00:04:30.560 --> 00:04:34.320
331
+ intelligence
332
+
333
+ 00:04:32.000 --> 00:04:36.800
334
+ the films featuring ai reflect the
335
+
336
+ 00:04:34.320 --> 00:04:39.520
337
+ ever-changing spectrum of our emotions
338
+
339
+ 00:04:36.800 --> 00:04:41.840
340
+ regarding the machines we have created
341
+
342
+ 00:04:39.520 --> 00:04:44.080
343
+ humans are fascinated by the concept of
344
+
345
+ 00:04:41.840 --> 00:04:46.639
346
+ artificial intelligence and this is
347
+
348
+ 00:04:44.080 --> 00:04:48.240
349
+ reflected in the wide range of movies on
350
+
351
+ 00:04:46.639 --> 00:04:50.800
352
+ ai
353
+
354
+ 00:04:48.240 --> 00:04:53.360
355
+ recommendations systems are used by a
356
+
357
+ 00:04:50.800 --> 00:04:56.160
358
+ lot of e-commerce companies let's see
359
+
360
+ 00:04:53.360 --> 00:04:58.560
361
+ how they work
362
+
363
+ 00:04:56.160 --> 00:05:00.800
364
+ amazon collects data from users and
365
+
366
+ 00:04:58.560 --> 00:05:03.759
367
+ recommends the best product according to
368
+
369
+ 00:05:00.800 --> 00:05:05.520
370
+ the user's buying or shopping pattern
371
+
372
+ 00:05:03.759 --> 00:05:08.720
373
+ for example when you search for a
374
+
375
+ 00:05:05.520 --> 00:05:10.400
376
+ specific product in the amazon store and
377
+
378
+ 00:05:08.720 --> 00:05:12.800
379
+ add it to your cart
380
+
381
+ 00:05:10.400 --> 00:05:14.479
382
+ amazon recommends some relevant products
383
+
384
+ 00:05:12.800 --> 00:05:16.160
385
+ based on your past shopping and
386
+
387
+ 00:05:14.479 --> 00:05:18.479
388
+ searching pattern
389
+
390
+ 00:05:16.160 --> 00:05:20.400
391
+ so before you buy the selected product
392
+
393
+ 00:05:18.479 --> 00:05:22.960
394
+ you get recommendations based on your
395
+
396
+ 00:05:20.400 --> 00:05:25.199
397
+ interest and there is a possibility that
398
+
399
+ 00:05:22.960 --> 00:05:28.240
400
+ you may also buy the relevant product
401
+
402
+ 00:05:25.199 --> 00:05:30.000
403
+ with a selected product if not you have
404
+
405
+ 00:05:28.240 --> 00:05:33.280
406
+ the chance to compare the selected
407
+
408
+ 00:05:30.000 --> 00:05:35.360
409
+ product with the recommended products
410
+
411
+ 00:05:33.280 --> 00:05:37.039
412
+ now let's move ahead and understand the
413
+
414
+ 00:05:35.360 --> 00:05:39.840
415
+ relationship between artificial
416
+
417
+ 00:05:37.039 --> 00:05:42.000
418
+ intelligence machine learning and data
419
+
420
+ 00:05:39.840 --> 00:05:43.680
421
+ science
422
+
423
+ 00:05:42.000 --> 00:05:46.560
424
+ even though the terms artificial
425
+
426
+ 00:05:43.680 --> 00:05:49.520
427
+ intelligence ai machine learning and
428
+
429
+ 00:05:46.560 --> 00:05:51.360
430
+ data science fall in the same domain and
431
+
432
+ 00:05:49.520 --> 00:05:54.160
433
+ are connected to each other they have
434
+
435
+ 00:05:51.360 --> 00:05:56.320
436
+ their specific applications and meaning
437
+
438
+ 00:05:54.160 --> 00:05:58.000
439
+ let's try to understand a little about
440
+
441
+ 00:05:56.320 --> 00:06:00.479
442
+ each of these terms
443
+
444
+ 00:05:58.000 --> 00:06:02.800
445
+ artificial intelligence systems mimic or
446
+
447
+ 00:06:00.479 --> 00:06:04.880
448
+ replicate human intelligence
449
+
450
+ 00:06:02.800 --> 00:06:07.440
451
+ machine learning provides systems the
452
+
453
+ 00:06:04.880 --> 00:06:09.440
454
+ ability to automatically learn and
455
+
456
+ 00:06:07.440 --> 00:06:12.000
457
+ improve from the experiences without
458
+
459
+ 00:06:09.440 --> 00:06:14.160
460
+ being explicitly programmed
461
+
462
+ 00:06:12.000 --> 00:06:17.360
463
+ data science is an umbrella term that
464
+
465
+ 00:06:14.160 --> 00:06:20.240
466
+ encompasses data analytics data mining
467
+
468
+ 00:06:17.360 --> 00:06:22.880
469
+ machine learning artificial intelligence
470
+
471
+ 00:06:20.240 --> 00:06:25.120
472
+ and several other related disciplines
473
+
474
+ 00:06:22.880 --> 00:06:26.960
475
+ let's look at the flow diagram and try
476
+
477
+ 00:06:25.120 --> 00:06:27.919
478
+ to understand the relationship between
479
+
480
+ 00:06:26.960 --> 00:06:30.800
481
+ ai
482
+
483
+ 00:06:27.919 --> 00:06:33.680
484
+ machine learning and data science
485
+
486
+ 00:06:30.800 --> 00:06:35.680
487
+ interestingly ml is also an element of
488
+
489
+ 00:06:33.680 --> 00:06:38.160
490
+ artificial intelligence
491
+
492
+ 00:06:35.680 --> 00:06:40.000
493
+ so the first step is data gathering and
494
+
495
+ 00:06:38.160 --> 00:06:42.319
496
+ data transformation
497
+
498
+ 00:06:40.000 --> 00:06:43.520
499
+ this step basically comes under data
500
+
501
+ 00:06:42.319 --> 00:06:45.600
502
+ science
503
+
504
+ 00:06:43.520 --> 00:06:47.759
505
+ data transformation is the process of
506
+
507
+ 00:06:45.600 --> 00:06:50.080
508
+ converting data from one format or
509
+
510
+ 00:06:47.759 --> 00:06:51.199
511
+ structure into another format or
512
+
513
+ 00:06:50.080 --> 00:06:53.199
514
+ structure
515
+
516
+ 00:06:51.199 --> 00:06:55.759
517
+ data transformation is important to
518
+
519
+ 00:06:53.199 --> 00:06:57.759
520
+ activities such as data management and
521
+
522
+ 00:06:55.759 --> 00:06:59.759
523
+ data integration
524
+
525
+ 00:06:57.759 --> 00:07:02.000
526
+ after gathering data we would want to
527
+
528
+ 00:06:59.759 --> 00:07:04.319
529
+ use the data to make predictions and
530
+
531
+ 00:07:02.000 --> 00:07:06.880
532
+ derive insights in order to get
533
+
534
+ 00:07:04.319 --> 00:07:08.639
535
+ predictions out of the data set we use
536
+
537
+ 00:07:06.880 --> 00:07:11.599
538
+ machine learning techniques such as
539
+
540
+ 00:07:08.639 --> 00:07:14.800
541
+ supervised learning or unsupervised
542
+
543
+ 00:07:11.599 --> 00:07:16.639
544
+ learning on an overview level supervised
545
+
546
+ 00:07:14.800 --> 00:07:18.960
547
+ and unsupervised learning are the
548
+
549
+ 00:07:16.639 --> 00:07:21.199
550
+ machine learning techniques used to
551
+
552
+ 00:07:18.960 --> 00:07:22.400
553
+ extract predictions from a given data
554
+
555
+ 00:07:21.199 --> 00:07:24.880
556
+ set
557
+
558
+ 00:07:22.400 --> 00:07:27.199
559
+ now you must be thinking where deep
560
+
561
+ 00:07:24.880 --> 00:07:29.840
562
+ learning comes into the picture
563
+
564
+ 00:07:27.199 --> 00:07:32.800
565
+ deep learning is a subfield of machine
566
+
567
+ 00:07:29.840 --> 00:07:35.680
568
+ learning involved with algorithms
569
+
570
+ 00:07:32.800 --> 00:07:37.199
571
+ it uses artificial neural networks which
572
+
573
+ 00:07:35.680 --> 00:07:39.199
574
+ are modeled on the structure and
575
+
576
+ 00:07:37.199 --> 00:07:40.560
577
+ performance of neurons in the human
578
+
579
+ 00:07:39.199 --> 00:07:42.880
580
+ brain
581
+
582
+ 00:07:40.560 --> 00:07:45.199
583
+ deep learning is most effective when
584
+
585
+ 00:07:42.880 --> 00:07:46.160
586
+ there isn't a clear structure to the
587
+
588
+ 00:07:45.199 --> 00:07:48.160
589
+ data
590
+
591
+ 00:07:46.160 --> 00:07:49.680
592
+ that you can just exploit and build
593
+
594
+ 00:07:48.160 --> 00:07:52.240
595
+ features around
596
+
597
+ 00:07:49.680 --> 00:07:54.240
598
+ now the next step in the flow diagram is
599
+
600
+ 00:07:52.240 --> 00:07:55.199
601
+ to get insights from predictions being
602
+
603
+ 00:07:54.240 --> 00:07:57.680
604
+ made
605
+
606
+ 00:07:55.199 --> 00:08:00.319
607
+ in order to do so you need to use data
608
+
609
+ 00:07:57.680 --> 00:08:02.400
610
+ analysis which actually is the process
611
+
612
+ 00:08:00.319 --> 00:08:04.080
613
+ under data science
614
+
615
+ 00:08:02.400 --> 00:08:06.240
616
+ now when you are done with all of these
617
+
618
+ 00:08:04.080 --> 00:08:07.280
619
+ you must want your data to perform some
620
+
621
+ 00:08:06.240 --> 00:08:10.160
622
+ actions
623
+
624
+ 00:08:07.280 --> 00:08:12.080
625
+ this is where ai comes into the picture
626
+
627
+ 00:08:10.160 --> 00:08:14.479
628
+ artificial intelligence combines
629
+
630
+ 00:08:12.080 --> 00:08:17.039
631
+ predictions and insights to perform
632
+
633
+ 00:08:14.479 --> 00:08:19.680
634
+ actions based on the human decision and
635
+
636
+ 00:08:17.039 --> 00:08:21.759
637
+ automated decision
638
+
639
+ 00:08:19.680 --> 00:08:23.440
640
+ now let's move ahead and understand the
641
+
642
+ 00:08:21.759 --> 00:08:26.240
643
+ relationship between artificial
644
+
645
+ 00:08:23.440 --> 00:08:28.160
646
+ intelligence machine learning and data
647
+
648
+ 00:08:26.240 --> 00:08:30.000
649
+ science
650
+
651
+ 00:08:28.160 --> 00:08:32.080
652
+ let's look at the relationship between
653
+
654
+ 00:08:30.000 --> 00:08:33.200
655
+ artificial intelligence and machine
656
+
657
+ 00:08:32.080 --> 00:08:35.120
658
+ learning
659
+
660
+ 00:08:33.200 --> 00:08:36.959
661
+ artificial intelligence is the
662
+
663
+ 00:08:35.120 --> 00:08:39.039
664
+ engineering of making intelligent
665
+
666
+ 00:08:36.959 --> 00:08:41.279
667
+ machines and programs
668
+
669
+ 00:08:39.039 --> 00:08:44.080
670
+ machine learning provides systems the
671
+
672
+ 00:08:41.279 --> 00:08:46.959
673
+ ability to learn from past experiences
674
+
675
+ 00:08:44.080 --> 00:08:49.360
676
+ without being explicitly programmed
677
+
678
+ 00:08:46.959 --> 00:08:51.839
679
+ machine learning allows machines to gain
680
+
681
+ 00:08:49.360 --> 00:08:54.839
682
+ intelligence thereby enabling artificial
683
+
684
+ 00:08:51.839 --> 00:08:54.839
685
+ intelligence
686
+
687
+ 00:08:54.959 --> 00:08:58.880
688
+ let's now understand the relationship
689
+
690
+ 00:08:56.800 --> 00:09:00.000
691
+ between machine learning and data
692
+
693
+ 00:08:58.880 --> 00:09:02.160
694
+ science
695
+
696
+ 00:09:00.000 --> 00:09:03.600
697
+ data science and machine learning go
698
+
699
+ 00:09:02.160 --> 00:09:06.320
700
+ hand in hand
701
+
702
+ 00:09:03.600 --> 00:09:08.640
703
+ data science helps evaluate data for
704
+
705
+ 00:09:06.320 --> 00:09:10.640
706
+ machine learning algorithms
707
+
708
+ 00:09:08.640 --> 00:09:13.040
709
+ data science covers the whole spectrum
710
+
711
+ 00:09:10.640 --> 00:09:14.880
712
+ of data processing while machine
713
+
714
+ 00:09:13.040 --> 00:09:18.240
715
+ learning has the algorithmic or
716
+
717
+ 00:09:14.880 --> 00:09:18.240
718
+ statistical aspects
719
+
720
+ 00:09:18.640 --> 00:09:23.839
721
+ data science is the use of statistical
722
+
723
+ 00:09:21.040 --> 00:09:26.080
724
+ methods to find patterns in the data
725
+
726
+ 00:09:23.839 --> 00:09:28.720
727
+ statistical machine learning uses the
728
+
729
+ 00:09:26.080 --> 00:09:31.120
730
+ same techniques as data science
731
+
732
+ 00:09:28.720 --> 00:09:34.000
733
+ data science includes various techniques
734
+
735
+ 00:09:31.120 --> 00:09:37.040
736
+ like statistical modeling visualization
737
+
738
+ 00:09:34.000 --> 00:09:39.440
739
+ and pattern recognition machine learning
740
+
741
+ 00:09:37.040 --> 00:09:44.080
742
+ focuses on developing algorithms from
743
+
744
+ 00:09:39.440 --> 00:09:47.680
745
+ the data provided by making predictions
746
+
747
+ 00:09:44.080 --> 00:09:49.920
748
+ so what is machine learning
749
+
750
+ 00:09:47.680 --> 00:09:52.560
751
+ machine learning is the capability of an
752
+
753
+ 00:09:49.920 --> 00:09:55.360
754
+ artificial intelligence system to learn
755
+
756
+ 00:09:52.560 --> 00:09:57.600
757
+ by extracting patterns from data
758
+
759
+ 00:09:55.360 --> 00:09:59.600
760
+ it usually delivers quicker more
761
+
762
+ 00:09:57.600 --> 00:10:02.480
763
+ accurate results to help you spot
764
+
765
+ 00:09:59.600 --> 00:10:04.399
766
+ profitable opportunities or dangerous
767
+
768
+ 00:10:02.480 --> 00:10:06.399
769
+ risks
770
+
771
+ 00:10:04.399 --> 00:10:09.040
772
+ now you must be curious to understand
773
+
774
+ 00:10:06.399 --> 00:10:11.279
775
+ the features of machine learning machine
776
+
777
+ 00:10:09.040 --> 00:10:14.000
778
+ learning uses the data to detect
779
+
780
+ 00:10:11.279 --> 00:10:16.480
781
+ patterns in a data set and adjust
782
+
783
+ 00:10:14.000 --> 00:10:18.720
784
+ program actions accordingly
785
+
786
+ 00:10:16.480 --> 00:10:20.640
787
+ pattern detection can be defined as the
788
+
789
+ 00:10:18.720 --> 00:10:23.200
790
+ classification of data based on
791
+
792
+ 00:10:20.640 --> 00:10:25.360
793
+ knowledge already gained or on
794
+
795
+ 00:10:23.200 --> 00:10:26.800
796
+ statistical information extracted from
797
+
798
+ 00:10:25.360 --> 00:10:28.640
799
+ the patterns
800
+
801
+ 00:10:26.800 --> 00:10:30.480
802
+ it focuses on the development of
803
+
804
+ 00:10:28.640 --> 00:10:32.480
805
+ computer programs that can teach
806
+
807
+ 00:10:30.480 --> 00:10:34.560
808
+ themselves to grow and change
809
+
810
+ 00:10:32.480 --> 00:10:37.279
811
+ when exposed to new data by using a
812
+
813
+ 00:10:34.560 --> 00:10:39.760
814
+ method called reinforcement learning
815
+
816
+ 00:10:37.279 --> 00:10:42.399
817
+ it uses external feedback to teach the
818
+
819
+ 00:10:39.760 --> 00:10:44.880
820
+ system to change its internal workings
821
+
822
+ 00:10:42.399 --> 00:10:46.880
823
+ in order to guess better next time
824
+
825
+ 00:10:44.880 --> 00:10:49.600
826
+ it enables computers to find hidden
827
+
828
+ 00:10:46.880 --> 00:10:52.640
829
+ insights using iterative algorithms
830
+
831
+ 00:10:49.600 --> 00:10:55.120
832
+ without being explicitly programmed
833
+
834
+ 00:10:52.640 --> 00:10:57.519
835
+ machine learning uses algorithms that
836
+
837
+ 00:10:55.120 --> 00:11:00.399
838
+ learn from previous data to help produce
839
+
840
+ 00:10:57.519 --> 00:11:02.640
841
+ reliable and repeatable decisions it
842
+
843
+ 00:11:00.399 --> 00:11:04.560
844
+ automates analytical model building
845
+
846
+ 00:11:02.640 --> 00:11:07.360
847
+ using the statistical and machine
848
+
849
+ 00:11:04.560 --> 00:11:10.240
850
+ learning algorithms that tease patterns
851
+
852
+ 00:11:07.360 --> 00:11:13.200
853
+ and relationships from data and express
854
+
855
+ 00:11:10.240 --> 00:11:15.120
856
+ them as mathematical equations
857
+
858
+ 00:11:13.200 --> 00:11:18.160
859
+ let's understand the different machine
860
+
861
+ 00:11:15.120 --> 00:11:18.160
862
+ learning approaches
863
+
864
+ 00:11:18.880 --> 00:11:23.519
865
+ so what is the actual difference between
866
+
867
+ 00:11:21.519 --> 00:11:26.560
868
+ traditional programming and machine
869
+
870
+ 00:11:23.519 --> 00:11:27.360
871
+ learning in traditional programming data
872
+
873
+ 00:11:26.560 --> 00:11:30.320
874
+ and
875
+
876
+ 00:11:27.360 --> 00:11:32.720
877
+ is provided to the computer it processes
878
+
879
+ 00:11:30.320 --> 00:11:34.560
880
+ them and gives the output however the
881
+
882
+ 00:11:32.720 --> 00:11:37.360
883
+ machine learning approach is very
884
+
885
+ 00:11:34.560 --> 00:11:40.959
886
+ different in machine learning algorithms
887
+
888
+ 00:11:37.360 --> 00:11:43.360
889
+ are applied on the given data and output
890
+
891
+ 00:11:40.959 --> 00:11:46.000
892
+ the result of the applied algorithm and
893
+
894
+ 00:11:43.360 --> 00:11:49.360
895
+ calculations is a learning model that
896
+
897
+ 00:11:46.000 --> 00:11:51.680
898
+ helps machine to learn from the data
899
+
900
+ 00:11:49.360 --> 00:11:54.320
901
+ in traditional programming you code the
902
+
903
+ 00:11:51.680 --> 00:11:56.560
904
+ behavior of the program but in machine
905
+
906
+ 00:11:54.320 --> 00:11:59.120
907
+ learning you leave a lot of that to the
908
+
909
+ 00:11:56.560 --> 00:12:00.560
910
+ machine to learn from data
911
+
912
+ 00:11:59.120 --> 00:12:03.040
913
+ now let's first understand the
914
+
915
+ 00:12:00.560 --> 00:12:05.040
916
+ traditional programming approach
917
+
918
+ 00:12:03.040 --> 00:12:07.680
919
+ traditionally you would hard code the
920
+
921
+ 00:12:05.040 --> 00:12:10.320
922
+ decision rules for a problem at hand
923
+
924
+ 00:12:07.680 --> 00:12:12.240
925
+ evaluate the results of the program and
926
+
927
+ 00:12:10.320 --> 00:12:15.279
928
+ if the results were satisfactory the
929
+
930
+ 00:12:12.240 --> 00:12:17.680
931
+ program would be deployed in production
932
+
933
+ 00:12:15.279 --> 00:12:20.079
934
+ if the results were not as expected one
935
+
936
+ 00:12:17.680 --> 00:12:22.720
937
+ would review the errors change the
938
+
939
+ 00:12:20.079 --> 00:12:25.279
940
+ program and evaluate it again
941
+
942
+ 00:12:22.720 --> 00:12:28.800
943
+ this iterative process continues till
944
+
945
+ 00:12:25.279 --> 00:12:31.200
946
+ one gets the expected result
947
+
948
+ 00:12:28.800 --> 00:12:33.120
949
+ what is the machine learning approach in
950
+
951
+ 00:12:31.200 --> 00:12:35.920
952
+ the new machine learning approach the
953
+
954
+ 00:12:33.120 --> 00:12:38.240
955
+ decision rules are not hard coded the
956
+
957
+ 00:12:35.920 --> 00:12:40.160
958
+ problem is solved by training a model
959
+
960
+ 00:12:38.240 --> 00:12:43.279
961
+ with the training data in order to
962
+
963
+ 00:12:40.160 --> 00:12:45.760
964
+ derive or learn an algorithm that best
965
+
966
+ 00:12:43.279 --> 00:12:48.639
967
+ represents the relationship between the
968
+
969
+ 00:12:45.760 --> 00:12:51.680
970
+ input and the output this trained model
971
+
972
+ 00:12:48.639 --> 00:12:53.839
973
+ is then evaluated against test data if
974
+
975
+ 00:12:51.680 --> 00:12:56.160
976
+ the results were satisfactory the model
977
+
978
+ 00:12:53.839 --> 00:12:58.160
979
+ would be deployed in production and if
980
+
981
+ 00:12:56.160 --> 00:13:01.920
982
+ the results are not satisfactory the
983
+
984
+ 00:12:58.160 --> 00:13:05.360
985
+ training is repeated with some changes
986
+
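
A minimal sketch of this contrast, assuming scikit-learn and an invented spam-filtering example, not code from this repository: a hand-written rule on one side, a model that learns the rule from labeled data on the other.

    # Traditional programming: the decision rule is written by hand.
    def is_spam_rule(text: str) -> bool:
        return "free money" in text.lower()

    # Machine learning: the rule is learned from labeled training data.
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.naive_bayes import MultinomialNB

    texts = ["free money now", "meeting at noon", "win free money", "lunch today?"]
    labels = [1, 0, 1, 0]  # 1 = spam, 0 = not spam

    vec = CountVectorizer()
    model = MultinomialNB().fit(vec.fit_transform(texts), labels)
    print(model.predict(vec.transform(["claim your free money"])))  # -> [1]
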
987
+ 00:13:01.920 --> 00:13:05.360
988
+ machine learning techniques
989
+
990
+ 00:13:05.600 --> 00:13:09.360
991
+ machine learning uses a number of
992
+
993
+ 00:13:07.600 --> 00:13:12.079
994
+ theories and techniques from data
995
+
996
+ 00:13:09.360 --> 00:13:14.839
997
+ science here are some machine learning
998
+
999
+ 00:13:12.079 --> 00:13:18.480
1000
+ techniques classification
1001
+
1002
+ 00:13:14.839 --> 00:13:21.440
1003
+ categorization clustering trend analysis
1004
+
1005
+ 00:13:18.480 --> 00:13:22.959
1006
+ anomaly detection visualization and
1007
+
1008
+ 00:13:21.440 --> 00:13:26.079
1009
+ decision making
1010
+
1011
+ 00:13:22.959 --> 00:13:28.160
1012
+ let's look at these techniques
1013
+
1014
+ 00:13:26.079 --> 00:13:30.160
1015
+ classification is a technique in which
1016
+
1017
+ 00:13:28.160 --> 00:13:33.360
1018
+ the computer program learns from the
1019
+
1020
+ 00:13:30.160 --> 00:13:35.040
1021
+ data input given to it and then uses
1022
+
1023
+ 00:13:33.360 --> 00:13:36.639
1024
+ this learning to classify new
1025
+
1026
+ 00:13:35.040 --> 00:13:38.959
1027
+ observations
1028
+
1029
+ 00:13:36.639 --> 00:13:41.839
1030
+ classification is used for predicting
1031
+
1032
+ 00:13:38.959 --> 00:13:44.000
1033
+ discrete responses classification is
1034
+
1035
+ 00:13:41.839 --> 00:13:48.360
1036
+ used when we are training a model to
1037
+
1038
+ 00:13:44.000 --> 00:13:50.399
1039
+ predict qualitative targets
1040
+
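
As a concrete instance of predicting a discrete, qualitative target, a short classification sketch, assuming scikit-learn (its bundled iris dataset stands in for any labeled data):

    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier

    # Qualitative target: the species label (0, 1, or 2), a discrete response.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
    print("held-out accuracy:", clf.score(X_test, y_test))
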
1041
+ 00:13:48.360 --> 00:13:52.959
1042
+ categorization is a technique of
1043
+
1044
+ 00:13:50.399 --> 00:13:55.440
1045
+ organizing data into categories for its
1046
+
1047
+ 00:13:52.959 --> 00:13:57.839
1048
+ most effective and efficient use
1049
+
1050
+ 00:13:55.440 --> 00:14:00.959
1051
+ it makes free text searches faster and
1052
+
1053
+ 00:13:57.839 --> 00:14:03.279
1054
+ provides a better user experience
1055
+
1056
+ 00:14:00.959 --> 00:14:05.120
1057
+ clustering is a technique of grouping a
1058
+
1059
+ 00:14:03.279 --> 00:14:07.199
1060
+ set of objects in such a way that
1061
+
1062
+ 00:14:05.120 --> 00:14:09.600
1063
+ objects in the same group are more
1064
+
1065
+ 00:14:07.199 --> 00:14:10.880
1066
+ similar to each other than to those in
1067
+
1068
+ 00:14:09.600 --> 00:14:13.120
1069
+ other groups
1070
+
1071
+ 00:14:10.880 --> 00:14:14.959
1072
+ it is basically a collection of objects
1073
+
1074
+ 00:14:13.120 --> 00:14:18.000
1075
+ on the basis of similarity and
1076
+
1077
+ 00:14:14.959 --> 00:14:20.320
1078
+ dissimilarity between them
1079
+
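
A minimal clustering sketch, assuming scikit-learn and invented points: items in the same group come out more similar to each other than to those in the other group.

    import numpy as np
    from sklearn.cluster import KMeans

    # No labels here: the algorithm groups points purely by similarity.
    points = np.array([[1.0, 1.0], [1.2, 0.8], [0.9, 1.1],
                       [8.0, 8.0], [8.2, 7.9], [7.8, 8.1]])
    km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(points)
    print(km.labels_)  # two groups, e.g. [0 0 0 1 1 1]
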
1080
+ 00:14:18.000 --> 00:14:22.240
1081
+ trend analysis is a technique aimed at
1082
+
1083
+ 00:14:20.320 --> 00:14:24.639
1084
+ projecting both current and future
1085
+
1086
+ 00:14:22.240 --> 00:14:27.279
1087
+ movement of events through the use of
1088
+
1089
+ 00:14:24.639 --> 00:14:29.440
1090
+ time series data analysis
1091
+
1092
+ 00:14:27.279 --> 00:14:32.399
1093
+ it represents variations of low
1094
+
1095
+ 00:14:29.440 --> 00:14:36.399
1096
+ frequency in a time series the high and
1097
+
1098
+ 00:14:32.399 --> 00:14:38.880
1099
+ medium frequency fluctuations being filtered out
1100
+
1101
+ 00:14:36.399 --> 00:14:41.519
1102
+ anomaly detection is a technique to
1103
+
1104
+ 00:14:38.880 --> 00:14:44.560
1105
+ identify cases that are unusual within
1106
+
1107
+ 00:14:41.519 --> 00:14:46.639
1108
+ data that is seemingly homogenous
1109
+
1110
+ 00:14:44.560 --> 00:14:48.720
1111
+ anomaly detection can be a key for
1112
+
1113
+ 00:14:46.639 --> 00:14:51.360
1114
+ solving intrusions by indicating a
1115
+
1116
+ 00:14:48.720 --> 00:14:55.639
1117
+ presence of intended or unintended
1118
+
1119
+ 00:14:51.360 --> 00:14:58.000
1120
+ induced attacks defects faults and so on
1121
+
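
The "unusual case in seemingly homogeneous data" idea maps directly onto an outlier detector. A minimal sketch, assuming scikit-learn and invented readings:

    import numpy as np
    from sklearn.ensemble import IsolationForest

    # Mostly homogeneous readings with one unusual case mixed in.
    X = np.array([[10.0], [10.2], [9.9], [10.1], [9.8], [55.0]])
    iso = IsolationForest(random_state=0).fit(X)
    print(iso.predict(X))  # -1 flags the anomaly (55.0), 1 marks normal points
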
1122
+ 00:14:55.639 --> 00:15:01.680
1123
+ visualization is a technique to present
1124
+
1125
+ 00:14:58.000 --> 00:15:03.760
1126
+ data in a pictorial or graphical format
1127
+
1128
+ 00:15:01.680 --> 00:15:06.320
1129
+ it enables decision makers to see
1130
+
1131
+ 00:15:03.760 --> 00:15:08.399
1132
+ analytics presented visually
1133
+
1134
+ 00:15:06.320 --> 00:15:10.880
1135
+ when data is shown in the form of
1136
+
1137
+ 00:15:08.399 --> 00:15:12.480
1138
+ pictures it becomes easy for users to
1139
+
1140
+ 00:15:10.880 --> 00:15:14.639
1141
+ understand it
1142
+
1143
+ 00:15:12.480 --> 00:15:16.560
1144
+ decision making is a technique or skill
1145
+
1146
+ 00:15:14.639 --> 00:15:19.600
1147
+ that provides you with the ability to
1148
+
1149
+ 00:15:16.560 --> 00:15:22.800
1150
+ influence managerial decisions with data
1151
+
1152
+ 00:15:19.600 --> 00:15:24.720
1153
+ as evidence for those possibilities
1154
+
1155
+ 00:15:22.800 --> 00:15:26.800
1156
+ now i am sure you have a better
1157
+
1158
+ 00:15:24.720 --> 00:15:29.120
1159
+ understanding of the overview of machine
1160
+
1161
+ 00:15:26.800 --> 00:15:31.680
1162
+ learning so let's look at some real-time
1163
+
1164
+ 00:15:29.120 --> 00:15:33.839
1165
+ applications of machine learning
1166
+
1167
+ 00:15:31.680 --> 00:15:36.160
1168
+ artificial intelligence and machine
1169
+
1170
+ 00:15:33.839 --> 00:15:38.320
1171
+ learning are being increasingly used in
1172
+
1173
+ 00:15:36.160 --> 00:15:40.399
1174
+ various functions such as image
1175
+
1176
+ 00:15:38.320 --> 00:15:44.079
1177
+ processing robotics
1178
+
1179
+ 00:15:40.399 --> 00:15:46.399
1180
+ data mining video games text analysis
1181
+
1182
+ 00:15:44.079 --> 00:15:48.320
1183
+ and healthcare let's look at each of
1184
+
1185
+ 00:15:46.399 --> 00:15:51.199
1186
+ them in more details
1187
+
1188
+ 00:15:48.320 --> 00:15:53.279
1189
+ so what is image processing it is a
1190
+
1191
+ 00:15:51.199 --> 00:15:55.519
1192
+ technique to convert an image into a
1193
+
1194
+ 00:15:53.279 --> 00:15:58.160
1195
+ digital format and perform some
1196
+
1197
+ 00:15:55.519 --> 00:16:00.560
1198
+ operations on it so as to induce an
1199
+
1200
+ 00:15:58.160 --> 00:16:02.800
1201
+ enhanced image or to extract some
1202
+
1203
+ 00:16:00.560 --> 00:16:04.720
1204
+ helpful information from it
1205
+
1206
+ 00:16:02.800 --> 00:16:06.399
1207
+ let's look at some of the examples of
1208
+
1209
+ 00:16:04.720 --> 00:16:08.880
1210
+ image processing
1211
+
1212
+ 00:16:06.399 --> 00:16:10.959
1213
+ facebook does automatic face tagging by
1214
+
1215
+ 00:16:08.880 --> 00:16:14.000
1216
+ recognizing a face from a previous
1217
+
1218
+ 00:16:10.959 --> 00:16:15.839
1219
+ user's tagged photos another example is
1220
+
1221
+ 00:16:14.000 --> 00:16:19.279
1222
+ optical character recognition which
1223
+
1224
+ 00:16:15.839 --> 00:16:21.040
1225
+ scans printed docs to digitize the text
1226
+
1227
+ 00:16:19.279 --> 00:16:23.360
1228
+ self-driving cars are another big
1229
+
1230
+ 00:16:21.040 --> 00:16:26.079
1231
+ example of image processing
1232
+
1233
+ 00:16:23.360 --> 00:16:27.839
1234
+ autopilot is an optional drive system
1235
+
1236
+ 00:16:26.079 --> 00:16:30.399
1237
+ for tesla cars
1238
+
1239
+ 00:16:27.839 --> 00:16:34.160
1240
+ when autopilot is engaged cars can
1241
+
1242
+ 00:16:30.399 --> 00:16:37.680
1243
+ self-steer adjust speed detect nearby
1244
+
1245
+ 00:16:34.160 --> 00:16:40.079
1246
+ obstacles apply the brakes and park
1247
+
1248
+ 00:16:37.680 --> 00:16:41.040
1249
+ now let's see how robotics uses machine
1250
+
1251
+ 00:16:40.079 --> 00:16:43.120
1252
+ learning
1253
+
1254
+ 00:16:41.040 --> 00:16:44.720
1255
+ robots are machines that can be used to
1256
+
1257
+ 00:16:43.120 --> 00:16:47.199
1258
+ do certain jobs
1259
+
1260
+ 00:16:44.720 --> 00:16:49.199
1261
+ some of the examples of robotics are
1262
+
1263
+ 00:16:47.199 --> 00:16:52.079
1264
+ where a humanoid robot can read the
1265
+
1266
+ 00:16:49.199 --> 00:16:54.079
1267
+ emotions of human beings or
1268
+
1269
+ 00:16:52.079 --> 00:16:57.040
1270
+ an industrial robot is used for
1271
+
1272
+ 00:16:54.079 --> 00:16:58.880
1273
+ assembling and manufacturing products
1274
+
1275
+ 00:16:57.040 --> 00:17:01.360
1276
+ so let's look at some real-time
1277
+
1278
+ 00:16:58.880 --> 00:17:04.880
1279
+ applications of machine learning
1280
+
1281
+ 00:17:01.360 --> 00:17:07.199
1282
+ let's see what data mining is it is the
1283
+
1284
+ 00:17:04.880 --> 00:17:08.160
1285
+ method of analyzing hidden patterns in
1286
+
1287
+ 00:17:07.199 --> 00:17:10.160
1288
+ data
1289
+
1290
+ 00:17:08.160 --> 00:17:11.679
1291
+ let's look at some of the applications
1292
+
1293
+ 00:17:10.160 --> 00:17:13.919
1294
+ of data mining
1295
+
1296
+ 00:17:11.679 --> 00:17:16.000
1297
+ it is used for anomaly detection to
1298
+
1299
+ 00:17:13.919 --> 00:17:18.720
1300
+ detect credit card fraud and to
1301
+
1302
+ 00:17:16.000 --> 00:17:21.839
1303
+ determine which transactions vary from
1304
+
1305
+ 00:17:18.720 --> 00:17:24.160
1306
+ usual purchasing patterns
1307
+
1308
+ 00:17:21.839 --> 00:17:26.799
1309
+ it is also used in market basket
1310
+
1311
+ 00:17:24.160 --> 00:17:30.559
1312
+ analysis which is used to detect which
1313
+
1314
+ 00:17:26.799 --> 00:17:30.559
1315
+ items are often bought together
1316
+
1317
+ 00:17:31.679 --> 00:17:38.720
1318
+ it can be used for grouping where it
1319
+
1320
+ 00:17:33.679 --> 00:17:38.720
1321
+ classifies users based on their profiles
1322
+
1323
+ 00:17:38.799 --> 00:17:43.280
1324
+ machine learning is also applied in many
1325
+
1326
+ 00:17:41.039 --> 00:17:46.799
1327
+ video games in order to give predictions
1328
+
1329
+ 00:17:43.280 --> 00:17:48.640
1330
+ based on data in a pokemon go battle
1331
+
1332
+ 00:17:46.799 --> 00:17:50.480
1333
+ there is a lot of data to take into
1334
+
1335
+ 00:17:48.640 --> 00:17:51.760
1336
+ account to correctly predict the winner
1337
+
1338
+ 00:17:50.480 --> 00:17:53.600
1339
+ of a battle
1340
+
1341
+ 00:17:51.760 --> 00:17:56.000
1342
+ and this is where machine learning
1343
+
1344
+ 00:17:53.600 --> 00:17:58.400
1345
+ becomes useful a machine learning
1346
+
1347
+ 00:17:56.000 --> 00:18:01.280
1348
+ classifier will predict the result of
1349
+
1350
+ 00:17:58.400 --> 00:18:03.520
1351
+ the match based on this data
1352
+
1353
+ 00:18:01.280 --> 00:18:05.440
1354
+ let's move on to one of the most popular
1355
+
1356
+ 00:18:03.520 --> 00:18:07.360
1357
+ applications of machine learning which
1358
+
1359
+ 00:18:05.440 --> 00:18:09.919
1360
+ is text analysis
1361
+
1362
+ 00:18:07.360 --> 00:18:11.840
1363
+ it is the automated process of obtaining
1364
+
1365
+ 00:18:09.919 --> 00:18:14.640
1366
+ information from text
1367
+
1368
+ 00:18:11.840 --> 00:18:17.600
1369
+ one example of text analysis is spam
1370
+
1371
+ 00:18:14.640 --> 00:18:19.039
1372
+ filtering which is used to detect spam
1373
+
1374
+ 00:18:17.600 --> 00:18:21.440
1375
+ in emails
1376
+
1377
+ 00:18:19.039 --> 00:18:24.160
1378
+ another example is sentiment analysis
1379
+
1380
+ 00:18:21.440 --> 00:18:26.400
1381
+ which is used for classifying an opinion
1382
+
1383
+ 00:18:24.160 --> 00:18:28.799
1384
+ as positive negative or neutral it
1385
+
1386
+ 00:18:26.400 --> 00:18:31.360
1387
+ detects public sentiment in twitter feed
1388
+
1389
+ 00:18:28.799 --> 00:18:33.280
1390
+ or filters customer complaints
1391
+
1392
+ 00:18:31.360 --> 00:18:36.320
1393
+ it is also used for information
1394
+
1395
+ 00:18:33.280 --> 00:18:40.880
1396
+ extraction such as extracting specific
1397
+
1398
+ 00:18:36.320 --> 00:18:40.880
1399
+ data address keyword or entities
1400
+
1401
+ 00:18:41.200 --> 00:18:45.520
1402
+ there are many applications of machine
1403
+
1404
+ 00:18:43.280 --> 00:18:48.720
1405
+ learning in the healthcare industry
1406
+
1407
+ 00:18:45.520 --> 00:18:51.600
1408
+ identifying disease and diagnosis
1409
+
1410
+ 00:18:48.720 --> 00:18:54.400
1411
+ drug discovery and manufacturing medical
1412
+
1413
+ 00:18:51.600 --> 00:18:56.320
1414
+ imaging diagnosis and so on
1415
+
1416
+ 00:18:54.400 --> 00:18:58.480
1417
+ some of the companies that use machine
1418
+
1419
+ 00:18:56.320 --> 00:19:01.200
1420
+ learning have revolutionized the health
1421
+
1422
+ 00:18:58.480 --> 00:19:02.160
1423
+ care industry are google deep mind
1424
+
1425
+ 00:19:01.200 --> 00:19:06.320
1426
+ health
1427
+
1428
+ 00:19:02.160 --> 00:19:08.430
1429
+ bio beats health fidelity and ginger dot
1430
+
1431
+ 00:19:06.320 --> 00:19:12.880
1432
+ io
1433
+
1434
+ 00:19:08.430 --> 00:19:14.960
1435
+ [Music]
1436
+
1437
+ 00:19:12.880 --> 00:19:14.960
1438
+ you
1439
+
1440
+
data/subtitles/Deep Learning.vtt ADDED
@@ -0,0 +1,2757 @@
1
+ WEBVTT - Subtitles by: DownloadYoutubeSubtitles.com
2
+
3
+ 00:00:03.040 --> 00:00:08.960
4
+ hello and welcome to the session on deep
5
+
6
+ 00:00:05.920 --> 00:00:11.599
7
+ learning my name is mohan and in this
8
+
9
+ 00:00:08.960 --> 00:00:14.080
10
+ video we are going to talk about what
11
+
12
+ 00:00:11.599 --> 00:00:16.160
13
+ deep learning is all about some of you
14
+
15
+ 00:00:14.080 --> 00:00:19.520
16
+ may be already familiar with the image
17
+
18
+ 00:00:16.160 --> 00:00:22.640
19
+ recognition how does image recognition
20
+
21
+ 00:00:19.520 --> 00:00:25.680
22
+ work you can train this application or
23
+
24
+ 00:00:22.640 --> 00:00:28.080
25
+ your machine to recognize whether a
26
+
27
+ 00:00:25.680 --> 00:00:30.080
28
+ given image is a cat or a dog and this
29
+
30
+ 00:00:28.080 --> 00:00:32.239
31
+ is how it works at a very high level it
32
+
33
+ 00:00:30.080 --> 00:00:34.480
34
+ uses artificial neural network it is
35
+
36
+ 00:00:32.239 --> 00:00:36.559
37
+ trained with some known images and
38
+
39
+ 00:00:34.480 --> 00:00:38.960
40
+ during the training it is told if it is
41
+
42
+ 00:00:36.559 --> 00:00:41.040
43
+ recognizing correctly or not and then
44
+
45
+ 00:00:38.960 --> 00:00:42.960
46
+ when new images are submitted it
47
+
48
+ 00:00:41.040 --> 00:00:45.680
49
+ recognizes correctly based on the
50
+
51
+ 00:00:42.960 --> 00:00:47.600
52
+ accuracy of course so a little quick
53
+
54
+ 00:00:45.680 --> 00:00:50.480
55
+ understanding about artificial neural
56
+
57
+ 00:00:47.600 --> 00:00:53.600
58
+ networks so this is the way it does is
59
+
60
+ 00:00:50.480 --> 00:00:56.000
61
+ you provide a lot of training data also
62
+
63
+ 00:00:53.600 --> 00:00:59.359
64
+ known as labeled data for example in
65
+
66
+ 00:00:56.000 --> 00:01:02.640
67
+ this case these are the images of dogs
68
+
69
+ 00:00:59.359 --> 00:01:05.600
70
+ and the network extracts some features
71
+
72
+ 00:01:02.640 --> 00:01:08.640
73
+ that makes a dog a dog right so that is
74
+
75
+ 00:01:05.600 --> 00:01:11.760
76
+ known as feature extraction and based on
77
+
78
+ 00:01:08.640 --> 00:01:13.760
79
+ that when you submit a new image of dog
80
+
81
+ 00:01:11.760 --> 00:01:15.119
82
+ the basic features remain pretty much
83
+
84
+ 00:01:13.760 --> 00:01:17.759
85
+ the same it may be a completely
86
+
87
+ 00:01:15.119 --> 00:01:21.280
88
+ different image but the features of a
89
+
90
+ 00:01:17.759 --> 00:01:23.200
91
+ dog still remain pretty much the same in
92
+
93
+ 00:01:21.280 --> 00:01:25.680
94
+ various different images let's say
95
+
96
+ 00:01:23.200 --> 00:01:28.000
97
+ compared to a cat and that's the way
98
+
99
+ 00:01:25.680 --> 00:01:30.479
100
+ artificial neural network works we'll go
101
+
102
+ 00:01:28.000 --> 00:01:32.240
103
+ into details of this uh very shortly and
104
+
105
+ 00:01:30.479 --> 00:01:35.119
106
+ once the training is done with training
107
+
108
+ 00:01:32.240 --> 00:01:37.439
109
+ data we then test it with some test data
110
+
111
+ 00:01:35.119 --> 00:01:39.840
112
+ too which is basically completely new
113
+
114
+ 00:01:37.439 --> 00:01:42.240
115
+ data which the system has not seen
116
+
117
+ 00:01:39.840 --> 00:01:43.920
118
+ before unlike the training data and then
119
+
120
+ 00:01:42.240 --> 00:01:46.560
121
+ we find out whether it is predicting
122
+
123
+ 00:01:43.920 --> 00:01:49.280
124
+ correctly or not thereby we know whether
125
+
126
+ 00:01:46.560 --> 00:01:50.799
127
+ the training is complete or it needs
128
+
129
+ 00:01:49.280 --> 00:01:53.119
130
+ more training so that's at a very high
131
+
132
+ 00:01:50.799 --> 00:01:55.040
133
+ level how artificial neural network works so
134
+
135
+ 00:01:53.119 --> 00:01:57.119
136
+ this is what we are going to talk about
137
+
138
+ 00:01:55.040 --> 00:01:59.200
139
+ today our agenda looks something like
140
+
141
+ 00:01:57.119 --> 00:02:00.799
142
+ this what is deep learning why do we
143
+
144
+ 00:01:59.200 --> 00:02:03.040
145
+ need deep learning and then what are the
146
+
147
+ 00:02:00.799 --> 00:02:05.920
148
+ applications of deep learning one of the
149
+
150
+ 00:02:03.040 --> 00:02:08.239
151
+ main components the secret sauce in deep
152
+
153
+ 00:02:05.920 --> 00:02:09.599
154
+ learning is neural networks so we're
155
+
156
+ 00:02:08.239 --> 00:02:10.879
157
+ going to talk about what is neural
158
+
159
+ 00:02:09.599 --> 00:02:12.879
160
+ network and
161
+
162
+ 00:02:10.879 --> 00:02:15.520
163
+ how it works and some of its components
164
+
165
+ 00:02:12.879 --> 00:02:17.440
166
+ like for example the activation function
167
+
168
+ 00:02:15.520 --> 00:02:20.160
169
+ the gradient descent and so on and so
170
+
171
+ 00:02:17.440 --> 00:02:21.680
172
+ forth so that as a part of working of a
173
+
174
+ 00:02:20.160 --> 00:02:23.520
175
+ neural network we will go into little
176
+
177
+ 00:02:21.680 --> 00:02:26.720
178
+ bit more details how this whole thing
179
+
180
+ 00:02:23.520 --> 00:02:29.360
181
+ works so without much further ado let's
182
+
183
+ 00:02:26.720 --> 00:02:31.520
184
+ get started so deep learning is
185
+
186
+ 00:02:29.360 --> 00:02:34.080
187
+ considered to be a part of machine
188
+
189
+ 00:02:31.520 --> 00:02:36.720
190
+ learning so this diagram very nicely
191
+
192
+ 00:02:34.080 --> 00:02:39.599
193
+ depicts what deep learning is at a very
194
+
195
+ 00:02:36.720 --> 00:02:42.480
196
+ high level you have the all-encompassing
197
+
198
+ 00:02:39.599 --> 00:02:45.840
199
+ artificial intelligence which is more a
200
+
201
+ 00:02:42.480 --> 00:02:47.680
202
+ concept rather than a technology or a
203
+
204
+ 00:02:45.840 --> 00:02:49.280
205
+ technical concept right so it is it's
206
+
207
+ 00:02:47.680 --> 00:02:51.440
208
+ more of a concept at a very high level
209
+
210
+ 00:02:49.280 --> 00:02:53.200
211
+ artificial intelligence under the hood
212
+
213
+ 00:02:51.440 --> 00:02:55.360
214
+ is actually machine learning and deep
215
+
216
+ 00:02:53.200 --> 00:02:58.560
217
+ learning and machine learning is a
218
+
219
+ 00:02:55.360 --> 00:03:01.840
220
+ broader concept you can say or a broader
221
+
222
+ 00:02:58.560 --> 00:03:03.280
223
+ technology and deep learning is a subset
224
+
225
+ 00:03:01.840 --> 00:03:05.440
226
+ of machine learning the primary
227
+
228
+ 00:03:03.280 --> 00:03:07.920
229
+ difference between machine learning and
230
+
231
+ 00:03:05.440 --> 00:03:11.519
232
+ deep learning is that deep learning uses
233
+
234
+ 00:03:07.920 --> 00:03:14.080
235
+ neural networks and it is suitable for
236
+
237
+ 00:03:11.519 --> 00:03:16.480
238
+ handling large amounts of unstructured
239
+
240
+ 00:03:14.080 --> 00:03:18.080
241
+ data and the last but not least one of
242
+
243
+ 00:03:16.480 --> 00:03:19.599
244
+ the major differences between machine
245
+
246
+ 00:03:18.080 --> 00:03:22.080
247
+ learning and deep learning is that in
248
+
249
+ 00:03:19.599 --> 00:03:24.640
250
+ machine learning the feature extraction
251
+
252
+ 00:03:22.080 --> 00:03:26.959
253
+ or the feature engineering is done by
254
+
255
+ 00:03:24.640 --> 00:03:29.280
256
+ the data scientists manually but in deep
257
+
258
+ 00:03:26.959 --> 00:03:30.799
259
+ learning since we use neural networks
260
+
261
+ 00:03:29.280 --> 00:03:32.720
262
+ the feature engineering happens
263
+
264
+ 00:03:30.799 --> 00:03:34.720
265
+ automatically so that's a little bit of
266
+
267
+ 00:03:32.720 --> 00:03:36.000
268
+ a quick difference between machine
269
+
270
+ 00:03:34.720 --> 00:03:38.159
271
+ learning and deep learning and this
272
+
273
+ 00:03:36.000 --> 00:03:40.000
274
+ diagram very nicely depicts the relation
275
+
276
+ 00:03:38.159 --> 00:03:42.239
277
+ between artificial intelligence machine
278
+
279
+ 00:03:40.000 --> 00:03:44.319
280
+ learning and deep learning now why do we
281
+
282
+ 00:03:42.239 --> 00:03:47.040
283
+ need deep learning machine learning was
284
+
285
+ 00:03:44.319 --> 00:03:49.120
286
+ there for quite some time and it can do
287
+
288
+ 00:03:47.040 --> 00:03:51.599
289
+ a lot of stuff that probably what deep
290
+
291
+ 00:03:49.120 --> 00:03:53.680
292
+ learning can do but it's not very good
293
+
294
+ 00:03:51.599 --> 00:03:57.200
295
+ at handling large amounts of
296
+
297
+ 00:03:53.680 --> 00:03:59.920
298
+ unstructured data like images voice or
299
+
300
+ 00:03:57.200 --> 00:04:01.920
301
+ even text for that matter so traditional
302
+
303
+ 00:03:59.920 --> 00:04:03.519
304
+ machine learning is not that very good
305
+
306
+ 00:04:01.920 --> 00:04:05.040
307
+ at doing this traditional machine
308
+
309
+ 00:04:03.519 --> 00:04:07.040
310
+ learning can handle large amounts of
311
+
312
+ 00:04:05.040 --> 00:04:09.120
313
+ structured data but when it comes to
314
+
315
+ 00:04:07.040 --> 00:04:10.480
316
+ unstructured data it's a big challenge
317
+
318
+ 00:04:09.120 --> 00:04:12.560
319
+ so that is one of the key
320
+
321
+ 00:04:10.480 --> 00:04:15.519
322
+ differentiators for deep learning so
323
+
324
+ 00:04:12.560 --> 00:04:18.320
325
+ that is number one and increasingly for
326
+
327
+ 00:04:15.519 --> 00:04:20.400
328
+ artificial intelligence we need image
329
+
330
+ 00:04:18.320 --> 00:04:22.320
331
+ recognition and we need to process
332
+
333
+ 00:04:20.400 --> 00:04:23.680
334
+ analyze images and voice that's the
335
+
336
+ 00:04:22.320 --> 00:04:25.520
337
+ reason deep learning is required
338
+
339
+ 00:04:23.680 --> 00:04:27.840
340
+ compared to let's say traditional
341
+
342
+ 00:04:25.520 --> 00:04:31.199
343
+ machine learning it can also perform
344
+
345
+ 00:04:27.840 --> 00:04:33.120
346
+ complex algorithms more complex than
347
+
348
+ 00:04:31.199 --> 00:04:35.919
349
+ let's say what machine learning can do
350
+
351
+ 00:04:33.120 --> 00:04:38.000
352
+ and it can achieve best performance with
353
+
354
+ 00:04:35.919 --> 00:04:39.919
355
+ the large amounts of data so the more
356
+
357
+ 00:04:38.000 --> 00:04:42.800
358
+ you have the data let's say reference
359
+
360
+ 00:04:39.919 --> 00:04:44.639
361
+ data or label data the better the system
362
+
363
+ 00:04:42.800 --> 00:04:46.960
364
+ will do because the training process
365
+
366
+ 00:04:44.639 --> 00:04:49.040
367
+ will be that much better and last but
368
+
369
+ 00:04:46.960 --> 00:04:51.600
370
+ not least with deep learning you can
371
+
372
+ 00:04:49.040 --> 00:04:53.360
373
+ really avoid the manual process of
374
+
375
+ 00:04:51.600 --> 00:04:55.280
376
+ feature extraction those are some of the
377
+
378
+ 00:04:53.360 --> 00:04:57.120
379
+ reasons why we need deep learning some
380
+
381
+ 00:04:55.280 --> 00:05:00.160
382
+ of the applications of deep learning
383
+
384
+ 00:04:57.120 --> 00:05:02.960
385
+ deep learning has made major inroads and
386
+
387
+ 00:05:00.160 --> 00:05:05.440
388
+ a major area in which deep
389
+
390
+ 00:05:02.960 --> 00:05:08.880
391
+ learning is applied is healthcare and
392
+
393
+ 00:05:05.440 --> 00:05:12.080
394
+ within healthcare particularly oncology
395
+
396
+ 00:05:08.880 --> 00:05:15.199
397
+ which is basically cancer related stuff
398
+
399
+ 00:05:12.080 --> 00:05:17.919
400
+ one of the issues with cancer is that a
401
+
402
+ 00:05:15.199 --> 00:05:20.960
403
+ lot of cancers today are curable they
404
+
405
+ 00:05:17.919 --> 00:05:23.360
406
+ can be cured they are detected early on
407
+
408
+ 00:05:20.960 --> 00:05:25.600
409
+ and the challenge with that is when a
410
+
411
+ 00:05:23.360 --> 00:05:28.080
412
+ diagnostics is performed let's say an
413
+
414
+ 00:05:25.600 --> 00:05:30.320
415
+ image has been taken of a patient to
416
+
417
+ 00:05:28.080 --> 00:05:33.120
418
+ detect whether there is cancer or not
419
+
420
+ 00:05:30.320 --> 00:05:35.120
421
+ you need a specialist to look at the
422
+
423
+ 00:05:33.120 --> 00:05:38.080
424
+ image and determine whether it is the
425
+
426
+ 00:05:35.120 --> 00:05:41.199
427
+ patient is fine or there is any onset of
428
+
429
+ 00:05:38.080 --> 00:05:44.160
430
+ cancer and the number of specialists are
431
+
432
+ 00:05:41.199 --> 00:05:46.639
433
+ limited so if we use deep learning if we
434
+
435
+ 00:05:44.160 --> 00:05:48.880
436
+ use automation here or if we use
437
+
438
+ 00:05:46.639 --> 00:05:52.000
439
+ artificial intelligence here then the
440
+
441
+ 00:05:48.880 --> 00:05:54.639
442
+ system can with a certain amount of the
443
+
444
+ 00:05:52.000 --> 00:05:57.520
445
+ good amount of accuracy determine
446
+
447
+ 00:05:54.639 --> 00:06:00.000
448
+ whether a particular patient is having
449
+
450
+ 00:05:57.520 --> 00:06:02.960
451
+ cancer or not so the prediction or the
452
+
453
+ 00:06:00.000 --> 00:06:05.919
454
+ detection process of a disease like
455
+
456
+ 00:06:02.960 --> 00:06:08.160
457
+ cancer can be expedited the detection
458
+
459
+ 00:06:05.919 --> 00:06:10.800
460
+ process can be expedited can be faster
461
+
462
+ 00:06:08.160 --> 00:06:13.600
463
+ without really waiting for a specialist
464
+
465
+ 00:06:10.800 --> 00:06:15.919
466
+ we can obviously then once the
467
+
468
+ 00:06:13.600 --> 00:06:18.479
469
+ application once the artificial
470
+
471
+ 00:06:15.919 --> 00:06:20.800
472
+ intelligence detects or predicts that
473
+
474
+ 00:06:18.479 --> 00:06:23.120
475
+ there is an onset of a cancer this can
476
+
477
+ 00:06:20.800 --> 00:06:25.680
478
+ be cross-checked by a doctor but at
479
+
480
+ 00:06:23.120 --> 00:06:27.520
481
+ least the initial screening process can
482
+
483
+ 00:06:25.680 --> 00:06:29.919
484
+ be automated and that is where the
485
+
486
+ 00:06:27.520 --> 00:06:32.160
487
+ current focus is with respect to deep
488
+
489
+ 00:06:29.919 --> 00:06:34.560
490
+ learning in healthcare what else
491
+
492
+ 00:06:32.160 --> 00:06:38.319
493
+ robotics is another area deep learning
494
+
495
+ 00:06:34.560 --> 00:06:40.880
496
+ is majorly used in robotics and you must
497
+
498
+ 00:06:38.319 --> 00:06:43.199
499
+ have seen nowadays robots are everywhere
500
+
501
+ 00:06:40.880 --> 00:06:45.120
502
+ humanoids the industrial robots which
503
+
504
+ 00:06:43.199 --> 00:06:48.080
505
+ are used for manufacturing process you
506
+
507
+ 00:06:45.120 --> 00:06:50.639
508
+ must have heard about sophia who got
509
+
510
+ 00:06:48.080 --> 00:06:53.360
511
+ citizenship with saudi arabia and so on
512
+
513
+ 00:06:50.639 --> 00:06:55.840
514
+ there are multiple such robots which are
515
+
516
+ 00:06:53.360 --> 00:06:58.880
517
+ knowledge oriented but there are also
518
+
519
+ 00:06:55.840 --> 00:07:00.639
520
+ industrial robots are used in industries
521
+
522
+ 00:06:58.880 --> 00:07:03.120
523
+ in the manufacturing process and
524
+
525
+ 00:07:00.639 --> 00:07:05.440
526
+ increasingly in security and also in
527
+
528
+ 00:07:03.120 --> 00:07:07.840
529
+ defense for example image processing
530
+
531
+ 00:07:05.440 --> 00:07:10.080
532
+ video is fed to them and they need to be
533
+
534
+ 00:07:07.840 --> 00:07:11.599
535
+ able to detect objects obstacles and so
536
+
537
+ 00:07:10.080 --> 00:07:13.520
538
+ on and so forth so that's where deep
539
+
540
+ 00:07:11.599 --> 00:07:15.599
541
+ learning is used they need to be able to
542
+
543
+ 00:07:13.520 --> 00:07:17.520
544
+ hear and make sense of the sounds that
545
+
546
+ 00:07:15.599 --> 00:07:20.400
547
+ they are hearing that needs deep
548
+
549
+ 00:07:17.520 --> 00:07:22.800
550
+ learning as well so robotics is a major
551
+
552
+ 00:07:20.400 --> 00:07:25.680
553
+ area where deep learning is applied then
554
+
555
+ 00:07:22.800 --> 00:07:27.919
556
+ we have self-driving cars or autonomous
557
+
558
+ 00:07:25.680 --> 00:07:30.960
559
+ cars you must have heard of google's
560
+
561
+ 00:07:27.919 --> 00:07:33.759
562
+ autonomous car which has been tested for
563
+
564
+ 00:07:30.960 --> 00:07:35.440
565
+ millions of miles and pretty much
566
+
567
+ 00:07:33.759 --> 00:07:37.120
568
+ incident free there were of course a
569
+
570
+ 00:07:35.440 --> 00:07:39.759
571
+ couple of incidents here and there but
572
+
573
+ 00:07:37.120 --> 00:07:42.880
574
+ it is uh considered to be fairly safe
575
+
576
+ 00:07:39.759 --> 00:07:45.120
577
+ and there are today a lot of automotive
578
+
579
+ 00:07:42.880 --> 00:07:47.520
580
+ companies in fact pretty much every
581
+
582
+ 00:07:45.120 --> 00:07:49.919
583
+ automotive company worth its name is
584
+
585
+ 00:07:47.520 --> 00:07:52.080
586
+ investing in self-driving cars or
587
+
588
+ 00:07:49.919 --> 00:07:54.560
589
+ autonomous cars and it is predicted that
590
+
591
+ 00:07:52.080 --> 00:07:56.160
592
+ in the next probably 10 to 15 years
593
+
594
+ 00:07:54.560 --> 00:07:59.120
595
+ these will be in production and they
596
+
597
+ 00:07:56.160 --> 00:08:01.039
598
+ will be used extensively in real life
599
+
600
+ 00:07:59.120 --> 00:08:03.039
601
+ right now they are all in rnd and in
602
+
603
+ 00:08:01.039 --> 00:08:05.360
604
+ test phases but pretty soon these will
605
+
606
+ 00:08:03.039 --> 00:08:07.280
607
+ be on the road so this is another area
608
+
609
+ 00:08:05.360 --> 00:08:08.960
610
+ where deep learning is used and how is
611
+
612
+ 00:08:07.280 --> 00:08:11.759
613
+ it used where is it used within
614
+
615
+ 00:08:08.960 --> 00:08:14.960
616
+ autonomous driving the car actually is
617
+
618
+ 00:08:11.759 --> 00:08:17.039
619
+ fed with video of surroundings and it is
620
+
621
+ 00:08:14.960 --> 00:08:18.879
622
+ supposed to process that information
623
+
624
+ 00:08:17.039 --> 00:08:20.800
625
+ process that video and determine if
626
+
627
+ 00:08:18.879 --> 00:08:23.039
628
+ there are any obstacles it has to
629
+
630
+ 00:08:20.800 --> 00:08:25.759
631
+ determine if there are any cars in the
632
+
633
+ 00:08:23.039 --> 00:08:28.160
634
+ site will detect whether it is driving
635
+
636
+ 00:08:25.759 --> 00:08:31.759
637
+ in the lane also it has to determine
638
+
639
+ 00:08:28.160 --> 00:08:34.159
640
+ whether the signal is green or red so
641
+
642
+ 00:08:31.759 --> 00:08:37.760
643
+ that accordingly it can move forward or
644
+
645
+ 00:08:34.159 --> 00:08:39.599
646
+ wait so for all these video analysis
647
+
648
+ 00:08:37.760 --> 00:08:41.919
649
+ deep learning is used in addition to
650
+
651
+ 00:08:39.599 --> 00:08:44.720
652
+ that the training overall training to
653
+
654
+ 00:08:41.919 --> 00:08:47.200
655
+ drive the car happens in a deep learning
656
+
657
+ 00:08:44.720 --> 00:08:48.720
658
+ environment so again a lot of scope here
659
+
660
+ 00:08:47.200 --> 00:08:51.120
661
+ to use deep learning a couple of other
662
+
663
+ 00:08:48.720 --> 00:08:54.880
664
+ applications are machine translations
665
+
666
+ 00:08:51.120 --> 00:08:57.760
667
+ today we have a lot of information and
668
+
669
+ 00:08:54.880 --> 00:08:59.519
670
+ very often this information is in one
671
+
672
+ 00:08:57.760 --> 00:09:03.120
673
+ particular language and more
674
+
675
+ 00:08:59.519 --> 00:09:05.519
676
+ specifically in english and people need
677
+
678
+ 00:09:03.120 --> 00:09:08.560
679
+ information in various parts of the
680
+
681
+ 00:09:05.519 --> 00:09:11.120
682
+ world it is pretty difficult for human
683
+
684
+ 00:09:08.560 --> 00:09:13.519
685
+ beings to translate each and every piece
686
+
687
+ 00:09:11.120 --> 00:09:15.279
688
+ of information or every document into
689
+
690
+ 00:09:13.519 --> 00:09:17.440
691
+ all possible languages there are
692
+
693
+ 00:09:15.279 --> 00:09:19.600
694
+ probably at least hundreds of languages
695
+
696
+ 00:09:17.440 --> 00:09:22.720
697
+ or if not more to translate each and
698
+
699
+ 00:09:19.600 --> 00:09:25.920
700
+ every document into every language is
701
+
702
+ 00:09:22.720 --> 00:09:28.560
703
+ pretty difficult therefore we can use
704
+
705
+ 00:09:25.920 --> 00:09:31.440
706
+ deep learning to do pretty much like a
707
+
708
+ 00:09:28.560 --> 00:09:33.200
709
+ real-time translation mechanism so we
710
+
711
+ 00:09:31.440 --> 00:09:36.160
712
+ don't have to translate everything and
713
+
714
+ 00:09:33.200 --> 00:09:38.640
715
+ keep it ready but we train applications
716
+
717
+ 00:09:36.160 --> 00:09:41.519
718
+ or artificial intelligence systems that
719
+
720
+ 00:09:38.640 --> 00:09:44.560
721
+ will do the translation on the fly for
722
+
723
+ 00:09:41.519 --> 00:09:46.320
724
+ example you go to somewhere like china
725
+
726
+ 00:09:44.560 --> 00:09:48.480
727
+ and you want to know what is written on
728
+
729
+ 00:09:46.320 --> 00:09:50.800
730
+ a signboard now it is impossible for
731
+
732
+ 00:09:48.480 --> 00:09:52.800
733
+ somebody to translate that and put it on
734
+
735
+ 00:09:50.800 --> 00:09:55.440
736
+ the web or something like that so you
737
+
738
+ 00:09:52.800 --> 00:09:57.920
739
+ have an application which is trained to
740
+
741
+ 00:09:55.440 --> 00:10:00.000
742
+ translate stuff on the fly so you
743
+
744
+ 00:09:57.920 --> 00:10:02.240
745
+ probably this can be running on your
746
+
747
+ 00:10:00.000 --> 00:10:05.200
748
+ mobile phone on your smartphone you scan
749
+
750
+ 00:10:02.240 --> 00:10:07.440
751
+ this the application will instantly
752
+
753
+ 00:10:05.200 --> 00:10:10.240
754
+ translate that from chinese to english
755
+
756
+ 00:10:07.440 --> 00:10:11.760
757
+ that is one then there could be web
758
+
759
+ 00:10:10.240 --> 00:10:14.399
760
+ applications where there may be a
761
+
762
+ 00:10:11.760 --> 00:10:16.640
763
+ research document which is all in maybe
764
+
765
+ 00:10:14.399 --> 00:10:19.839
766
+ chinese or japanese and you want to
767
+
768
+ 00:10:16.640 --> 00:10:22.000
769
+ translate that to study that document or
770
+
771
+ 00:10:19.839 --> 00:10:23.839
772
+ in that case you need to translate so
773
+
774
+ 00:10:22.000 --> 00:10:26.160
775
+ therefore deep learning is used in such
776
+
777
+ 00:10:23.839 --> 00:10:28.160
778
+ situations as well and that is again on
779
+
780
+ 00:10:26.160 --> 00:10:30.240
781
+ demand so it is not like you have to
782
+
783
+ 00:10:28.160 --> 00:10:31.920
784
+ translate all these documents from other
785
+
786
+ 00:10:30.240 --> 00:10:34.000
787
+ languages into english in one shot and
788
+
789
+ 00:10:31.920 --> 00:10:36.480
790
+ keep it somewhere that is again pretty
791
+
792
+ 00:10:34.000 --> 00:10:38.160
793
+ much an impossible task but on a need
794
+
795
+ 00:10:36.480 --> 00:10:40.399
796
+ basis so you have systems that are
797
+
798
+ 00:10:38.160 --> 00:10:42.000
799
+ trained to translate on the fly so
800
+
801
+ 00:10:40.399 --> 00:10:43.600
802
+ machine translation is another major
803
+
804
+ 00:10:42.000 --> 00:10:45.920
805
+ area where deep learning is used then
806
+
807
+ 00:10:43.600 --> 00:10:48.800
808
+ there are a few other upcoming areas
809
+
810
+ 00:10:45.920 --> 00:10:51.279
811
+ where synthesizing is done by neural
812
+
813
+ 00:10:48.800 --> 00:10:53.680
814
+ nets for example music composition and
815
+
816
+ 00:10:51.279 --> 00:10:56.880
817
+ generation of music so you can train a
818
+
819
+ 00:10:53.680 --> 00:10:59.680
820
+ neural net to produce music even to
821
+
822
+ 00:10:56.880 --> 00:11:02.000
823
+ compose music so this is a fun thing
824
+
825
+ 00:10:59.680 --> 00:11:04.720
826
+ this is still upcoming it needs a lot of
827
+
828
+ 00:11:02.000 --> 00:11:06.640
829
+ effort to train such neural net it has
830
+
831
+ 00:11:04.720 --> 00:11:09.120
832
+ been proved that it is possible so this
833
+
834
+ 00:11:06.640 --> 00:11:11.760
835
+ is a relatively new area and on the same
836
+
837
+ 00:11:09.120 --> 00:11:13.920
838
+ lines colorization of images so these
839
+
840
+ 00:11:11.760 --> 00:11:15.839
841
+ two images on the left hand side is a
842
+
843
+ 00:11:13.920 --> 00:11:18.720
844
+ grayscale image or a black and white
845
+
846
+ 00:11:15.839 --> 00:11:20.480
847
+ image this was colored by a neural net
848
+
849
+ 00:11:18.720 --> 00:11:22.959
850
+ or a deep learning application as you
851
+
852
+ 00:11:20.480 --> 00:11:25.040
853
+ can see it's done a very good job of
854
+
855
+ 00:11:22.959 --> 00:11:28.000
856
+ applying the colors and obviously this
857
+
858
+ 00:11:25.040 --> 00:11:30.320
859
+ was trained to do this colorization but
860
+
861
+ 00:11:28.000 --> 00:11:33.360
862
+ yes this is one more application of deep
863
+
864
+ 00:11:30.320 --> 00:11:37.279
865
+ learning now one of the major secret
866
+
867
+ 00:11:33.360 --> 00:11:40.160
868
+ sauce of deep learning is neural network
869
+
870
+ 00:11:37.279 --> 00:11:42.240
871
+ deep learning works on neural network or
872
+
873
+ 00:11:40.160 --> 00:11:45.279
874
+ consists of neural network so let us see
875
+
876
+ 00:11:42.240 --> 00:11:49.040
877
+ what is neural network neural network or
878
+
879
+ 00:11:45.279 --> 00:11:53.360
880
+ artificial neural network is designed or
881
+
882
+ 00:11:49.040 --> 00:11:56.880
883
+ based on the human brain now human brain
884
+
885
+ 00:11:53.360 --> 00:11:59.519
886
+ consists of billions of small cells that
887
+
888
+ 00:11:56.880 --> 00:12:03.120
889
+ are known as neurons artificial neural
890
+
891
+ 00:11:59.519 --> 00:12:05.519
892
+ networks is in a way trying to simulate
893
+
894
+ 00:12:03.120 --> 00:12:07.839
895
+ the human brain so this is a quick
896
+
897
+ 00:12:05.519 --> 00:12:10.399
898
+ diagram of biological neuron a
899
+
900
+ 00:12:07.839 --> 00:12:12.959
901
+ biological neuron consists of the major
902
+
903
+ 00:12:10.399 --> 00:12:16.079
904
+ part which is the cell nucleus and then
905
+
906
+ 00:12:12.959 --> 00:12:18.240
907
+ it has some tentacles kind of stuff on
908
+
909
+ 00:12:16.079 --> 00:12:20.160
910
+ the top called dendrite and then there
911
+
912
+ 00:12:18.240 --> 00:12:22.399
913
+ is like a long tail which is known as
914
+
915
+ 00:12:20.160 --> 00:12:24.240
916
+ the axon further again at the end of
917
+
918
+ 00:12:22.399 --> 00:12:27.680
919
+ this axon are what are known as
920
+
921
+ 00:12:24.240 --> 00:12:30.880
922
+ synapses these in turn are connected to
923
+
924
+ 00:12:27.680 --> 00:12:33.680
925
+ the dendrites of the next neuron and all
926
+
927
+ 00:12:30.880 --> 00:12:35.440
928
+ these neurons are interconnected with
929
+
930
+ 00:12:33.680 --> 00:12:37.519
931
+ each other therefore they are like
932
+
933
+ 00:12:35.440 --> 00:12:39.440
934
+ billions of them sitting in our brain
935
+
936
+ 00:12:37.519 --> 00:12:42.000
937
+ and they're all active they're working
938
+
939
+ 00:12:39.440 --> 00:12:45.360
940
+ they based on the signals they receive
941
+
942
+ 00:12:42.000 --> 00:12:47.920
943
+ signals as inputs from other neurons or
944
+
945
+ 00:12:45.360 --> 00:12:50.639
946
+ maybe from other parts of the body and
947
+
948
+ 00:12:47.920 --> 00:12:52.720
949
+ based on certain criteria they send
950
+
951
+ 00:12:50.639 --> 00:12:54.800
952
+ signals to the neurons at the other end
953
+
954
+ 00:12:52.720 --> 00:12:56.880
955
+ so they get either activated or
956
+
957
+ 00:12:54.800 --> 00:12:59.760
958
+ they don't get activated based on so it
959
+
960
+ 00:12:56.880 --> 00:13:02.480
961
+ is like a binary gates so they get
962
+
963
+ 00:12:59.760 --> 00:13:04.800
964
+ activated or not activated based on the
965
+
966
+ 00:13:02.480 --> 00:13:06.399
967
+ inputs that they receive and so on so we
968
+
969
+ 00:13:04.800 --> 00:13:08.720
970
+ will see a little bit of those details
971
+
972
+ 00:13:06.399 --> 00:13:10.880
973
+ as we move forward in our artificial
974
+
975
+ 00:13:08.720 --> 00:13:12.320
976
+ neuron but this is a biological neuron
977
+
978
+ 00:13:10.880 --> 00:13:15.200
979
+ this is the structure of a biological
980
+
981
+ 00:13:12.320 --> 00:13:17.680
982
+ neuron and artificial neural network is
983
+
984
+ 00:13:15.200 --> 00:13:20.320
985
+ based on the human brain the smallest
986
+
987
+ 00:13:17.680 --> 00:13:23.440
988
+ component of artificial neural network
989
+
990
+ 00:13:20.320 --> 00:13:25.839
991
+ is an artificial neuron as shown here
992
+
993
+ 00:13:23.440 --> 00:13:28.000
994
+ sometimes is also referred to as
995
+
996
+ 00:13:25.839 --> 00:13:30.240
997
+ perceptron now this is a very high level
998
+
999
+ 00:13:28.000 --> 00:13:32.800
1000
+ diagram the artificial neuron has a
1001
+
1002
+ 00:13:30.240 --> 00:13:35.760
1003
+ small central unit which will receive
1004
+
1005
+ 00:13:32.800 --> 00:13:38.320
1006
+ the input if it is doing let's say image
1007
+
1008
+ 00:13:35.760 --> 00:13:41.040
1009
+ processing the inputs could be pixel
1010
+
1011
+ 00:13:38.320 --> 00:13:44.480
1012
+ values of the image which is represented
1013
+
1014
+ 00:13:41.040 --> 00:13:47.680
1015
+ here as x1 x2 and so on each of the
1016
+
1017
+ 00:13:44.480 --> 00:13:50.320
1018
+ inputs are multiplied by what is known
1019
+
1020
+ 00:13:47.680 --> 00:13:53.200
1021
+ as weights which are represented as w1
1022
+
1023
+ 00:13:50.320 --> 00:13:56.240
1024
+ w2 and so on there is in the central
1025
+
1026
+ 00:13:53.200 --> 00:13:59.600
1027
+ unit basically there is a summation of
1028
+
1029
+ 00:13:56.240 --> 00:14:03.279
1030
+ these weighted inputs which is like x1
1031
+
1032
+ 00:13:59.600 --> 00:14:06.160
1033
+ into w1 plus x2 into w2 and so on the
1034
+
1035
+ 00:14:03.279 --> 00:14:08.079
1036
+ products are then added and then there
1037
+
1038
+ 00:14:06.160 --> 00:14:10.720
1039
+ is a bias that is added to that in the
1040
+
1041
+ 00:14:08.079 --> 00:14:12.959
1042
+ next slide we will see that passes
1043
+
1044
+ 00:14:10.720 --> 00:14:16.160
1045
+ through an activation function and the
1046
+
1047
+ 00:14:12.959 --> 00:14:18.720
1048
+ output comes as a y which is the output
1049
+
1050
+ 00:14:16.160 --> 00:14:20.880
1051
+ and based on certain criteria the cell
1052
+
1053
+ 00:14:18.720 --> 00:14:23.519
1054
+ gets either activated or not activated
1055
+
1056
+ 00:14:20.880 --> 00:14:26.959
1057
+ so this output would be like a zero or a
1058
+
1059
+ 00:14:23.519 --> 00:14:28.639
1060
+ one binary format okay so we will see
1061
+
1062
+ 00:14:26.959 --> 00:14:30.639
1063
+ that in a little bit more detail but
1064
+
1065
+ 00:14:28.639 --> 00:14:33.040
1066
+ let's do a quick comparison between
1067
+
1068
+ 00:14:30.639 --> 00:14:35.040
1069
+ biological and artificial neurons just
1070
+
1071
+ 00:14:33.040 --> 00:14:36.639
1072
+ like a biological neuron there are
1073
+
1074
+ 00:14:35.040 --> 00:14:39.600
1075
+ dendrites and then there is a cell
1076
+
1077
+ 00:14:36.639 --> 00:14:42.880
1078
+ nucleus and synapse and an axon
1079
+
1080
+ 00:14:39.600 --> 00:14:45.920
1081
+ we have in the artificial neuron as well
1082
+
1083
+ 00:14:42.880 --> 00:14:48.160
1084
+ these inputs come like the dendrite if
1085
+
1086
+ 00:14:45.920 --> 00:14:50.320
1087
+ you will act like the dendrites there is
1088
+
1089
+ 00:14:48.160 --> 00:14:52.880
1090
+ a like a central unit which performs the
1091
+
1092
+ 00:14:50.320 --> 00:14:56.160
1093
+ summation of these uh weighted inputs
1094
+
1095
+ 00:14:52.880 --> 00:14:58.880
1096
+ which is basically w1 x1 w2 x2 and so on
1097
+
1098
+ 00:14:56.160 --> 00:15:00.639
1099
+ and then our bias is added here and then
1100
+
1101
+ 00:14:58.880 --> 00:15:02.880
1102
+ that passes through what is known as an
1103
+
1104
+ 00:15:00.639 --> 00:15:04.639
1105
+ activation function okay so these are
1106
+
1107
+ 00:15:02.880 --> 00:15:06.880
1108
+ known as the weights w1 w2 and then
1109
+
1110
+ 00:15:04.639 --> 00:15:09.519
1111
+ there is a bias which will come out here
1112
+
1113
+ 00:15:06.880 --> 00:15:11.600
1114
+ and that is added the bias is by the way
1115
+
1116
+ 00:15:09.519 --> 00:15:14.320
1117
+ common for a particular neuron so there
1118
+
1119
+ 00:15:11.600 --> 00:15:16.800
1120
+ won't be like b1 b2 b3 and so on only
1121
+
1122
+ 00:15:14.320 --> 00:15:19.440
1123
+ weights will be one per input the bias
1124
+
1125
+ 00:15:16.800 --> 00:15:22.639
1126
+ is common for the entire neuron it is
1127
+
1128
+ 00:15:19.440 --> 00:15:25.360
1129
+ also common for or the value of the bias
1130
+
1131
+ 00:15:22.639 --> 00:15:28.000
1132
+ remains the same for all the neurons in
1133
+
1134
+ 00:15:25.360 --> 00:15:29.920
1135
+ a particular layer we will also see this
1136
+
1137
+ 00:15:28.000 --> 00:15:31.600
1138
+ as we move forward and we see deep
1139
+
1140
+ 00:15:29.920 --> 00:15:34.160
1141
+ neural network where there are multiple
1142
+
1143
+ 00:15:31.600 --> 00:15:37.920
1144
+ neurons so that's the output now the
1145
+
1146
+ 00:15:34.160 --> 00:15:41.519
1147
+ whole exercise of training the neuron is
1148
+
1149
+ 00:15:37.920 --> 00:15:43.519
1150
+ about changing these weights and biases
1151
+
1152
+ 00:15:41.519 --> 00:15:46.000
1153
+ as i mentioned artificial neural network
1154
+
1155
+ 00:15:43.519 --> 00:15:48.560
1156
+ will consist of several such neurons and
1157
+
1158
+ 00:15:46.000 --> 00:15:50.880
1159
+ as a part of the training process these
1160
+
1161
+ 00:15:48.560 --> 00:15:53.120
1162
+ weights keep changing initially they are
1163
+
1164
+ 00:15:50.880 --> 00:15:55.360
1165
+ assigned some random values through the
1166
+
1167
+ 00:15:53.120 --> 00:15:57.279
1168
+ training process the weights the whole
1169
+
1170
+ 00:15:55.360 --> 00:16:00.880
1171
+ process of training is to come up with
1172
+
1173
+ 00:15:57.279 --> 00:16:02.959
1174
+ the optimum values of w1 w2 and wn and
1175
+
1176
+ 00:16:00.880 --> 00:16:05.519
1177
+ then the b or the bias for this
1178
+
1179
+ 00:16:02.959 --> 00:16:08.399
1180
+ particular neuron such that it gives an
1181
+
1182
+ 00:16:05.519 --> 00:16:11.040
1183
+ accurate output as required so let's see
1184
+
1185
+ 00:16:08.399 --> 00:16:13.440
1186
+ what exactly that means so the training
1187
+
1188
+ 00:16:11.040 --> 00:16:16.720
1189
+ process this is how it happens it takes
1190
+
1191
+ 00:16:13.440 --> 00:16:19.040
1192
+ the inputs each input is multiplied by a
1193
+
1194
+ 00:16:16.720 --> 00:16:20.639
1195
+ weight and these weights during training
1196
+
1197
+ 00:16:19.040 --> 00:16:23.440
1198
+ keep changing so initially they are
1199
+
1200
+ 00:16:20.639 --> 00:16:25.519
1201
+ assigned some random values and based on
1202
+
1203
+ 00:16:23.440 --> 00:16:27.519
1204
+ the output whether it is correct or
1205
+
1206
+ 00:16:25.519 --> 00:16:29.759
1207
+ wrong there is a feedback coming back
1208
+
1209
+ 00:16:27.519 --> 00:16:33.120
1210
+ and that will basically change these
1211
+
1212
+ 00:16:29.759 --> 00:16:36.320
1213
+ weights until it starts giving the right
1214
+
1215
+ 00:16:33.120 --> 00:16:39.199
1216
+ output that is represented in here as
1217
+
1218
+ 00:16:36.320 --> 00:16:42.320
1219
+ sigma i going from 1 to n if there are n
1220
+
1221
+ 00:16:39.199 --> 00:16:46.160
1222
+ inputs wi into x i so this is the
1223
+
1224
+ 00:16:42.320 --> 00:16:49.920
1225
+ product of w1 x1 w2 x2 and so on right
1226
+
1227
+ 00:16:46.160 --> 00:16:52.959
1228
+ and there is a bias that gets added here
1229
+
1230
+ 00:16:49.920 --> 00:16:55.360
1231
+ and that entire thing goes to what is
1232
+
1233
+ 00:16:52.959 --> 00:16:59.120
1234
+ known as an activation function so
1235
+
1236
+ 00:16:55.360 --> 00:17:02.160
1237
+ essentially this is sigma of w i x i
1238
+
1239
+ 00:16:59.120 --> 00:17:05.360
1240
+ plus a value of bias which is a b so
1241
+
1242
+ 00:17:02.160 --> 00:17:07.919
1243
+ that entire thing goes as an input to an
1244
+
1245
+ 00:17:05.360 --> 00:17:10.480
1246
+ activation function now this activation
1247
+
1248
+ 00:17:07.919 --> 00:17:13.520
1249
+ function takes this as an input gives
1250
+
1251
+ 00:17:10.480 --> 00:17:15.439
1252
+ the output as a binary output it could
1253
+
1254
+ 00:17:13.520 --> 00:17:17.439
1255
+ be a zero or a one there are of course
1256
+
1257
+ 00:17:15.439 --> 00:17:18.959
1258
+ to start with let's assume it's a binary
1259
+
1260
+ 00:17:17.439 --> 00:17:20.799
1261
+ output later we will see that there are
1262
+
1263
+ 00:17:18.959 --> 00:17:23.120
1264
+ different types of activation functions
1265
+
1266
+ 00:17:20.799 --> 00:17:25.439
1267
+ so it need not always be binary output
1268
+
1269
+ 00:17:23.120 --> 00:17:28.160
1270
+ but to start with let's keep simple so
1271
+
1272
+ 00:17:25.439 --> 00:17:30.799
1273
+ it decides whether the neuron should be
1274
+
1275
+ 00:17:28.160 --> 00:17:33.280
1276
+ fired or not so that is the output like
1277
+
1278
+ 00:17:30.799 --> 00:17:35.280
1279
+ a binary output 0 or 1. all right so
1280
+
1281
+ 00:17:33.280 --> 00:17:36.960
1282
+ again let me summarize this so it takes
1283
+
1284
+ 00:17:35.280 --> 00:17:39.280
1285
+ the inputs so if you're processing an
1286
+
1287
+ 00:17:36.960 --> 00:17:42.559
1288
+ image for example the inputs are the
1289
+
1290
+ 00:17:39.280 --> 00:17:44.559
1291
+ pixel values of the image x1 x2 up to xn
1292
+
1293
+ 00:17:42.559 --> 00:17:46.480
1294
+ there could be hundreds of these so all
1295
+
1296
+ 00:17:44.559 --> 00:17:48.559
1297
+ of those are fed as so these are some
1298
+
1299
+ 00:17:46.480 --> 00:17:51.200
1300
+ values and these pixel values again can
1301
+
1302
+ 00:17:48.559 --> 00:17:54.400
1303
+ be from 0 to 255 each of those pixel
1304
+
1305
+ 00:17:51.200 --> 00:17:56.160
1306
+ values are then multiplied with what is
1307
+
1308
+ 00:17:54.400 --> 00:17:58.160
1309
+ known as a weight this is a numeric
1310
+
1311
+ 00:17:56.160 --> 00:18:01.360
1312
+ value can be any value so this is a
1313
+
1314
+ 00:17:58.160 --> 00:18:03.679
1315
+ number w1 similarly w2 is a number so
1316
+
1317
+ 00:18:01.360 --> 00:18:05.600
1318
+ initially some random values will be
1319
+
1320
+ 00:18:03.679 --> 00:18:07.520
1321
+ assigned and each of these weights are
1322
+
1323
+ 00:18:05.600 --> 00:18:09.919
1324
+ multiplied with the input value and
1325
+
1326
+ 00:18:07.520 --> 00:18:12.320
1327
+ their sum this is known as the weighted
1328
+
1329
+ 00:18:09.919 --> 00:18:14.960
1330
+ sum so that is performed in this kind of
1331
+
1332
+ 00:18:12.320 --> 00:18:17.440
1333
+ the central unit and then a bias is
1334
+
1335
+ 00:18:14.960 --> 00:18:20.080
1336
+ added remember the bias is common for
1337
+
1338
+ 00:18:17.440 --> 00:18:21.760
1339
+ each neuron so there is not one bias
1340
+
1341
+ 00:18:20.080 --> 00:18:24.559
1342
+ value per input
1343
+
1344
+ 00:18:21.760 --> 00:18:26.640
1345
+ so just keep
1346
+
1347
+ 00:18:24.559 --> 00:18:28.640
1348
+ that in mind there is one
1349
+
1350
+ 00:18:26.640 --> 00:18:31.360
1351
+ bias per neuron so it is like this
1352
+
1353
+ 00:18:28.640 --> 00:18:33.200
1354
+ summation plus bias is the output from
1355
+
1356
+ 00:18:31.360 --> 00:18:34.880
1357
+ the section this is not the complete
1358
+
1359
+ 00:18:33.200 --> 00:18:37.600
1360
+ output of the neuron but this is the
1361
+
1362
+ 00:18:34.880 --> 00:18:39.200
1363
+ output of step one that goes
1364
+
1365
+ 00:18:37.600 --> 00:18:41.520
1366
+ as an input to what is known as
1367
+
1368
+ 00:18:39.200 --> 00:18:44.320
1369
+ activation function and that activation
1370
+
1371
+ 00:18:41.520 --> 00:18:46.720
1372
+ function results in an output usually a
1373
+
1374
+ 00:18:44.320 --> 00:18:49.440
1375
+ binary output like a zero or a one which
1376
+
1377
+ 00:18:46.720 --> 00:18:51.919
1378
+ is known as the firing of the neuron
1379
+
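For reference, a minimal Python sketch of the single-neuron computation just described: the weighted sum sigma of w_i * x_i, plus one bias b, fed to a binary step activation. The inputs and weights below are made-up values, not anything from a real network.

# weighted sum of inputs, plus one bias per neuron, then a step activation
def neuron_output(inputs, weights, bias):
    z = sum(w * x for w, x in zip(weights, inputs))  # sigma of w_i * x_i
    return 1 if z + bias >= 0 else 0                 # neuron fires (1) or not (0)

print(neuron_output([0.5, 0.2, 0.9], [0.1, -0.4, 0.3], bias=0.05))  # -> 1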
1380
+ 00:18:49.440 --> 00:18:53.840
1381
+ okay good so we talked about activation
1382
+
1383
+ 00:18:51.919 --> 00:18:55.760
1384
+ function so what is an activation
1385
+
1386
+ 00:18:53.840 --> 00:18:58.880
1387
+ function an activation function
1388
+
1389
+ 00:18:55.760 --> 00:19:02.640
1390
+ basically takes the weighted sum which
1391
+
1392
+ 00:18:58.880 --> 00:19:05.520
1393
+ is we saw w1 x1 w2 x2 the sum of all
1394
+
1395
+ 00:19:02.640 --> 00:19:08.799
1396
+ that plus the bias so it takes that as
1397
+
1398
+ 00:19:05.520 --> 00:19:10.640
1399
+ an input and it generates a certain
1400
+
1401
+ 00:19:08.799 --> 00:19:12.640
1402
+ output now there are different types of
1403
+
1404
+ 00:19:10.640 --> 00:19:14.160
1405
+ activation functions and the output is
1406
+
1407
+ 00:19:12.640 --> 00:19:16.720
1408
+ different for different types of
1409
+
1410
+ 00:19:14.160 --> 00:19:18.720
1411
+ activation functions moreover why is an
1412
+
1413
+ 00:19:16.720 --> 00:19:20.960
1414
+ activation function required it is
1415
+
1416
+ 00:19:18.720 --> 00:19:23.520
1417
+ basically required to bring in
1418
+
1419
+ 00:19:20.960 --> 00:19:25.760
1420
+ non-linearity that's the main reason why
1421
+
1422
+ 00:19:23.520 --> 00:19:26.880
1423
+ an activation function is required so
1424
+
1425
+ 00:19:25.760 --> 00:19:28.720
1426
+ what are the different types of
1427
+
1428
+ 00:19:26.880 --> 00:19:30.720
1429
+ activation functions there are several
1430
+
1431
+ 00:19:28.720 --> 00:19:32.720
1432
+ types of activation functions but these
1433
+
1434
+ 00:19:30.720 --> 00:19:35.200
1435
+ are the most common ones these are the
1436
+
1437
+ 00:19:32.720 --> 00:19:37.600
1438
+ ones that are currently in use sigmoid
1439
+
1440
+ 00:19:35.200 --> 00:19:41.440
1441
+ function was one of the early activation
1442
+
1443
+ 00:19:37.600 --> 00:19:44.400
1444
+ functions but today relu has kind of
1445
+
1446
+ 00:19:41.440 --> 00:19:46.960
1447
+ taken over so relu is by far the most
1448
+
1449
+ 00:19:44.400 --> 00:19:49.600
1450
+ popular activation function that is used
1451
+
1452
+ 00:19:46.960 --> 00:19:52.320
1453
+ today but still sigmoid function is
1454
+
1455
+ 00:19:49.600 --> 00:19:54.160
1456
+ still used in many situations these
1457
+
1458
+ 00:19:52.320 --> 00:19:56.400
1459
+ different types of activation functions
1460
+
1461
+ 00:19:54.160 --> 00:19:58.080
1462
+ are used in different situations based
1463
+
1464
+ 00:19:56.400 --> 00:20:00.000
1465
+ on the kind of problem we are trying to
1466
+
1467
+ 00:19:58.080 --> 00:20:01.840
1468
+ solve so what exactly is the difference
1469
+
1470
+ 00:20:00.000 --> 00:20:03.919
1471
+ between these two with sigmoid the
1472
+
1473
+ 00:20:01.840 --> 00:20:06.799
1474
+ values of the output will be between 0
1475
+
1476
+ 00:20:03.919 --> 00:20:07.760
1477
+ and 1. with the threshold function the value
1478
+
1479
+ 00:20:06.799 --> 00:20:10.240
1480
+ will be
1481
+
1482
+ 00:20:07.760 --> 00:20:12.400
1483
+ 0 up to a certain value and beyond that
1484
+
1485
+ 00:20:10.240 --> 00:20:14.960
1486
+ this is also known as a step function
1487
+
1488
+ 00:20:12.400 --> 00:20:17.600
1489
+ and beyond that it will be 1. in case of
1490
+
1491
+ 00:20:14.960 --> 00:20:19.520
1492
+ sigmoid there is a gradual increase but
1493
+
1494
+ 00:20:17.600 --> 00:20:22.000
1495
+ in case of threshold which is also
1496
+
1497
+ 00:20:19.520 --> 00:20:24.400
1498
+ known as a step function there's a rapid
1499
+
1500
+ 00:20:22.000 --> 00:20:26.080
1501
+ or instantaneous change from zero to one
1502
+
1503
+ 00:20:24.400 --> 00:20:28.400
1504
+ whereas in sigmoid we will see in the
1505
+
1506
+ 00:20:26.080 --> 00:20:30.640
1507
+ next slide there is a gradual increase
1508
+
1509
+ 00:20:28.400 --> 00:20:33.200
1510
+ but the value in this case is between
1511
+
1512
+ 00:20:30.640 --> 00:20:35.600
1513
+ zero and one as well now relu function
1514
+
1515
+ 00:20:33.200 --> 00:20:38.880
1516
+ on the other hand it is equal to
1517
+
1518
+ 00:20:35.600 --> 00:20:42.960
1519
+ basically if the input is 0 or less than
1520
+
1521
+ 00:20:38.880 --> 00:20:46.000
1522
+ 0 then the output is 0 whereas if the
1523
+
1524
+ 00:20:42.960 --> 00:20:48.000
1525
+ input is greater than 0 then the output
1526
+
1527
+ 00:20:46.000 --> 00:20:49.919
1528
+ is equal to the input i know it's a
1529
+
1530
+ 00:20:48.000 --> 00:20:52.400
1531
+ little confusing but in the next slides
1532
+
1533
+ 00:20:49.919 --> 00:20:54.720
1534
+ where we show the relu function it will
1535
+
1536
+ 00:20:52.400 --> 00:20:57.679
1537
+ become clear similarly hyperbolic
1538
+
1539
+ 00:20:54.720 --> 00:21:00.159
1540
+ tangent this is similar to sigmoid in
1541
+
1542
+ 00:20:57.679 --> 00:21:03.360
1543
+ terms of the shape of the function
1544
+
1545
+ 00:21:00.159 --> 00:21:06.400
1546
+ however while sigmoid goes from 0 to 1
1547
+
1548
+ 00:21:03.360 --> 00:21:09.520
1549
+ hyperbolic tangent goes from -1 to 1 and
1550
+
1551
+ 00:21:06.400 --> 00:21:13.760
1552
+ here again the increase or the change
1553
+
1554
+ 00:21:09.520 --> 00:21:15.760
1555
+ from -1 to 1 is gradual and not like
1556
+
1557
+ 00:21:13.760 --> 00:21:18.080
1558
+ threshold or step function where it
1559
+
1560
+ 00:21:15.760 --> 00:21:20.159
1561
+ happens instantaneously so let's take a
1562
+
1563
+ 00:21:18.080 --> 00:21:21.919
1564
+ little detailed look at some of these
1565
+
1566
+ 00:21:20.159 --> 00:21:23.919
1567
+ functions so let's start with the
1568
+
1569
+ 00:21:21.919 --> 00:21:26.559
1570
+ sigmoid function so this is the equation
1571
+
1572
+ 00:21:23.919 --> 00:21:29.679
1573
+ of a sigmoid function which is 1 by 1
1574
+
1575
+ 00:21:26.559 --> 00:21:32.799
1576
+ plus e to the power of minus x so x is
1577
+
1578
+ 00:21:29.679 --> 00:21:36.880
1579
+ the value that is the input it goes from
1580
+
1581
+ 00:21:32.799 --> 00:21:40.000
1582
+ 0 to 1 so this is sigmoid function the
1583
+
1584
+ 00:21:36.880 --> 00:21:42.640
1585
+ equation is phi x is equal to 1 by 1
1586
+
1587
+ 00:21:40.000 --> 00:21:44.400
1588
+ plus e to the power of minus x and as
1589
+
1590
+ 00:21:42.640 --> 00:21:47.520
1591
+ you can see here this is the input on
1592
+
1593
+ 00:21:44.400 --> 00:21:49.600
1594
+ the x-axis as x is where the value is
1595
+
1596
+ 00:21:47.520 --> 00:21:51.440
1597
+ coming from in fact it can also go
1598
+
1599
+ 00:21:49.600 --> 00:21:53.200
1600
+ negative this is negative actually so
1601
+
1602
+ 00:21:51.440 --> 00:21:55.520
1603
+ this is the zero so this is the negative
1604
+
1605
+ 00:21:53.200 --> 00:21:58.720
1606
+ value of x so as x is coming from
1607
+
1608
+ 00:21:55.520 --> 00:22:02.080
1609
+ negative value towards zero the value of
1610
+
1611
+ 00:21:58.720 --> 00:22:05.120
1612
+ the output slowly as it is approaching
1613
+
1614
+ 00:22:02.080 --> 00:22:08.320
1615
+ zero it slowly and very gently
1616
+
1617
+ 00:22:05.120 --> 00:22:11.600
1618
+ increases and actually at the point let
1619
+
1620
+ 00:22:08.320 --> 00:22:15.919
1621
+ me just use a pen at the point here it
1622
+
1623
+ 00:22:11.600 --> 00:22:19.039
1624
+ is actually 0.5 okay and
1625
+
1626
+ 00:22:15.919 --> 00:22:21.440
1627
+ slowly gradually it increases to 1 as
1628
+
1629
+ 00:22:19.039 --> 00:22:24.400
1630
+ the value of x increases but then as the
1631
+
1632
+ 00:22:21.440 --> 00:22:27.360
1633
+ value of x increases it tapers off it
1634
+
1635
+ 00:22:24.400 --> 00:22:29.840
1636
+ doesn't go beyond one so that is the
1637
+
1638
+ 00:22:27.360 --> 00:22:32.320
1639
+ speciality of sigmoid function so the
1640
+
1641
+ 00:22:29.840 --> 00:22:34.960
1642
+ output value will remain between zero
1643
+
1644
+ 00:22:32.320 --> 00:22:37.360
1645
+ and one it will never go below zero or
1646
+
1647
+ 00:22:34.960 --> 00:22:39.679
1648
+ above one okay then so that is sigmoid
1649
+
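A quick numeric check of the sigmoid behaviour described here, phi(x) = 1 / (1 + e^(-x)); a small sketch in plain Python:

import math

def sigmoid(x):
    return 1 / (1 + math.exp(-x))

print(sigmoid(-10))  # ~0.00005: approaches 0 for very negative x
print(sigmoid(0))    # 0.5: exactly 0.5 at x = 0, as noted above
print(sigmoid(10))   # ~0.99995: tapers off toward 1, never goes beyond it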
1650
+ 00:22:37.360 --> 00:22:42.000
1651
+ function now this is threshold function
1652
+
1653
+ 00:22:39.679 --> 00:22:44.880
1654
+ or this is also referred to as a step
1655
+
1656
+ 00:22:42.000 --> 00:22:46.640
1657
+ function and here we can also set the
1658
+
1659
+ 00:22:44.880 --> 00:22:48.240
1660
+ threshold in this case that's why
1661
+
1662
+ 00:22:46.640 --> 00:22:50.720
1663
+ it's called the threshold function
1664
+
1665
+ 00:22:48.240 --> 00:22:52.559
1666
+ normally it is 0 but you can also set a
1667
+
1668
+ 00:22:50.720 --> 00:22:54.240
1669
+ different value for the threshold now
1670
+
1671
+ 00:22:52.559 --> 00:22:57.120
1672
+ the difference between this and the
1673
+
1674
+ 00:22:54.240 --> 00:22:59.840
1675
+ sigmoid is that here the change is rapid
1676
+
1677
+ 00:22:57.120 --> 00:23:02.799
1678
+ or instantaneous as the x value comes
1679
+
1680
+ 00:22:59.840 --> 00:23:06.240
1681
+ from negative up to zero it remains zero
1682
+
1683
+ 00:23:02.799 --> 00:23:08.640
1684
+ and at zero it pretty much immediately
1685
+
1686
+ 00:23:06.240 --> 00:23:11.280
1687
+ increases to 1 okay so this is a
1688
+
1689
+ 00:23:08.640 --> 00:23:13.919
1690
+ mathematical representation of threshold
1691
+
1692
+ 00:23:11.280 --> 00:23:16.799
1693
+ function phi x is equal to 1 if x is
1694
+
1695
+ 00:23:13.919 --> 00:23:18.799
1696
+ greater than equal to 0 and 0 if x is
1697
+
1698
+ 00:23:16.799 --> 00:23:20.640
1699
+ less than 0. so for all negative values
1700
+
1701
+ 00:23:18.799 --> 00:23:23.120
1702
+ it is 0 and since we have set the
1703
+
1704
+ 00:23:20.640 --> 00:23:25.679
1705
+ threshold to be 0 so as soon as it
1706
+
1707
+ 00:23:23.120 --> 00:23:28.640
1708
+ reaches 0 it becomes 1. you see the
1709
+
1710
+ 00:23:25.679 --> 00:23:31.520
1711
+ difference between this and the previous
1712
+
1713
+ 00:23:28.640 --> 00:23:34.720
1714
+ one which is basically the sigmoid where
1715
+
1716
+ 00:23:31.520 --> 00:23:37.120
1717
+ the increase from 0 to 1 is gradual and
1718
+
1719
+ 00:23:34.720 --> 00:23:39.200
1720
+ here it is instantaneous and that's why
1721
+
1722
+ 00:23:37.120 --> 00:23:41.440
1723
+ this is also known as a step function
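The same function in code, as a sketch; the threshold defaults to 0 but, as mentioned, it can be set to a different value:

def threshold(x, t=0):
    return 1 if x >= t else 0  # instantaneous jump from 0 to 1 at the threshold

print(threshold(-0.5))    # 0: negative values stay at 0
print(threshold(0))       # 1: becomes 1 as soon as x reaches the threshold
print(threshold(2, t=3))  # 0: with a custom threshold of 3, 2 is still below it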
1724
+
1725
+ 00:23:39.200 --> 00:23:43.679
1726
+ threshold function or step function this
1727
+
1728
+ 00:23:41.440 --> 00:23:46.159
1729
+ is a relu a relu is one of the most
1730
+
1731
+ 00:23:43.679 --> 00:23:48.799
1732
+ popular activation functions today this
1733
+
1734
+ 00:23:46.159 --> 00:23:51.679
1735
+ is the definition of relu phi x is equal
1736
+
1737
+ 00:23:48.799 --> 00:23:54.400
1738
+ to max of x comma zero what it says is
1739
+
1740
+ 00:23:51.679 --> 00:23:55.679
1741
+ if the value of x is less than zero then
1742
+
1743
+ 00:23:54.400 --> 00:23:58.880
1744
+ phi x is
1745
+
1746
+ 00:23:55.679 --> 00:24:03.600
1747
+ zero the moment it goes beyond
1748
+
1749
+ 00:23:58.880 --> 00:24:06.720
1750
+ zero the value of phi x is equal to x so
1751
+
1752
+ 00:24:03.600 --> 00:24:08.799
1753
+ it doesn't stop at one actually it goes
1754
+
1755
+ 00:24:06.720 --> 00:24:10.720
1756
+ all the way so as the value of x
1757
+
1758
+ 00:24:08.799 --> 00:24:13.440
1759
+ increases the value of y will also
1760
+
1761
+ 00:24:10.720 --> 00:24:17.760
1762
+ increase infinitely so there is no limit
1763
+
1764
+ 00:24:13.440 --> 00:24:19.760
1765
+ here unlike your sigmoid or threshold or
1766
+
1767
+ 00:24:17.760 --> 00:24:22.559
1768
+ the next one which is basically
1769
+
1770
+ 00:24:19.760 --> 00:24:25.200
1771
+ hyperbolic tangent okay so in case of
1772
+
1773
+ 00:24:22.559 --> 00:24:28.080
1774
+ relu remember there is no upper limit
1775
+
1776
+ 00:24:25.200 --> 00:24:31.039
1777
+ the output is equal to either 0 in case
1778
+
1779
+ 00:24:28.080 --> 00:24:34.240
1780
+ the value of x is negative or it is
1781
+
1782
+ 00:24:31.039 --> 00:24:37.039
1783
+ equal to the value of x so for example
1784
+
1785
+ 00:24:34.240 --> 00:24:39.840
1786
+ here if the value of x is 10 then the
1787
+
1788
+ 00:24:37.039 --> 00:24:42.960
1789
+ value of y is also 10 right okay so that
1790
+
1791
+ 00:24:39.840 --> 00:24:45.679
1792
+ is relu and there are several advantages
1793
+
1794
+ 00:24:42.960 --> 00:24:48.159
1795
+ of relu and it is much more efficient
1796
+
1797
+ 00:24:45.679 --> 00:24:49.840
1798
+ and provides much more accuracy compared
1799
+
1800
+ 00:24:48.159 --> 00:24:51.679
1801
+ to other activation functions like
1802
+
1803
+ 00:24:49.840 --> 00:24:54.320
1804
+ sigmoid and so on so that's the reason
1805
+
1806
+ 00:24:51.679 --> 00:24:56.640
1807
+ it is very popular all right so this is
1808
+
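A sketch of ReLU as just defined, phi(x) = max(x, 0), including the x = 10 example from the transcript:

def relu(x):
    return max(x, 0)

print(relu(-5))   # 0: any negative input gives 0
print(relu(10))   # 10: a positive input passes through unchanged
print(relu(100))  # 100: no upper limit, unlike sigmoid or threshold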
1809
+ 00:24:54.320 --> 00:24:58.640
1810
+ hyperbolic tangent activation function
1811
+
1812
+ 00:24:56.640 --> 00:25:01.279
1813
+ the function looks similar to sigmoid
1814
+
1815
+ 00:24:58.640 --> 00:25:03.360
1816
+ function the curve if you see the shape
1817
+
1818
+ 00:25:01.279 --> 00:25:05.279
1819
+ it looks similar to sigmoid function but
1820
+
1821
+ 00:25:03.360 --> 00:25:08.080
1822
+ the difference between hyperbolic
1823
+
1824
+ 00:25:05.279 --> 00:25:10.799
1825
+ tangent and sigmoid function is that in
1826
+
1827
+ 00:25:08.080 --> 00:25:13.200
1828
+ case of sigmoid the output goes from
1829
+
1830
+ 00:25:10.799 --> 00:25:16.960
1831
+ zero to one whereas in case of
1832
+
1833
+ 00:25:13.200 --> 00:25:18.559
1834
+ hyperbolic tangent it goes from -1 to 1
1835
+
1836
+ 00:25:16.960 --> 00:25:21.360
1837
+ so that is the difference between
1838
+
1839
+ 00:25:18.559 --> 00:25:23.840
1840
+ hyperbolic tangent and sigmoid function
1841
+
1842
+ 00:25:21.360 --> 00:25:26.799
1843
+ otherwise the shape looks very similar
1844
+
1845
+ 00:25:23.840 --> 00:25:29.279
1846
+ there is a gradual increase unlike the
1847
+
1848
+ 00:25:26.799 --> 00:25:31.840
1849
+ step function where there was an instant
1850
+
1851
+ 00:25:29.279 --> 00:25:34.159
1852
+ increase or instant change here again
1853
+
1854
+ 00:25:31.840 --> 00:25:37.679
1855
+ very similar to sigmoid function the
1856
+
1857
+ 00:25:34.159 --> 00:25:40.080
1858
+ value changes gradually from -1 to 1. so
1859
+
1860
+ 00:25:37.679 --> 00:25:42.720
1861
+ this is the equation of hyperbolic
1862
+
1863
+ 00:25:40.080 --> 00:25:44.799
1864
+ tangent activation function yeah so then
1865
+
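And a quick check of tanh's range, again just a sketch using Python's standard library:

import math

for x in (-10, 0, 10):
    print(math.tanh(x))  # ~-1.0, 0.0, ~1.0: same gradual S-shape as sigmoid,
                         # but the output runs from -1 to 1 instead of 0 to 1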
1866
+ 00:25:42.720 --> 00:25:47.200
1867
+ let's move on this is a diagrammatic
1868
+
1869
+ 00:25:44.799 --> 00:25:50.880
1870
+ representation of the activation
1871
+
1872
+ 00:25:47.200 --> 00:25:53.440
1873
+ function and how
1874
+
1875
+ 00:25:50.880 --> 00:25:55.840
1876
+ the overall progression happens from
1877
+
1878
+ 00:25:53.440 --> 00:25:57.679
1879
+ input to the output so we get the input
1880
+
1881
+ 00:25:55.840 --> 00:25:59.919
1882
+ from the input layer by the way the
1883
+
1884
+ 00:25:57.679 --> 00:26:01.440
1885
+ neural network has three layers
1886
+
1887
+ 00:25:59.919 --> 00:26:03.120
1888
+ typically there will be three layers
1889
+
1890
+ 00:26:01.440 --> 00:26:04.880
1891
+ there is an input layer there is an
1892
+
1893
+ 00:26:03.120 --> 00:26:07.600
1894
+ output layer and then you have the
1895
+
1896
+ 00:26:04.880 --> 00:26:10.240
1897
+ hidden layer so the inputs come from the
1898
+
1899
+ 00:26:07.600 --> 00:26:12.240
1900
+ input layer and they get processed in
1901
+
1902
+ 00:26:10.240 --> 00:26:14.400
1903
+ the hidden layer and then you get the
1904
+
1905
+ 00:26:12.240 --> 00:26:16.960
1906
+ output in the output layer so let's take
1907
+
1908
+ 00:26:14.400 --> 00:26:19.840
1909
+ a little bit of a detailed look into the
1910
+
1911
+ 00:26:16.960 --> 00:26:22.880
1912
+ working of a neural network so let's say
1913
+
1914
+ 00:26:19.840 --> 00:26:25.679
1915
+ we want to classify some images between
1916
+
1917
+ 00:26:22.880 --> 00:26:28.400
1918
+ dogs and cats how do we do this this is
1919
+
1920
+ 00:26:25.679 --> 00:26:30.159
1921
+ known as a classification process and we
1922
+
1923
+ 00:26:28.400 --> 00:26:31.600
1924
+ are trying to use neural networks and
1925
+
1926
+ 00:26:30.159 --> 00:26:33.520
1927
+ deep learning to implement this
1928
+
1929
+ 00:26:31.600 --> 00:26:37.440
1930
+ classification so how do we do that so
1931
+
1932
+ 00:26:33.520 --> 00:26:40.159
1933
+ this is how it works so you have a four
1934
+
1935
+ 00:26:37.440 --> 00:26:42.559
1936
+ layer neural network there is an input
1937
+
1938
+ 00:26:40.159 --> 00:26:45.440
1939
+ layer there is an output layer and then
1940
+
1941
+ 00:26:42.559 --> 00:26:49.440
1942
+ there are two hidden layers and what we
1943
+
1944
+ 00:26:45.440 --> 00:26:52.080
1945
+ do is we provide labeled training data
1946
+
1947
+ 00:26:49.440 --> 00:26:54.640
1948
+ which means these images are fed to the
1949
+
1950
+ 00:26:52.080 --> 00:26:57.120
1951
+ network with the label saying that okay
1952
+
1953
+ 00:26:54.640 --> 00:27:00.159
1954
+ this is a cat the neural network is
1955
+
1956
+ 00:26:57.120 --> 00:27:02.480
1957
+ allowed to process it and come up with a
1958
+
1959
+ 00:27:00.159 --> 00:27:05.039
1960
+ prediction saying whether it is a cat or
1961
+
1962
+ 00:27:02.480 --> 00:27:07.200
1963
+ a dog and obviously in the beginning
1964
+
1965
+ 00:27:05.039 --> 00:27:09.760
1966
+ there may be mistakes a cat may be
1967
+
1968
+ 00:27:07.200 --> 00:27:12.080
1969
+ classified as a dog so we then say that
1970
+
1971
+ 00:27:09.760 --> 00:27:14.000
1972
+ okay this is wrong this output is wrong
1973
+
1974
+ 00:27:12.080 --> 00:27:16.559
1975
+ but every time it predicts correctly we
1976
+
1977
+ 00:27:14.000 --> 00:27:19.120
1978
+ say yes this output is correct so that
1979
+
1980
+ 00:27:16.559 --> 00:27:21.760
1981
+ learning process so it will go back make
1982
+
1983
+ 00:27:19.120 --> 00:27:24.720
1984
+ some changes to its weights and biases
1985
+
1986
+ 00:27:21.760 --> 00:27:26.799
1987
+ we again feed these inputs and it will
1988
+
1989
+ 00:27:24.720 --> 00:27:28.799
1990
+ give us the output we will check whether
1991
+
1992
+ 00:27:26.799 --> 00:27:31.360
1993
+ it is correct or not and so on so this
1994
+
1995
+ 00:27:28.799 --> 00:27:34.320
1996
+ is a iterative process which is known as
1997
+
1998
+ 00:27:31.360 --> 00:27:36.880
1999
+ the training process so we are training
2000
+
2001
+ 00:27:34.320 --> 00:27:39.440
2002
+ the neural network and what happens in
2003
+
2004
+ 00:27:36.880 --> 00:27:41.760
2005
+ the training process these weights and
2006
+
2007
+ 00:27:39.440 --> 00:27:45.600
2008
+ biases you remember there were weights
2009
+
2010
+ 00:27:41.760 --> 00:27:48.880
2011
+ like w1 w2 and so on so these weights
2012
+
2013
+ 00:27:45.600 --> 00:27:51.679
2014
+ and biases keep changing every time you
2015
+
2016
+ 00:27:48.880 --> 00:27:53.760
2017
+ feed these which is known as an epoch so
2018
+
2019
+ 00:27:51.679 --> 00:27:56.159
2020
+ there are multiple iterations every
2021
+
2022
+ 00:27:53.760 --> 00:27:58.960
2023
+ iteration is known as an epoch and each
2024
+
2025
+ 00:27:56.159 --> 00:28:01.279
2026
+ time the weights are updated to make sure
2027
+
2028
+ 00:27:58.960 --> 00:28:03.679
2029
+ that the maximum number of images are
2030
+
2031
+ 00:28:01.279 --> 00:28:06.080
2032
+ classified correctly so once again what
2033
+
2034
+ 00:28:03.679 --> 00:28:09.600
2035
+ is the input this input could be like
2036
+
2037
+ 00:28:06.080 --> 00:28:12.159
2038
+ 1000 images of cats and dogs and they
2039
+
2040
+ 00:28:09.600 --> 00:28:14.559
2041
+ are labeled because we know which is a
2042
+
2043
+ 00:28:12.159 --> 00:28:17.039
2044
+ cat and which is a dog and we feed those
2045
+
2046
+ 00:28:14.559 --> 00:28:18.960
2047
+ thousand images the neural network will
2048
+
2049
+ 00:28:17.039 --> 00:28:20.799
2050
+ initially assign some weights and biases
2051
+
2052
+ 00:28:18.960 --> 00:28:23.120
2053
+ for each neuron and it will try to
2054
+
2055
+ 00:28:20.799 --> 00:28:25.120
2056
+ process and extract the features from the
2057
+
2058
+ 00:28:23.120 --> 00:28:27.279
2059
+ images and it will try to come up with a
2060
+
2061
+ 00:28:25.120 --> 00:28:29.679
2062
+ prediction for each image and that
2063
+
2064
+ 00:28:27.279 --> 00:28:32.240
2065
+ prediction that is calculated by the
2066
+
2067
+ 00:28:29.679 --> 00:28:34.240
2068
+ network is compared with the actual
2069
+
2070
+ 00:28:32.240 --> 00:28:36.399
2071
+ value whether it is a cat or a dog and
2072
+
2073
+ 00:28:34.240 --> 00:28:38.559
2074
+ that's how the error is calculated so
2075
+
2076
+ 00:28:36.399 --> 00:28:41.279
2077
+ let's say there are a thousand images
2078
+
2079
+ 00:28:38.559 --> 00:28:43.200
2080
+ and in the first run only 500 of them
2081
+
2082
+ 00:28:41.279 --> 00:28:45.440
2083
+ have been correctly classified that
2084
+
2085
+ 00:28:43.200 --> 00:28:47.440
2086
+ means we are getting only 50 percent accuracy so
2087
+
2088
+ 00:28:45.440 --> 00:28:49.760
2089
+ we feed that information back to the
2090
+
2091
+ 00:28:47.440 --> 00:28:51.919
2092
+ network further update these weights and
2093
+
2094
+ 00:28:49.760 --> 00:28:54.480
2095
+ biases for each of the neurons and we
2096
+
2097
+ 00:28:51.919 --> 00:28:56.320
2098
+ run these inputs once again it will
2099
+
2100
+ 00:28:54.480 --> 00:28:58.000
2101
+ try to extract the features
2102
+
2103
+ 00:28:56.320 --> 00:28:59.840
2104
+ and it will try to predict which of
2105
+
2106
+ 00:28:58.000 --> 00:29:02.399
2107
+ these is cats and dogs and this time
2108
+
2109
+ 00:28:59.840 --> 00:29:04.480
2110
+ let's say out of thousand 700 of them
2111
+
2112
+ 00:29:02.399 --> 00:29:06.720
2113
+ have been predicted correctly so that
2114
+
2115
+ 00:29:04.480 --> 00:29:09.679
2116
+ means in the second iteration the
2117
+
2118
+ 00:29:06.720 --> 00:29:12.559
2119
+ accuracy has increased from 50 to 70
2120
+
2121
+ 00:29:09.679 --> 00:29:15.039
2122
+ percent all right then we go back again
2123
+
2124
+ 00:29:12.559 --> 00:29:17.760
2125
+ we feed this maybe for a third iteration
2126
+
2127
+ 00:29:15.039 --> 00:29:20.799
2128
+ fourth iteration and so on and slowly
2129
+
2130
+ 00:29:17.760 --> 00:29:23.360
2131
+ and steadily the accuracy of this
2132
+
2133
+ 00:29:20.799 --> 00:29:26.080
2134
+ network will keep increasing and it may
2135
+
2136
+ 00:29:23.360 --> 00:29:28.240
2137
+ reach you never know 90 or 95
2138
+
2139
+ 00:29:26.080 --> 00:29:30.240
2140
+ percent and there are several parameters
2141
+
2142
+ 00:29:28.240 --> 00:29:32.720
2143
+ that are known as hyper parameters that
2144
+
2145
+ 00:29:30.240 --> 00:29:34.880
2146
+ need to be changed and tweaked and that
2147
+
2148
+ 00:29:32.720 --> 00:29:37.760
2149
+ is the overall training process and
2150
+
2151
+ 00:29:34.880 --> 00:29:39.200
2152
+ ultimately at some point we say okay you
2153
+
2154
+ 00:29:37.760 --> 00:29:42.080
2155
+ will probably never reach hundred
2156
+
2157
+ 00:29:39.200 --> 00:29:44.159
2158
+ percent accuracy but then we set a limit
2159
+
2160
+ 00:29:42.080 --> 00:29:46.080
2161
+ saying that okay if we receive 95
2162
+
2163
+ 00:29:44.159 --> 00:29:48.399
2164
+ percent accuracy that is good enough for
2165
+
2166
+ 00:29:46.080 --> 00:29:50.320
2167
+ our application and then we say okay our
2168
+
2169
+ 00:29:48.399 --> 00:29:53.120
2170
+ training process is done so that is the
2171
+
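The iterate-until-accuracy-is-good-enough loop described above, as a toy Python sketch. Everything here is a stand-in: the "model" is just a knob that predicts correctly with some probability, standing in for real weights and biases.

import random

random.seed(0)
labels = [random.randint(0, 1) for _ in range(1000)]  # stand-in cat/dog labels

quality = 0.5  # toy stand-in for how good the current weights and biases are

def predict(q):
    # fake model: each prediction is correct with probability q
    return [y if random.random() < q else 1 - y for y in labels]

epoch, target = 0, 0.95
while True:
    epoch += 1  # one full pass over the training data = one epoch
    acc = sum(p == y for p, y in zip(predict(quality), labels)) / len(labels)
    print(f"epoch {epoch}: accuracy {acc:.0%}")  # e.g. 50% -> 70% -> ... -> 95%
    if acc >= target:
        break  # 95% is good enough for our application; training is done
    quality = min(1.0, quality + 0.1)  # stand-in for updating weights and biases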
2172
+ 00:29:50.320 --> 00:29:55.760
2173
+ way training happens and once the
2174
+
2175
+ 00:29:53.120 --> 00:29:58.399
2176
+ training is done now with the training
2177
+
2178
+ 00:29:55.760 --> 00:30:01.039
2179
+ data set the system has let's say seen
2180
+
2181
+ 00:29:58.399 --> 00:30:03.760
2182
+ all these thousand images therefore what
2183
+
2184
+ 00:30:01.039 --> 00:30:05.840
2185
+ we do is the next step like in any
2186
+
2187
+ 00:30:03.760 --> 00:30:08.399
2188
+ normal machine learning process we do
2189
+
2190
+ 00:30:05.840 --> 00:30:10.799
2191
+ the testing where we take a fresh set of
2192
+
2193
+ 00:30:08.399 --> 00:30:13.039
2194
+ images and we feed it to the network the
2195
+
2196
+ 00:30:10.799 --> 00:30:14.880
2197
+ fresh set which it has not seen before
2198
+
2199
+ 00:30:13.039 --> 00:30:16.559
2200
+ as a part of the training process and
2201
+
2202
+ 00:30:14.880 --> 00:30:18.159
2203
+ this is again nothing new in deep
2204
+
2205
+ 00:30:16.559 --> 00:30:20.720
2206
+ learning this was there in machine
2207
+
2208
+ 00:30:18.159 --> 00:30:23.440
2209
+ learning as well so you feed the test
2210
+
2211
+ 00:30:20.720 --> 00:30:25.520
2212
+ images and then find out whether we are
2213
+
2214
+ 00:30:23.440 --> 00:30:27.600
2215
+ getting a similar accuracy or not so
2216
+
2217
+ 00:30:25.520 --> 00:30:29.520
2218
+ maybe that accuracy may reduce a little
2219
+
2220
+ 00:30:27.600 --> 00:30:31.840
2221
+ bit while training you may get 98
2222
+
2223
+ 00:30:29.520 --> 00:30:33.760
2224
+ percent and then for test you may get 95
2225
+
2226
+ 00:30:31.840 --> 00:30:36.480
2227
+ percent but there shouldn't be a drastic
2228
+
2229
+ 00:30:33.760 --> 00:30:38.880
2230
+ drop like for example you get 98 percent in
2231
+
2232
+ 00:30:36.480 --> 00:30:40.799
2233
+ training and then you get 50 or 40
2234
+
2235
+ 00:30:38.880 --> 00:30:43.279
2236
+ percent with the test that means your
2237
+
2238
+ 00:30:40.799 --> 00:30:46.320
2239
+ network has not learned you may have to
2240
+
2241
+ 00:30:43.279 --> 00:30:47.919
2242
+ retrain your network so that is the way
2243
+
2244
+ 00:30:46.320 --> 00:30:50.799
2245
+ neural network training works and
2246
+
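That train-versus-test sanity check as a sketch (the numbers are illustrative):

train_acc, test_acc = 0.98, 0.95  # a small drop from train to test is normal
if train_acc - test_acc > 0.10:   # a drastic drop, e.g. 98% train vs 40-50% test
    print("the network has not learned -- retrain it")
else:
    print("test accuracy is close to training accuracy -- looks fine")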
2247
+ 00:30:47.919 --> 00:30:53.279
2248
+ remember the whole process is about
2249
+
2250
+ 00:30:50.799 --> 00:30:55.679
2251
+ changing these weights and biases and
2252
+
2253
+ 00:30:53.279 --> 00:30:57.520
2254
+ coming up with the optimal values of
2255
+
2256
+ 00:30:55.679 --> 00:31:00.240
2257
+ these weights and biases so that the
2258
+
2259
+ 00:30:57.520 --> 00:31:02.960
2260
+ accuracy is the maximum possible all
2261
+
2262
+ 00:31:00.240 --> 00:31:04.960
2263
+ right so a little bit more detail about
2264
+
2265
+ 00:31:02.960 --> 00:31:07.520
2266
+ how this whole thing works so this is
2267
+
2268
+ 00:31:04.960 --> 00:31:09.840
2269
+ known as forward propagation which is
2270
+
2271
+ 00:31:07.520 --> 00:31:12.320
2272
+ the data or the information is going in
2273
+
2274
+ 00:31:09.840 --> 00:31:15.279
2275
+ the forward direction the inputs are
2276
+
2277
+ 00:31:12.320 --> 00:31:18.399
2278
+ taken weighted summation is done bias is
2279
+
2280
+ 00:31:15.279 --> 00:31:21.039
2281
+ added here and then that is fed to the
2282
+
2283
+ 00:31:18.399 --> 00:31:23.200
2284
+ activation function and then that is
2285
+
2286
+ 00:31:21.039 --> 00:31:25.360
2287
+ that comes out as an output so that is
2288
+
2289
+ 00:31:23.200 --> 00:31:27.360
2290
+ forward propagation and the output is
2291
+
2292
+ 00:31:25.360 --> 00:31:29.039
2293
+ compared with the actual value and that
2294
+
2295
+ 00:31:27.360 --> 00:31:31.200
2296
+ will give us the error the difference
2297
+
2298
+ 00:31:29.039 --> 00:31:33.679
2299
+ between them is the error and in
2300
+
2301
+ 00:31:31.200 --> 00:31:36.720
2302
+ technical terms that is also known as
2303
+
2304
+ 00:31:33.679 --> 00:31:38.880
2305
+ our cost function and this is what we
2306
+
2307
+ 00:31:36.720 --> 00:31:40.559
2308
+ would like to minimize there are
2309
+
2310
+ 00:31:38.880 --> 00:31:44.000
2311
+ different ways of defining the cost
2312
+
2313
+ 00:31:40.559 --> 00:31:47.200
2314
+ function but one of the simplest ways is
2315
+
2316
+ 00:31:44.000 --> 00:31:49.120
2317
+ mean square error so it is nothing but
2318
+
2319
+ 00:31:47.200 --> 00:31:51.919
2320
+ the square of the difference of the
2321
+
2322
+ 00:31:49.120 --> 00:31:53.679
2323
+ errors or the sum of the squares of the
2324
+
2325
+ 00:31:51.919 --> 00:31:56.240
2326
+ difference of the errors and this is
2327
+
2328
+ 00:31:53.679 --> 00:31:57.760
2329
+ also nothing new probably if
2330
+
2331
+ 00:31:56.240 --> 00:31:59.760
2332
+ you're familiar with machine learning
2333
+
2334
+ 00:31:57.760 --> 00:32:02.159
2335
+ you must have come across this mean
2336
+
2337
+ 00:31:59.760 --> 00:32:04.320
2338
+ square error now there are different ways of
2339
+
2340
+ 00:32:02.159 --> 00:32:06.240
2341
+ defining cost function it need not
2342
+
2343
+ 00:32:04.320 --> 00:32:08.720
2344
+ always be the mean square error but the
2345
+
2346
+ 00:32:06.240 --> 00:32:11.760
2347
+ most common one is this so you define
2348
+
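The mean square error cost just described, as a small sketch:

def mean_squared_error(actual, predicted):
    # mean of the squared differences between actual and predicted values
    return sum((a - p) ** 2 for a, p in zip(actual, predicted)) / len(actual)

print(mean_squared_error([1, 0, 1, 1], [0.9, 0.2, 0.8, 0.4]))  # 0.1125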
2349
+ 00:32:08.720 --> 00:32:15.200
2350
+ this cost function and you ask the
2351
+
2352
+ 00:32:11.760 --> 00:32:17.600
2353
+ system to minimize this error so we use
2354
+
2355
+ 00:32:15.200 --> 00:32:21.039
2356
+ what is known as an optimization
2357
+
2358
+ 00:32:17.600 --> 00:32:23.519
2359
+ function to minimize this error and the
2360
+
2361
+ 00:32:21.039 --> 00:32:25.840
2362
+ error itself is sent back to the system as
2363
+
2364
+ 00:32:23.519 --> 00:32:27.600
2365
+ feedback and that is known as back
2366
+
2367
+ 00:32:25.840 --> 00:32:30.080
2368
+ propagation and so this is the cost
2369
+
2370
+ 00:32:27.600 --> 00:32:32.880
2371
+ function and how do we optimize the cost
2372
+
2373
+ 00:32:30.080 --> 00:32:35.919
2374
+ function we use what is known as
2375
+
2376
+ 00:32:32.880 --> 00:32:39.519
2377
+ gradient descent so the gradient descent
2378
+
2379
+ 00:32:35.919 --> 00:32:42.480
2380
+ mechanism identifies how to change the
2381
+
2382
+ 00:32:39.519 --> 00:32:45.760
2383
+ weights and biases so that the cost
2384
+
2385
+ 00:32:42.480 --> 00:32:47.919
2386
+ function is minimized and there is also
2387
+
2388
+ 00:32:45.760 --> 00:32:50.159
2389
+ what is known as the rate or the
2390
+
2391
+ 00:32:47.919 --> 00:32:53.120
2392
+ learning rate that is what is shown here
2393
+
2394
+ 00:32:50.159 --> 00:32:55.919
2395
+ as slower and faster so you need to
2396
+
2397
+ 00:32:53.120 --> 00:32:59.360
2398
+ specify what should be the learning rate
2399
+
2400
+ 00:32:55.919 --> 00:33:02.480
2401
+ now if the learning rate is very small
2402
+
2403
+ 00:32:59.360 --> 00:33:04.480
2404
+ then it will probably take very long to
2405
+
2406
+ 00:33:02.480 --> 00:33:07.279
2407
+ train whereas if the learning rate is
2408
+
2409
+ 00:33:04.480 --> 00:33:09.840
2410
+ very high then it will appear to be
2411
+
2412
+ 00:33:07.279 --> 00:33:12.159
2413
+ faster but then it will probably never
2414
+
2415
+ 00:33:09.840 --> 00:33:14.480
2416
+ what is known as converge now what is
2417
+
2418
+ 00:33:12.159 --> 00:33:17.760
2419
+ convergence now we are talking about a
2420
+
2421
+ 00:33:14.480 --> 00:33:20.159
2422
+ few terms here convergence is like this
2423
+
2424
+ 00:33:17.760 --> 00:33:24.000
2425
+ this is a representation of convergence
2426
+
2427
+ 00:33:20.159 --> 00:33:26.240
2428
+ so the whole idea of gradient descent is
2429
+
2430
+ 00:33:24.000 --> 00:33:28.640
2431
+ to optimize the cost function or
2432
+
2433
+ 00:33:26.240 --> 00:33:30.880
2434
+ minimize the cost function in order to
2435
+
2436
+ 00:33:28.640 --> 00:33:34.000
2437
+ do that we need to represent the cost
2438
+
2439
+ 00:33:30.880 --> 00:33:36.480
2440
+ function as this curve we need to come
2441
+
2442
+ 00:33:34.000 --> 00:33:38.960
2443
+ to this minimum value that is what is
2444
+
2445
+ 00:33:36.480 --> 00:33:41.840
2446
+ known as the minimization of the cost
2447
+
2448
+ 00:33:38.960 --> 00:33:44.720
2449
+ function now what happens if we have the
2450
+
2451
+ 00:33:41.840 --> 00:33:48.000
2452
+ learning rate very small is that it will
2453
+
2454
+ 00:33:44.720 --> 00:33:51.200
2455
+ take very long to come to this point on
2456
+
2457
+ 00:33:48.000 --> 00:33:53.279
2458
+ the other hand if you have a higher
2459
+
2460
+ 00:33:51.200 --> 00:33:56.159
2461
+ learning rate what will happen is
2462
+
2463
+ 00:33:53.279 --> 00:33:58.559
2464
+ instead of stopping here it will cross
2465
+
2466
+ 00:33:56.159 --> 00:34:01.279
2467
+ over because the learning rate is high
2468
+
2469
+ 00:33:58.559 --> 00:34:03.440
2470
+ and then it has to come back so it will
2471
+
2472
+ 00:34:01.279 --> 00:34:05.440
2473
+ result in what is known as like an
2474
+
2475
+ 00:34:03.440 --> 00:34:07.760
2476
+ oscillation so it will never come to
2477
+
2478
+ 00:34:05.440 --> 00:34:10.639
2479
+ this point which is known as convergence
2480
+
2481
+ 00:34:07.760 --> 00:34:13.040
2482
+ instead it will go back and forth so
2483
+
2484
+ 00:34:10.639 --> 00:34:14.960
2485
+ these are known as hyper parameters the
2486
+
2487
+ 00:34:13.040 --> 00:34:17.520
2488
+ learning rate and so on and these have
2489
+
2490
+ 00:34:14.960 --> 00:34:20.639
2491
+ to be tuned and those values we
2492
+
2493
+ 00:34:17.520 --> 00:34:23.040
2494
+ can determine typically using trial and
2495
+
2496
+ 00:34:20.639 --> 00:34:25.359
2497
+ error out of experience we try to
2498
+
2499
+ 00:34:23.040 --> 00:34:28.639
2500
+ find out these values so that is the
2501
+
2502
+ 00:34:25.359 --> 00:34:30.560
2503
+ gradient descent mechanism to optimize
2504
+
2505
+ 00:34:28.639 --> 00:34:34.399
2506
+ the cost function and that is what is
2507
+
2508
+ 00:34:30.560 --> 00:34:36.560
2509
+ used to train our neural network this is
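The learning-rate trade-off is easy to see on a one-dimensional cost curve. A sketch: minimize cost(w) = (w - 3)^2 by gradient descent, whose gradient is 2 * (w - 3); only the learning rate changes between runs.

def descend(lr, steps=20):
    w = 0.0
    for _ in range(steps):
        w -= lr * 2 * (w - 3)  # step downhill along the gradient
    return w

print(descend(lr=0.01))  # ~1.0: too small, still far from the minimum (slow)
print(descend(lr=0.1))   # ~3.0: converges to the minimum at w = 3
print(descend(lr=1.1))   # enormous: overshoots back and forth, never settles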
2510
+
2511
+ 00:34:34.399 --> 00:34:38.720
2512
+ another representation of how the
2513
+
2514
+ 00:34:36.560 --> 00:34:41.200
2515
+ training process works and here in this
2516
+
2517
+ 00:34:38.720 --> 00:34:44.320
2518
+ example we are trying to classify these
2519
+
2520
+ 00:34:41.200 --> 00:34:46.960
2521
+ images whether they are cats or dogs and
2522
+
2523
+ 00:34:44.320 --> 00:34:49.599
2524
+ as you can see actually each image is
2525
+
2526
+ 00:34:46.960 --> 00:34:54.000
2527
+ fed in each time one image is fed rather
2528
+
2529
+ 00:34:49.599 --> 00:34:56.960
2530
+ and these values of x1 x2 up to xn are
2531
+
2532
+ 00:34:54.000 --> 00:34:59.280
2533
+ the pixel values within this image okay
2534
+
2535
+ 00:34:56.960 --> 00:35:01.920
2536
+ so those values are then taken and for
2537
+
2538
+ 00:34:59.280 --> 00:35:04.320
2539
+ each of those values a weight is
2540
+
2541
+ 00:35:01.920 --> 00:35:06.079
2542
+ multiplied and then it goes to the next
2543
+
2544
+ 00:35:04.320 --> 00:35:08.480
2545
+ layer and then to the next layer and so
2546
+
2547
+ 00:35:06.079 --> 00:35:10.880
2548
+ on ultimately it comes as the output
2549
+
2550
+ 00:35:08.480 --> 00:35:13.839
2551
+ layer and it gives an output as whether
2552
+
2553
+ 00:35:10.880 --> 00:35:16.720
2554
+ it is a dog or a cat remember the output
2555
+
2556
+ 00:35:13.839 --> 00:35:19.520
2557
+ will never be a named output so these
2558
+
2559
+ 00:35:16.720 --> 00:35:22.400
2560
+ would be like a zero or a one and we say
2561
+
2562
+ 00:35:19.520 --> 00:35:24.400
2563
+ okay zero corresponds to dogs and one
2564
+
2565
+ 00:35:22.400 --> 00:35:26.640
2566
+ corresponds to cats so that is the way
2567
+
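That zero/one convention as a sketch:

CLASS_NAMES = {0: "dog", 1: "cat"}  # we attach the names to the numeric outputs
raw_output = 1                      # what the output layer actually produces
print(CLASS_NAMES[raw_output])      # cat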
2568
+ 00:35:24.400 --> 00:35:28.800
2569
+ it typically happens this is a binary
2570
+
2571
+ 00:35:26.640 --> 00:35:31.280
2572
+ classification we have similar
2573
+
2574
+ 00:35:28.800 --> 00:35:32.960
2575
+ situations where there can be multiple
2576
+
2577
+ 00:35:31.280 --> 00:35:34.960
2578
+ classes which means that there will be
2579
+
2580
+ 00:35:32.960 --> 00:35:38.160
2581
+ more neurons in the output
2582
+
2583
+ 00:35:34.960 --> 00:35:39.920
2584
+ layer okay so this is once again a quick
2585
+
2586
+ 00:35:38.160 --> 00:35:41.839
2587
+ representation of how the forward
2588
+
2589
+ 00:35:39.920 --> 00:35:44.400
2590
+ propagation and the backward propagation
2591
+
2592
+ 00:35:41.839 --> 00:35:46.640
2593
+ works so the information is going
2594
+
2595
+ 00:35:44.400 --> 00:35:49.119
2596
+ in this direction which is basically the
2597
+
2598
+ 00:35:46.640 --> 00:35:50.079
2599
+ forward propagation and at the output
2600
+
2601
+ 00:35:49.119 --> 00:35:53.200
2602
+ level
2603
+
2604
+ 00:35:50.079 --> 00:35:56.480
2605
+ we find out what is the cost function
2606
+
2607
+ 00:35:53.200 --> 00:35:58.560
2608
+ the difference is basically sent back as
2609
+
2610
+ 00:35:56.480 --> 00:36:01.040
2611
+ part of the backward propagation and
2612
+
2613
+ 00:35:58.560 --> 00:36:03.520
2614
+ gradient descent then adjusts the weights
2615
+
2616
+ 00:36:01.040 --> 00:36:06.160
2617
+ and biases for the next iteration this
2618
+
2619
+ 00:36:03.520 --> 00:36:09.280
2620
+ happens iteratively till the cost
2621
+
2622
+ 00:36:06.160 --> 00:36:11.680
2623
+ function is minimized and that is when
2624
+
2625
+ 00:36:09.280 --> 00:36:13.760
2626
+ we say the network has
2627
+
2628
+ 00:36:11.680 --> 00:36:16.160
2629
+ converged or the training process has
2630
+
2631
+ 00:36:13.760 --> 00:36:18.880
2632
+ converged and there can be situations
2633
+
2634
+ 00:36:16.160 --> 00:36:21.599
2635
+ where convergence may not happen in rare
2636
+
2637
+ 00:36:18.880 --> 00:36:24.000
2638
+ cases but by and large the network will
2639
+
2640
+ 00:36:21.599 --> 00:36:26.320
2641
+ converge and after maybe a few
2642
+
2643
+ 00:36:24.000 --> 00:36:28.160
2644
+ iterations it could be tens of
2645
+
2646
+ 00:36:26.320 --> 00:36:30.160
2647
+ iterations or hundreds of iterations
2648
+
2649
+ 00:36:28.160 --> 00:36:32.800
2650
+ depending on what exactly the number of
2651
+
2652
+ 00:36:30.160 --> 00:36:35.599
2653
+ iterations can vary and then we say okay
2654
+
2655
+ 00:36:32.800 --> 00:36:38.079
2656
+ we are getting a certain accuracy and we
2657
+
2658
+ 00:36:35.599 --> 00:36:40.800
2659
+ say that is our threshold maybe 90
2660
+
2661
+ 00:36:38.079 --> 00:36:42.880
2662
+ accuracy we stop at that and we say that
2663
+
2664
+ 00:36:40.800 --> 00:36:44.640
2665
+ the system is trained the trained model
2666
+
2667
+ 00:36:42.880 --> 00:36:47.440
2668
+ is then deployed for production and so
2669
+
2670
+ 00:36:44.640 --> 00:36:49.920
2671
+ on so that is the way the neural network
2672
+
2673
+ 00:36:47.440 --> 00:36:53.200
2674
+ training happens okay so that is the way
2675
+
2676
+ 00:36:49.920 --> 00:36:56.079
2677
+ classification works in deep learning
2678
+
2679
+ 00:36:53.200 --> 00:36:59.280
2680
+ using neural network and this slide is
2681
+
2682
+ 00:36:56.079 --> 00:37:01.520
2683
+ an animation of this whole process as
2684
+
2685
+ 00:36:59.280 --> 00:37:04.079
2686
+ you can see the forward propagation the
2687
+
2688
+ 00:37:01.520 --> 00:37:06.160
2689
+ data is going forward from the input
2690
+
2691
+ 00:37:04.079 --> 00:37:07.359
2692
+ layer to the output layer and there is
2693
+
2694
+ 00:37:06.160 --> 00:37:10.000
2695
+ an output
2696
+
2697
+ 00:37:07.359 --> 00:37:12.960
2698
+ and the error is calculated the cost
2699
+
2700
+ 00:37:10.000 --> 00:37:15.359
2701
+ function is calculated and that is fed
2702
+
2703
+ 00:37:12.960 --> 00:37:18.320
2704
+ back as a part of backward propagation
2705
+
2706
+ 00:37:15.359 --> 00:37:20.800
2707
+ and that whole process repeats once
2708
+
2709
+ 00:37:18.320 --> 00:37:23.359
2710
+ again okay so remember in neural
2711
+
2712
+ 00:37:20.800 --> 00:37:27.520
2713
+ networks the training process is nothing
2714
+
2715
+ 00:37:23.359 --> 00:37:29.760
2716
+ but finding the best values of the
2717
+
2718
+ 00:37:27.520 --> 00:37:32.400
2719
+ weights and biases for each and every
2720
+
2721
+ 00:37:29.760 --> 00:37:34.960
2722
+ neuron in the network that's all
2723
+
2724
+ 00:37:32.400 --> 00:37:37.760
2725
+ training of neural network consists of
2726
+
2727
+ 00:37:34.960 --> 00:37:40.960
2728
+ finding the optimal values of the
2729
+
2730
+ 00:37:37.760 --> 00:37:44.800
2731
+ weights and biases so that the accuracy
2732
+
2733
+ 00:37:40.960 --> 00:37:47.040
2734
+ is maximum all right so with that we
2735
+
2736
+ 00:37:44.800 --> 00:37:51.800
2737
+ come to the end of the session we hope you all
2738
+
2739
+ 00:37:47.040 --> 00:37:51.800
2740
+ have a great day thank you very much
2741
+
2742
+ 00:37:53.839 --> 00:37:57.359
2743
+ hi there if you like this video
2744
+
2745
+ 00:37:55.680 --> 00:38:00.000
2746
+ subscribe to the simply learn youtube
2747
+
2748
+ 00:37:57.359 --> 00:38:02.160
2749
+ channel and click here to watch similar
2750
+
2751
+ 00:38:00.000 --> 00:38:05.480
2752
+ videos turn it up and get certified
2753
+
2754
+ 00:38:02.160 --> 00:38:05.480
2755
+ click here
2756
+
2757
+
data/subtitles/Machine Learning.vtt ADDED
@@ -0,0 +1,621 @@
1
+ WEBVTT - Subtitles by: DownloadYoutubeSubtitles.com
2
+
3
+ 00:00:00.240 --> 00:00:03.760
4
+ we know humans learn from their past
5
+
6
+ 00:00:02.320 --> 00:00:05.680
7
+ experiences
8
+
9
+ 00:00:03.760 --> 00:00:07.359
10
+ and machines follow instructions given
11
+
12
+ 00:00:05.680 --> 00:00:09.599
13
+ by humans
14
+
15
+ 00:00:07.359 --> 00:00:11.519
16
+ but what if humans can train the
17
+
18
+ 00:00:09.599 --> 00:00:14.000
19
+ machines to learn from the past data and
20
+
21
+ 00:00:11.519 --> 00:00:15.839
22
+ do what humans can do and much faster
23
+
24
+ 00:00:14.000 --> 00:00:17.760
25
+ well that's called machine learning but
26
+
27
+ 00:00:15.839 --> 00:00:20.000
28
+ it's a lot more than just learning it's
29
+
30
+ 00:00:17.760 --> 00:00:22.400
31
+ also about understanding and reasoning
32
+
33
+ 00:00:20.000 --> 00:00:24.240
34
+ so today we will learn about the basics
35
+
36
+ 00:00:22.400 --> 00:00:26.800
37
+ of machine learning
38
+
39
+ 00:00:24.240 --> 00:00:28.800
40
+ so that's paul he loves listening to new
41
+
42
+ 00:00:26.800 --> 00:00:30.880
43
+ songs
44
+
45
+ 00:00:28.800 --> 00:00:33.120
46
+ he either likes them or dislikes them
47
+
48
+ 00:00:30.880 --> 00:00:34.880
49
+ paul decides this on the basis of the
50
+
51
+ 00:00:33.120 --> 00:00:36.000
52
+ song's tempo
53
+
54
+ 00:00:34.880 --> 00:00:39.040
55
+ genre
56
+
57
+ 00:00:36.000 --> 00:00:41.440
58
+ intensity and the gender of voice for
59
+
60
+ 00:00:39.040 --> 00:00:44.559
61
+ simplicity let's just use tempo and
62
+
63
+ 00:00:41.440 --> 00:00:47.680
64
+ intensity for now so here tempo is on
65
+
66
+ 00:00:44.559 --> 00:00:50.320
67
+ the x axis ranging from relaxed to fast
68
+
69
+ 00:00:47.680 --> 00:00:53.280
70
+ whereas intensity is on the y axis
71
+
72
+ 00:00:50.320 --> 00:00:56.879
73
+ ranging from light to soaring we see
74
+
75
+ 00:00:53.280 --> 00:00:59.840
76
+ that paul likes the song with fast tempo
77
+
78
+ 00:00:56.879 --> 00:01:02.800
79
+ and soaring intensity while he dislikes
80
+
81
+ 00:00:59.840 --> 00:01:05.280
82
+ the song with relaxed tempo and light
83
+
84
+ 00:01:02.800 --> 00:01:07.360
85
+ intensity so now we know paul's choices
86
+
87
+ 00:01:05.280 --> 00:01:10.720
88
+ let's say paul listens to a new song
89
+
90
+ 00:01:07.360 --> 00:01:13.680
91
+ let's name it as song a song a has fast
92
+
93
+ 00:01:10.720 --> 00:01:15.840
94
+ tempo and a soaring intensity so it lies
95
+
96
+ 00:01:13.680 --> 00:01:17.759
97
+ somewhere here looking at the data can
98
+
99
+ 00:01:15.840 --> 00:01:20.560
100
+ you guess whether paul will like the
101
+
102
+ 00:01:17.759 --> 00:01:23.040
103
+ song or not correct so paul likes this
104
+
105
+ 00:01:20.560 --> 00:01:25.119
106
+ song by looking at paul's past choices
107
+
108
+ 00:01:23.040 --> 00:01:28.400
109
+ we were able to classify the unknown
110
+
111
+ 00:01:25.119 --> 00:01:30.880
112
+ song very easily right let's say now
113
+
114
+ 00:01:28.400 --> 00:01:33.439
115
+ paul listens to a new song let's label
116
+
117
+ 00:01:30.880 --> 00:01:36.720
118
+ it as song b so song b
119
+
120
+ 00:01:33.439 --> 00:01:39.439
121
+ lies somewhere here with medium tempo
122
+
123
+ 00:01:36.720 --> 00:01:42.400
124
+ and medium intensity neither relaxed nor
125
+
126
+ 00:01:39.439 --> 00:01:44.479
127
+ fast neither light nor soaring now can
128
+
129
+ 00:01:42.400 --> 00:01:46.560
130
+ you guess whether paul likes it or not
131
+
132
+ 00:01:44.479 --> 00:01:49.200
133
+ not able to guess whether paul will like
134
+
135
+ 00:01:46.560 --> 00:01:52.159
136
+ it or dislike it are the choices unclear
137
+
138
+ 00:01:49.200 --> 00:01:54.640
139
+ correct we could easily classify song a
140
+
141
+ 00:01:52.159 --> 00:01:57.200
142
+ but when the choice became complicated
143
+
144
+ 00:01:54.640 --> 00:01:59.119
145
+ as in the case of song b yes and that's
146
+
147
+ 00:01:57.200 --> 00:02:01.920
148
+ where machine learning comes in let's
149
+
150
+ 00:01:59.119 --> 00:02:04.240
151
+ see how in the same example for song b
152
+
153
+ 00:02:01.920 --> 00:02:06.719
154
+ if we draw a circle around the song b we
155
+
156
+ 00:02:04.240 --> 00:02:09.440
157
+ see that there are four votes for like
158
+
159
+ 00:02:06.719 --> 00:02:11.760
160
+ whereas one vote for dislike if we go
161
+
162
+ 00:02:09.440 --> 00:02:13.440
163
+ for the majority votes we can say that
164
+
165
+ 00:02:11.760 --> 00:02:15.120
166
+ paul will definitely like the song
167
+
168
+ 00:02:13.440 --> 00:02:17.120
169
+ that's all this was a basic machine
170
+
171
+ 00:02:15.120 --> 00:02:19.200
172
+ learning algorithm it's called k
173
+
174
+ 00:02:17.120 --> 00:02:21.599
175
+ nearest neighbors so this is just a
176
+
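The majority vote described for song B, as a small k-nearest-neighbours sketch; the (tempo, intensity) points below are made up to reproduce the four-likes-to-one-dislike vote:

from collections import Counter

# Paul's past songs as (tempo, intensity, verdict)
history = [(0.9, 0.8, "like"), (0.8, 0.9, "like"), (0.7, 0.6, "like"),
           (0.6, 0.7, "like"), (0.2, 0.1, "dislike"), (0.1, 0.3, "dislike")]

def knn_predict(tempo, intensity, k=5):
    dist = lambda s: (s[0] - tempo) ** 2 + (s[1] - intensity) ** 2
    nearest = sorted(history, key=dist)[:k]  # the k closest past songs
    return Counter(v for _, _, v in nearest).most_common(1)[0][0]

print(knn_predict(0.5, 0.5))  # like: 4 of the 5 nearest neighbours are likes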
177
+ 00:02:19.200 --> 00:02:24.319
178
+ small example in one of the many machine
179
+
180
+ 00:02:21.599 --> 00:02:27.440
181
+ learning algorithms quite easy right
182
+
183
+ 00:02:24.319 --> 00:02:29.840
184
+ believe me it is but what happens when
185
+
186
+ 00:02:27.440 --> 00:02:31.760
187
+ the choices become complicated as in the
188
+
189
+ 00:02:29.840 --> 00:02:33.920
190
+ case of song b that's when machine
191
+
192
+ 00:02:31.760 --> 00:02:35.920
193
+ learning comes in it learns the data
194
+
195
+ 00:02:33.920 --> 00:02:38.160
196
+ builds the prediction model and when the
197
+
198
+ 00:02:35.920 --> 00:02:40.640
199
+ new data point comes in it can easily
200
+
201
+ 00:02:38.160 --> 00:02:43.200
202
+ predict for it more the data better the
203
+
204
+ 00:02:40.640 --> 00:02:45.360
205
+ model higher will be the accuracy there
206
+
207
+ 00:02:43.200 --> 00:02:47.599
208
+ are many ways in which the machine
209
+
210
+ 00:02:45.360 --> 00:02:49.599
211
+ learns it could be either supervised
212
+
213
+ 00:02:47.599 --> 00:02:51.280
214
+ learning unsupervised learning or
215
+
216
+ 00:02:49.599 --> 00:02:53.680
217
+ reinforcement learning let's first
218
+
219
+ 00:02:51.280 --> 00:02:55.519
220
+ quickly understand supervised learning
221
+
222
+ 00:02:53.680 --> 00:02:57.280
223
+ suppose your friend gives you one
224
+
225
+ 00:02:55.519 --> 00:03:00.000
226
+ million coins of three different
227
+
228
+ 00:02:57.280 --> 00:03:02.080
229
+ currencies say one rupee one euro and
230
+
231
+ 00:03:00.000 --> 00:03:04.480
232
+ one dirham each coin has different
233
+
234
+ 00:03:02.080 --> 00:03:07.120
235
+ weights for example a coin of one rupee
236
+
237
+ 00:03:04.480 --> 00:03:09.519
238
+ weighs three grams one euro weighs seven
239
+
240
+ 00:03:07.120 --> 00:03:11.440
241
+ grams and one dirham weighs four grams
242
+
243
+ 00:03:09.519 --> 00:03:13.920
244
+ your model will predict the currency of
245
+
246
+ 00:03:11.440 --> 00:03:16.400
247
+ the coin here your weight becomes the
248
+
249
+ 00:03:13.920 --> 00:03:18.400
250
+ feature of coins while currency becomes
251
+
252
+ 00:03:16.400 --> 00:03:21.040
253
+ the label when you feed this data to the
254
+
255
+ 00:03:18.400 --> 00:03:23.680
256
+ machine learning model it learns which
257
+
258
+ 00:03:21.040 --> 00:03:26.319
259
+ feature is associated with which label
260
+
261
+ 00:03:23.680 --> 00:03:28.959
262
+ for example it will learn that if a coin
263
+
264
+ 00:03:26.319 --> 00:03:30.560
265
+ is of 3 grams it will be a 1 rupee coin
266
+
267
+ 00:03:28.959 --> 00:03:32.879
268
+ let's give a new coin to the machine on
269
+
270
+ 00:03:30.560 --> 00:03:34.959
271
+ the basis of the weight of the new coin
272
+
273
+ 00:03:32.879 --> 00:03:37.599
274
+ your model will predict the currency
275
+
276
+ 00:03:34.959 --> 00:03:40.000
277
+ hence supervised learning uses labeled
278
+
279
+ 00:03:37.599 --> 00:03:42.400
280
+ data to train the model here the machine
281
+
282
+ 00:03:40.000 --> 00:03:44.159
283
+ knew the features of the object and also
284
+
285
+ 00:03:42.400 --> 00:03:46.159
286
+ the labels associated with those
287
+
288
+ 00:03:44.159 --> 00:03:47.760
289
+ features on this note let's move to
290
+
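Before the transcript moves on, here is the coin example as a sketch: the weight is the feature, the currency is the label, and a new coin is classified by the closest training weight.

coins = {3: "one rupee", 7: "one euro", 4: "one dirham"}  # grams -> currency

def predict_currency(weight):
    nearest = min(coins, key=lambda w: abs(w - weight))  # closest known weight
    return coins[nearest]

print(predict_currency(3.1))  # one rupee
print(predict_currency(6.8))  # one euro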
291
+ 00:03:46.159 --> 00:03:49.760
292
+ unsupervised learning and see the
293
+
294
+ 00:03:47.760 --> 00:03:51.440
295
+ difference suppose you have cricket data
296
+
297
+ 00:03:49.760 --> 00:03:53.760
298
+ set of various players with their
299
+
300
+ 00:03:51.440 --> 00:03:56.319
301
+ respective scores and wickets taken when
302
+
303
+ 00:03:53.760 --> 00:03:58.640
304
+ you feed this data set to the machine
305
+
306
+ 00:03:56.319 --> 00:04:00.959
307
+ the machine identifies the pattern of
308
+
309
+ 00:03:58.640 --> 00:04:02.319
310
+ player performance so it plots this data
311
+
312
+ 00:04:00.959 --> 00:04:04.799
313
+ with the respective wickets on the
314
+
315
+ 00:04:02.319 --> 00:04:06.799
316
+ x-axis while runs on the y-axis while
317
+
318
+ 00:04:04.799 --> 00:04:08.879
319
+ looking at the data you'll clearly see
320
+
321
+ 00:04:06.799 --> 00:04:10.879
322
+ that there are two clusters the one
323
+
324
+ 00:04:08.879 --> 00:04:13.280
325
+ cluster are the players who scored
326
+
327
+ 00:04:10.879 --> 00:04:15.680
328
+ higher runs and took less wickets while
329
+
330
+ 00:04:13.280 --> 00:04:18.000
331
+ the other cluster is of the players who
332
+
333
+ 00:04:15.680 --> 00:04:20.560
334
+ scored less runs but took many wickets
335
+
336
+ 00:04:18.000 --> 00:04:22.800
337
+ so here we interpret these two clusters
338
+
339
+ 00:04:20.560 --> 00:04:24.800
340
+ as batsmen and bowlers the important
341
+
342
+ 00:04:22.800 --> 00:04:27.520
343
+ point to note here is that there were no
344
+
345
+ 00:04:24.800 --> 00:04:29.759
346
+ labels of batsmen and bowlers hence the
347
+
348
+ 00:04:27.520 --> 00:04:31.360
349
+ learning with unlabeled data is
350
+
351
+ 00:04:29.759 --> 00:04:33.199
352
+ unsupervised learning so we saw
353
+
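A sketch of that clustering step, assuming scikit-learn is available; the (wickets, runs) numbers are invented:

from sklearn.cluster import KMeans

players = [[1, 95], [0, 110], [2, 80],   # high runs, few wickets
           [9, 12], [8, 20], [10, 5]]    # many wickets, low runs

labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(players)
print(labels)  # e.g. [0 0 0 1 1 1]: two unlabeled groups we can read as
               # batsmen and bowlers -- the algorithm never saw those names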
354
+ 00:04:31.360 --> 00:04:35.199
355
+ supervised learning where the data was
356
+
357
+ 00:04:33.199 --> 00:04:37.520
358
+ labeled and the unsupervised learning
359
+
360
+ 00:04:35.199 --> 00:04:39.360
361
+ where the data was unlabeled and then
362
+
363
+ 00:04:37.520 --> 00:04:41.280
364
+ there is reinforcement learning which is
365
+
366
+ 00:04:39.360 --> 00:04:42.560
367
+ a reward based learning or we can say
368
+
369
+ 00:04:41.280 --> 00:04:44.639
370
+ that it works on the principle of
371
+
372
+ 00:04:42.560 --> 00:04:46.960
373
+ feedback here let's say you provide the
374
+
375
+ 00:04:44.639 --> 00:04:49.919
376
+ system with an image of a dog and ask it
377
+
378
+ 00:04:46.960 --> 00:04:52.080
379
+ to identify it the system identifies it
380
+
381
+ 00:04:49.919 --> 00:04:54.000
382
+ as a cat so you give a negative feedback
383
+
384
+ 00:04:52.080 --> 00:04:55.600
385
+ to the machine saying that it's a dog's
386
+
387
+ 00:04:54.000 --> 00:04:57.759
388
+ image the machine will learn from the
389
+
390
+ 00:04:55.600 --> 00:04:59.919
391
+ feedback and finally if it comes across
392
+
393
+ 00:04:57.759 --> 00:05:01.919
394
+ any other image of a dog it will be able
395
+
396
+ 00:04:59.919 --> 00:05:03.840
397
+ to classify it correctly that is
398
+
399
+ 00:05:01.919 --> 00:05:05.520
400
+ reinforcement learning to generalize
401
+
402
+ 00:05:03.840 --> 00:05:07.680
403
+ machine learning model let's see a
404
+
405
+ 00:05:05.520 --> 00:05:09.280
406
+ flowchart input is given to a machine
407
+
408
+ 00:05:07.680 --> 00:05:10.960
409
+ learning model which then gives the
410
+
411
+ 00:05:09.280 --> 00:05:13.520
412
+ output according to the algorithm
413
+
414
+ 00:05:10.960 --> 00:05:16.000
415
+ applied if it's right we take the output
416
+
417
+ 00:05:13.520 --> 00:05:18.080
418
+ as a final result else we provide
419
+
420
+ 00:05:16.000 --> 00:05:20.639
421
+ feedback to the training model and ask
422
+
423
+ 00:05:18.080 --> 00:05:22.160
424
+ it to predict until it learns i hope
425
+
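The flowchart as a runnable sketch, with an improving sequence of guesses standing in for a real model that learns from feedback:

expected = "dog"
guesses = iter(["cat", "cat", "dog"])  # stand-in model, improving with feedback

while True:
    output = next(guesses)              # input -> model -> output
    if output == expected:
        print("final result:", output)  # right: take the output as the result
        break
    print("wrong -- feed back and predict again")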
426
+ 00:05:20.639 --> 00:05:23.919
427
+ you've understood supervised and
428
+
429
+ 00:05:22.160 --> 00:05:26.240
430
+ unsupervised learning so let's have a
431
+
432
+ 00:05:23.919 --> 00:05:28.720
433
+ quick quiz you have to determine whether
434
+
435
+ 00:05:26.240 --> 00:05:30.560
436
+ the given scenarios uses supervised or
437
+
438
+ 00:05:28.720 --> 00:05:32.880
439
+ unsupervised learning simple right
440
+
441
+ 00:05:30.560 --> 00:05:35.039
442
+ scenario one facebook recognizes your
443
+
444
+ 00:05:32.880 --> 00:05:37.520
445
+ friend in a picture from an album of
446
+
447
+ 00:05:35.039 --> 00:05:40.639
448
+ tagged photographs
449
+
450
+ 00:05:37.520 --> 00:05:43.840
451
+ scenario 2 netflix recommends new movies
452
+
453
+ 00:05:40.639 --> 00:05:46.400
454
+ based on someone's past movie choices
455
+
456
+ 00:05:43.840 --> 00:05:48.800
457
+ scenario 3 analyzing bank data for
458
+
459
+ 00:05:46.400 --> 00:05:51.120
460
+ suspicious transactions and flagging the
461
+
462
+ 00:05:48.800 --> 00:05:53.360
463
+ fraud transactions think wisely and
464
+
465
+ 00:05:51.120 --> 00:05:55.440
466
+ comment below your answers moving on
467
+
468
+ 00:05:53.360 --> 00:05:57.680
469
+ don't you sometimes wonder how is
470
+
471
+ 00:05:55.440 --> 00:05:59.280
472
+ machine learning possible in today's era
473
+
474
+ 00:05:57.680 --> 00:06:02.000
475
+ well that's because today we have
476
+
477
+ 00:05:59.280 --> 00:06:04.479
478
+ humongous data available everybody is
479
+
480
+ 00:06:02.000 --> 00:06:06.240
481
+ online either making a transaction or
482
+
483
+ 00:06:04.479 --> 00:06:08.560
484
+ just surfing the internet and that's
485
+
486
+ 00:06:06.240 --> 00:06:10.960
487
+ generating a huge amount of data every
488
+
489
+ 00:06:08.560 --> 00:06:13.440
490
+ minute and that data my friend is the
491
+
492
+ 00:06:10.960 --> 00:06:15.520
493
+ key to analysis also the memory handling
494
+
495
+ 00:06:13.440 --> 00:06:17.360
496
+ capabilities of computers have largely
497
+
498
+ 00:06:15.520 --> 00:06:20.479
499
+ increased which helps them to process
500
+
501
+ 00:06:17.360 --> 00:06:23.280
502
+ such huge amount of data at hand without
503
+
504
+ 00:06:20.479 --> 00:06:25.360
505
+ any delay and yes computers now have
506
+
507
+ 00:06:23.280 --> 00:06:27.280
508
+ great computational powers so there are
509
+
510
+ 00:06:25.360 --> 00:06:29.520
511
+ a lot of applications of machine
512
+
513
+ 00:06:27.280 --> 00:06:31.280
514
+ learning out there to name a few machine
515
+
516
+ 00:06:29.520 --> 00:06:33.440
517
+ learning is used in healthcare where
518
+
519
+ 00:06:31.280 --> 00:06:35.440
520
+ diagnostics are predicted for doctor's
521
+
522
+ 00:06:33.440 --> 00:06:37.759
523
+ review the sentiment analysis that the
524
+
525
+ 00:06:35.440 --> 00:06:39.600
526
+ tech giants are doing on social media is
527
+
528
+ 00:06:37.759 --> 00:06:41.360
529
+ another interesting application of
530
+
531
+ 00:06:39.600 --> 00:06:43.280
532
+ machine learning fraud detection in the
533
+
534
+ 00:06:41.360 --> 00:06:45.520
535
+ finance sector and also to predict
536
+
537
+ 00:06:43.280 --> 00:06:47.120
538
+ customer churn in the e-commerce sector
539
+
540
+ 00:06:45.520 --> 00:06:49.759
541
+ while booking a cab you must have
542
+
543
+ 00:06:47.120 --> 00:06:51.520
544
+ encountered surge pricing often where it
545
+
546
+ 00:06:49.759 --> 00:06:54.240
547
+ says the fare of your trip has been
548
+
549
+ 00:06:51.520 --> 00:06:56.000
550
+ updated continue booking yes please i'm
551
+
552
+ 00:06:54.240 --> 00:06:58.160
553
+ getting late for office
554
+
555
+ 00:06:56.000 --> 00:07:00.240
556
+ well that's an interesting machine
557
+
558
+ 00:06:58.160 --> 00:07:02.639
559
+ learning model which is used by global
560
+
561
+ 00:07:00.240 --> 00:07:04.639
562
+ taxi giant uber and others where they
563
+
564
+ 00:07:02.639 --> 00:07:06.560
565
+ have differential pricing in real time
566
+
567
+ 00:07:04.639 --> 00:07:10.000
568
+ based on demand the number of cars
569
+
570
+ 00:07:06.560 --> 00:07:12.560
571
+ available bad weather rush hour etc so they
572
+
573
+ 00:07:10.000 --> 00:07:14.800
574
+ use the surge pricing model to ensure
575
+
576
+ 00:07:12.560 --> 00:07:17.280
577
+ that those who need a cab can get one
578
+
579
+ 00:07:14.800 --> 00:07:19.599
580
+ also it uses predictive modeling to
581
+
582
+ 00:07:17.280 --> 00:07:21.680
583
+ predict where the demand will be high
584
+
585
+ 00:07:19.599 --> 00:07:23.759
586
+ with the goal that drivers can take care
587
+
588
+ 00:07:21.680 --> 00:07:26.319
589
+ of the demand and surge pricing can be
590
+
591
+ 00:07:23.759 --> 00:07:29.280
592
+ minimized great hey siri can you remind
593
+
594
+ 00:07:26.319 --> 00:07:30.400
595
+ me to book a cab at 6 pm today ok i'll
596
+
597
+ 00:07:29.280 --> 00:07:33.120
598
+ remind you
599
+
600
+ 00:07:30.400 --> 00:07:35.520
601
+ thanks no problem comment below some
602
+
603
+ 00:07:33.120 --> 00:07:37.360
604
+ interesting everyday examples around you
605
+
606
+ 00:07:35.520 --> 00:07:39.840
607
+ where machines are learning and doing
608
+
609
+ 00:07:37.360 --> 00:07:41.840
610
+ amazing jobs so that's all for machine
611
+
612
+ 00:07:39.840 --> 00:07:43.680
613
+ learning basics today from my side keep
614
+
615
+ 00:07:41.840 --> 00:07:48.199
616
+ watching this space for more interesting
617
+
618
+ 00:07:43.680 --> 00:07:48.199
619
+ videos until then happy learning
620
+
621
+
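Editor's note: the transcript's batsmen/bowlers example maps directly onto k-means clustering. A minimal, purely illustrative sketch follows — the player stats are made up and only the two-cluster idea matters; scikit-learn is already pinned in requirements.txt below.

```python
# Illustrative sketch of the transcript's two-cluster cricket example.
# The numbers are invented; the data carries no batsman/bowler labels.
import numpy as np
from sklearn.cluster import KMeans

# Columns: [career runs, career wickets] — unlabeled data.
X = np.array([
    [5200, 12], [4800, 8], [5500, 15],    # high runs, few wickets
    [900, 310], [700, 280], [1100, 260],  # few runs, many wickets
])

labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(X)
print(labels)  # e.g. [0 0 0 1 1 1] — clusters we interpret as batsmen vs. bowlers
```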
requirements.txt ADDED
@@ -0,0 +1,17 @@
1
+ flask==3.0.3
2
+ gunicorn==21.2.0
3
+ faiss-cpu==1.7.4
4
+ sentence-transformers==2.7.0
5
+ transformers==4.44.2
6
+
7
+ --extra-index-url https://download.pytorch.org/whl/cu121
8
+ torch==2.3.1+cu121
9
+
10
+ nltk==3.9.1
11
+ spacy==3.7.5
12
+ deepsegment==2.5.1
13
+ pandas==2.2.2
14
+ scikit-learn==1.5.1
15
+ webvtt-py==0.4.6
16
+ markupsafe==2.1.5
17
+ rank-bm25==0.2.2
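Editor's note: torch is the only GPU pin here (the cu121 extra index); faiss-cpu keeps vector search on CPU. Since the wheels assume CUDA 12.1, a quick runtime sanity check can confirm the Space actually sees a GPU — a hedged sketch, not part of this commit:

```python
# Optional startup check (illustrative): confirm the cu121 torch build sees a GPU.
import torch

if torch.cuda.is_available():
    print("CUDA OK:", torch.cuda.get_device_name(0))
else:
    print("No GPU visible — models will fall back to CPU.")
```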
templates/index.html ADDED
@@ -0,0 +1,332 @@
1
+ <!-- Search page (index.html) — input, autocomplete, and theme toggle -->
2
+
3
+ <!DOCTYPE html>
4
+ <html lang="en">
5
+ <head>
6
+ <meta charset="utf-8">
7
+ <title>Subtitle Search</title>
8
+ <meta name="viewport" content="width=device-width, initial-scale=1">
9
+
10
+ <style>
11
+ :root{
12
+ --maxw: 720px;
13
+ --radius: 8px;
14
+ --blue: #0b5fff;
15
+ --blue-hover: #0848c9;
16
+ --border: #d0d7de;
17
+ }
18
+
19
+ body {
20
+ font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
21
+ padding: 30px;
22
+ color: #222;
23
+ background: #fff;
24
+
25
+ display: flex;
26
+ flex-direction: column;
27
+ min-height: 100vh;
28
+ align-items: center;
29
+ padding-top: 22vh;
30
+ }
31
+ h1 { margin-bottom: 16px; color: #222; }
32
+
33
+ .search-wrap {
34
+ position: relative;
35
+ max-width: 600px;
36
+ width: 100%;
37
+ margin: 0 auto;
38
+ }
39
+
40
+ .search-row {
41
+ display: flex;
42
+ gap: 0;
43
+ align-items: stretch;
44
+ }
45
+
46
+ #queryInput {
47
+ flex: 1 1 auto;
48
+ min-width: 0;
49
+ height: 42px;
50
+ box-sizing: border-box;
51
+ padding: 10px 12px 10px 40px;
52
+ font-size: 16px;
53
+ border: 1px solid var(--border);
54
+ border-radius: var(--radius) 0 0 var(--radius);
55
+ background:
56
+ url("data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' width='18' height='18' viewBox='0 0 24 24' fill='none' stroke='%2399a3ad' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'><circle cx='11' cy='11' r='8'/><line x1='21' y1='21' x2='16.65' y2='16.65'/></svg>")
57
+ no-repeat 12px center / 18px 18px #fff;
58
+ color: #111;
59
+ background-color: #fff;
60
+ }
61
+ #queryInput::placeholder { color: #6b7280; }
62
+ #queryInput:focus {
63
+ outline: none;
64
+ border-color: var(--blue);
65
+ box-shadow: 0 0 0 3px rgba(11,95,255,.15);
66
+ }
67
+
68
+ .search-btn {
69
+ height: 42px;
70
+ box-sizing: border-box;
71
+ line-height: 42px;
72
+ padding: 0 16px;
73
+ font-size: 15px;
74
+ font-weight: 600;
75
+ color: #fff;
76
+ background: var(--blue);
77
+ border: 1px solid var(--blue);
78
+ border-radius: 0 var(--radius) var(--radius) 0;
79
+ cursor: pointer;
80
+ flex: 0 0 auto;
81
+ }
82
+ .search-btn:hover { background: var(--blue-hover); border-color: var(--blue-hover); }
83
+
84
+ #suggestions {
85
+ border: 1px solid #ccc;
86
+ border-radius: 6px;
87
+ max-width: var(--maxw);
88
+ margin-top: 6px;
89
+ padding: 0;
90
+ list-style: none;
91
+ background: #fff;
92
+ position: absolute;
93
+ top: calc(42px + 6px);
94
+ left: 0;
95
+ width: 100%;
96
+ z-index: 10;
97
+ display: none;
98
+ box-shadow: 0 8px 16px rgba(0,0,0,0.08);
99
+ overflow: hidden;
100
+ color: #111;
101
+ }
102
+ #suggestions.show { display: block; }
103
+
104
+ #suggestions li {
105
+ padding: 10px 12px;
106
+ cursor: pointer;
107
+ line-height: 1.3;
108
+ }
109
+ #suggestions li:hover,
110
+ #suggestions li.selected { background: #f0f6ff; }
111
+ .no-suggestions {
112
+ color: #666;
113
+ font-style: italic;
114
+ padding: 10px 12px;
115
+ }
116
+
117
+ #loading {
118
+ font-size: 14px;
119
+ color: #666;
120
+ margin-top: 6px;
121
+ display: none;
122
+ }
123
+
124
+ body, #queryInput, #suggestions, .search-btn, .theme-toggle {
125
+ transition: background-color .2s, color .2s, border-color .2s, box-shadow .2s;
126
+ }
127
+
128
+ .theme-toggle {
129
+ position: fixed; top: 16px; right: 16px;
130
+ background: #f9f9f9; color: #222;
131
+ border: 1px solid #00000022; padding: 8px 12px; border-radius: 8px;
132
+ font-weight: 600; cursor: pointer; box-shadow: 0 4px 12px rgba(0,0,0,0.08);
133
+ }
134
+ .theme-toggle:hover { box-shadow: 0 6px 18px rgba(0,0,0,0.15); }
135
+
136
+ html[data-theme="dark"] body { background: #0e0f12; color: #e7e9ee; }
137
+ html[data-theme="dark"] h1 { color: #e7e9ee; }
138
+
139
+ html[data-theme="dark"] #queryInput {
140
+ background-color: #15171c;
141
+ color: #e7e9ee;
142
+ border-color: #333;
143
+ }
144
+ html[data-theme="dark"] #queryInput::placeholder { color: #b3b8c4; }
145
+
146
+ html[data-theme="dark"] #suggestions {
147
+ background: #15171c;
148
+ color: #e7e9ee;
149
+ border-color: #333;
150
+ box-shadow: 0 8px 16px rgba(0,0,0,0.4);
151
+ }
152
+ html[data-theme="dark"] #suggestions li { color: #e7e9ee; }
153
+ html[data-theme="dark"] #suggestions li:hover,
154
+ html[data-theme="dark"] li.selected { background: #1d2026; }
155
+ html[data-theme="dark"] .no-suggestions { color: #b3b8c4; }
156
+
157
+ html[data-theme="dark"] #loading { color: #b3b8c4; }
158
+
159
+ html[data-theme="dark"] .search-btn {
160
+ background: #2d7ed8;
161
+ border-color: #2d7ed8;
162
+ color: #fff;
163
+ }
164
+ html[data-theme="dark"] .search-btn:hover {
165
+ background: #2464ac;
166
+ border-color: #2464ac;
167
+ }
168
+
169
+ html[data-theme="dark"] .theme-toggle {
170
+ background: #15171c; color: #e7e9ee; border-color: #333;
171
+ box-shadow: 0 4px 12px rgba(0,0,0,0.4);
172
+ }
173
+ </style>
174
+ </head>
175
+ <body>
176
+ <!-- Theme toggle -->
177
+ <button id="themeToggle" class="theme-toggle" onclick="__toggleTheme()">🌙 Dark</button>
178
+
179
+ <h1>Keyword Search</h1>
180
+
181
+ <!-- Search form with autocomplete -->
182
+ <form action="/search" method="POST" autocomplete="off" role="search" class="search-wrap">
183
+ <div class="search-row">
184
+ <input
185
+ type="text"
186
+ name="query"
187
+ id="queryInput"
188
+ placeholder="Enter your query here (e.g., neural networks)"
189
+ size="50"
190
+ aria-label="Search input"
191
+ aria-autocomplete="list"
192
+ aria-controls="suggestions"
193
+ aria-expanded="false"
194
+ aria-haspopup="listbox">
195
+ <button type="submit" class="search-btn">Search</button>
196
+ </div>
197
+
198
+ <div id="loading" aria-live="polite">Loading suggestions…</div>
199
+ <ul id="suggestions" role="listbox" aria-labelledby="queryInput"></ul>
200
+ </form>
201
+
202
+ <!-- Version tag -->
203
+ <div style="font-size:0.8em; color:#888; margin-top:6px; text-align:center;">
204
+ Version 1.1
205
+ </div>
206
+
207
+ <!-- Theme toggle logic -->
208
+ <script>
209
+ (function() {
210
+ const saved = localStorage.getItem('theme');
211
+ if (saved) document.documentElement.dataset.theme = saved;
212
+
213
+ function setTheme(t) {
214
+ document.documentElement.dataset.theme = t;
215
+ localStorage.setItem('theme', t);
216
+ const btn = document.getElementById('themeToggle');
217
+ if (btn) btn.textContent = t === 'dark' ? ' Light' : ' Dark';
218
+ }
219
+
220
+ window.__toggleTheme = function() {
221
+ const next = (document.documentElement.dataset.theme === 'dark') ? 'light' : 'dark';
222
+ setTheme(next);
223
+ };
224
+
225
+ document.addEventListener('DOMContentLoaded', () => {
226
+ const cur = document.documentElement.dataset.theme || 'light';
227
+ const btn = document.getElementById('themeToggle');
228
+ if (btn) btn.textContent = cur === 'dark' ? ' Light' : ' Dark';
229
+ });
230
+ })();
231
+ </script>
232
+
233
+ <!-- Autocomplete logic -->
234
+ <script>
235
+ const input = document.getElementById('queryInput');
236
+ const suggestionBox = document.getElementById('suggestions');
237
+ const loadingEl = document.getElementById('loading');
238
+
239
+ const escapeHtml = (str) =>
240
+ str.replace(/[&<>"']/g, t => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[t]));
241
+
242
+ let selectedIndex = -1;
243
+
244
+ input.addEventListener('input', async () => {
245
+ const term = input.value.trim();
246
+ suggestionBox.classList.remove('show');
247
+ suggestionBox.innerHTML = '';
248
+ input.setAttribute('aria-expanded', 'false');
249
+ selectedIndex = -1;
250
+
251
+ if (term.length < 2) {
252
+ loadingEl.style.display = 'none';
253
+ return;
254
+ }
255
+
256
+ loadingEl.style.display = 'block';
257
+ try {
258
+ const res = await fetch(`/autocomplete?term=${encodeURIComponent(term)}`);
259
+ const suggestions = await res.json();
260
+ loadingEl.style.display = 'none';
261
+
262
+ if (!Array.isArray(suggestions) || suggestions.length === 0) {
263
+ suggestionBox.innerHTML = '<li class="no-suggestions" role="option" aria-disabled="true">No suggestions found</li>';
264
+ suggestionBox.classList.add('show');
265
+ input.setAttribute('aria-expanded', 'true');
266
+ return;
267
+ }
268
+
269
+ suggestionBox.innerHTML = suggestions
270
+ .map(s => `<li role="option">${escapeHtml(s)}</li>`)
271
+ .join('');
272
+ suggestionBox.classList.add('show');
273
+ input.setAttribute('aria-expanded', 'true');
274
+
275
+ } catch (err) {
276
+ console.error('Autocomplete error:', err);
277
+ loadingEl.style.display = 'none';
278
+ }
279
+ });
280
+
281
+ suggestionBox.addEventListener('click', (e) => {
282
+ const li = e.target.closest('li[role="option"]');
283
+ if (!li || li.classList.contains('no-suggestions')) return;
284
+ input.value = li.textContent;
285
+ suggestionBox.classList.remove('show');
286
+ suggestionBox.innerHTML = '';
287
+ input.setAttribute('aria-expanded', 'false');
288
+ });
289
+
290
+ input.addEventListener('keydown', (e) => {
291
+ const items = suggestionBox.querySelectorAll('li[role="option"]:not(.no-suggestions)');
292
+ if (!items.length) return;
293
+
294
+ if (e.key === 'ArrowDown') {
295
+ e.preventDefault();
296
+ selectedIndex = Math.min(selectedIndex + 1, items.length - 1);
297
+ updateSelection(items);
298
+ } else if (e.key === 'ArrowUp') {
299
+ e.preventDefault();
300
+ selectedIndex = Math.max(selectedIndex - 1, 0);
301
+ updateSelection(items);
302
+ } else if (e.key === 'Enter') {
303
+ if (selectedIndex >= 0) {
304
+ e.preventDefault();
305
+ input.value = items[selectedIndex].textContent;
306
+ suggestionBox.classList.remove('show');
307
+ suggestionBox.innerHTML = '';
308
+ input.setAttribute('aria-expanded', 'false');
309
+ }
310
+ } else if (e.key === 'Escape') {
311
+ suggestionBox.classList.remove('show');
312
+ suggestionBox.innerHTML = '';
313
+ input.setAttribute('aria-expanded', 'false');
314
+ }
315
+ });
316
+
317
+ function updateSelection(items) {
318
+ items.forEach((item, i) => item.classList.toggle('selected', i === selectedIndex));
319
+ const active = items[selectedIndex];
320
+ if (active) active.scrollIntoView({ block: 'nearest' });
321
+ }
322
+
323
+ document.addEventListener('click', (e) => {
324
+ if (!e.target.closest('form')) {
325
+ suggestionBox.classList.remove('show');
326
+ suggestionBox.innerHTML = '';
327
+ input.setAttribute('aria-expanded', 'false');
328
+ }
329
+ });
330
+ </script>
331
+ </body>
332
+ </html>
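Editor's note: the page above fetches `/autocomplete?term=...` and expects a JSON array of strings. The route itself lives in backend/app.py and is not part of this excerpt; the sketch below is a hypothetical rendering of the contract the front end assumes.

```python
# Hypothetical sketch of the /autocomplete contract assumed by index.html;
# the real route in backend/app.py may differ.
from flask import request, jsonify
from autocomplete import get_suggestions  # helper already imported by the app

@app.route("/autocomplete")  # `app` is the Flask app defined in backend/app.py
def autocomplete():
    term = request.args.get("term", "").strip()
    if len(term) < 2:          # the page only fires requests at >= 2 characters
        return jsonify([])
    return jsonify(get_suggestions(term))  # must serialize to a JSON array of strings
```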
templates/results.html ADDED
@@ -0,0 +1,435 @@
1
+ <!-- Results page (results.html) — shows search results with semantic toggle, autocomplete, and pagination -->
2
+
3
+ <!DOCTYPE html>
4
+ <html lang="en">
5
+ <head>
6
+ <meta charset="utf-8">
7
+ <title>Search Results</title>
8
+ <link rel="preconnect" href="https://www.youtube.com">
9
+ <link rel="preconnect" href="https://img.youtube.com">
10
+ <meta name="viewport" content="width=device-width, initial-scale=1">
11
+
12
+ <!-- Theme bootstrap -->
13
+ <script>
14
+ try {
15
+ var t = localStorage.getItem('theme');
16
+ if (t) { document.documentElement.dataset.theme = t; }
17
+ } catch (e) {}
18
+ </script>
19
+
20
+ <style>
21
+ :root {
22
+ --bg: #ffffff;
23
+ --bg-soft: #f9f9f9;
24
+ --text: #222;
25
+ --text-muted: #555;
26
+ --accent: #007bff;
27
+ --accent-dark: #0056b3;
28
+ --good: #28a745;
29
+ --good-dark: #218838;
30
+ --warn-bg: #fff3cd;
31
+ --warn-text: #856404;
32
+ --warn-accent: #ffc107;
33
+ --shadow: rgba(0,0,0,0.08);
34
+ --shadow-strong: rgba(0,0,0,0.15);
35
+ --border: #eee;
36
+ }
37
+
38
+ html[data-theme="dark"] {
39
+ --bg: #0e0f12;
40
+ --bg-soft: #15171c;
41
+ --text: #e7e9ee;
42
+ --text-muted: #b3b8c4;
43
+ --accent: #4ea1ff;
44
+ --accent-dark: #2d7ed8;
45
+ --good: #39d353;
46
+ --good-dark: #29943a;
47
+ --warn-bg: #2a2206;
48
+ --warn-text: #f0d08a;
49
+ --warn-accent: #ffd15a;
50
+ --shadow: rgba(0,0,0,0.4);
51
+ --shadow-strong: rgba(0,0,0,0.6);
52
+ --border: #333;
53
+ }
54
+
55
+ * { box-sizing: border-box; }
56
+ body {
57
+ font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
58
+ padding: 30px;
59
+ background: linear-gradient(to bottom right, #f3f4f6, #ffffff);
60
+ color: var(--text);
61
+ }
62
+
63
+ body, .result-card, .context-block { transition: background-color .2s, color .2s, box-shadow .2s; }
64
+ html[data-theme="dark"] body { background: #0e0f12; }
65
+
66
+ h1 { color: var(--text); margin-bottom: 10px; }
67
+
68
+ /* Search bar */
69
+ .search-wrap {
70
+ position: relative;
71
+ max-width: 600px;
72
+ width: 100%;
73
+ margin: 0 auto 20px auto;
74
+ }
75
+ .search-row {
76
+ display: flex;
77
+ gap: 0;
78
+ align-items: stretch;
79
+ }
80
+ #queryInput {
81
+ flex: 1 1 auto;
82
+ min-width: 0;
83
+ height: 42px;
84
+ box-sizing: border-box;
85
+ padding: 10px 12px 10px 40px;
86
+ font-size: 16px;
87
+ border: 1px solid var(--border);
88
+ border-radius: 8px 0 0 8px;
89
+ background:
90
+ url("data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' width='18' height='18' viewBox='0 0 24 24' fill='none' stroke='%2399a3ad' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'><circle cx='11' cy='11' r='8'/><line x1='21' y1='21' x2='16.65' y2='16.65'/></svg>")
91
+ no-repeat 12px center / 18px 18px #fff;
92
+ color: #111;
93
+ background-color: #fff;
94
+ }
95
+ #queryInput::placeholder { color: #6b7280; }
96
+ .search-btn {
97
+ height: 42px;
98
+ padding: 0 16px;
99
+ font-size: 15px;
100
+ font-weight: 600;
101
+ color: #fff;
102
+ background: var(--accent);
103
+ border: 1px solid var(--accent);
104
+ border-radius: 0 8px 8px 0;
105
+ cursor: pointer;
106
+ flex: 0 0 auto;
107
+ }
108
+ .search-btn:hover { background: var(--accent-dark); border-color: var(--accent-dark); }
109
+
110
+ /* Autocomplete dropdown */
111
+ #loading {
112
+ font-size: 14px;
113
+ color: #666;
114
+ margin-top: 6px;
115
+ display: none;
116
+ }
117
+ #suggestions {
118
+ border: 1px solid #ccc;
119
+ border-radius: 6px;
120
+ max-width: 600px;
121
+ margin-top: 6px;
122
+ padding: 0;
123
+ list-style: none;
124
+ background: #fff;
125
+ position: absolute;
126
+ top: calc(42px + 6px);
127
+ left: 0;
128
+ width: 100%;
129
+ z-index: 10;
130
+ display: none;
131
+ box-shadow: 0 8px 16px rgba(0,0,0,0.08);
132
+ overflow: hidden;
133
+ color: #111;
134
+ }
135
+ #suggestions.show { display: block; }
136
+ #suggestions li {
137
+ padding: 10px 12px;
138
+ cursor: pointer;
139
+ line-height: 1.3;
140
+ }
141
+ #suggestions li:hover,
142
+ #suggestions li.selected { background: #f0f6ff; }
143
+ .no-suggestions {
144
+ color: #666;
145
+ font-style: italic;
146
+ padding: 10px 12px;
147
+ }
148
+
149
+ /* Dark theme overrides */
150
+ html[data-theme="dark"] #queryInput {
151
+ background-color: #15171c;
152
+ color: #e7e9ee;
153
+ border-color: #333;
154
+ }
155
+ html[data-theme="dark"] #queryInput::placeholder { color: #b3b8c4; }
156
+ html[data-theme="dark"] .search-btn {
157
+ background: #2d7ed8; border-color: #2d7ed8; color: #fff;
158
+ }
159
+ html[data-theme="dark"] .search-btn:hover {
160
+ background: #2464ac; border-color: #2464ac;
161
+ }
162
+ html[data-theme="dark"] #suggestions {
163
+ background: #15171c;
164
+ color: #e7e9ee;
165
+ border-color: #333;
166
+ box-shadow: 0 8px 16px rgba(0,0,0,0.4);
167
+ }
168
+ html[data-theme="dark"] #suggestions li { color: #e7e9ee; }
169
+ html[data-theme="dark"] #suggestions li:hover,
170
+ html[data-theme="dark"] #suggestions li.selected { background: #1d2026; }
171
+ html[data-theme="dark"] .no-suggestions { color: var(--text-muted); }
172
+ html[data-theme="dark"] #loading { color: var(--text-muted); }
173
+
174
+ .meta-count { margin-bottom: 24px; color: #333; }
175
+ html[data-theme="dark"] .meta-count { color: var(--text); }
176
+
177
+ /* Result cards */
178
+ .result-card {
179
+ background: var(--bg);
180
+ border-radius: 12px;
181
+ box-shadow: 0 4px 12px var(--shadow);
182
+ padding: 20px;
183
+ margin-bottom: 30px;
184
+ transition: 0.25s ease;
185
+ }
186
+ .result-card:hover { transform: translateY(-2px); box-shadow: 0 6px 18px var(--shadow-strong); }
187
+
188
+ .video-title { font-size: 20px; font-weight: 700; color: #333; margin-bottom: 6px; line-height: 1.25; word-break: break-word; }
189
+ html[data-theme="dark"] .video-title { color: var(--text); }
190
+
191
+ .timestamp { font-size: 14px; color: #666; margin-bottom: 8px; }
192
+ html[data-theme="dark"] .timestamp { color: var(--text-muted); }
193
+
194
+ .thumb-wrap { margin: 10px 0 6px; }
195
+ .thumbnail { width: 320px; max-width: 100%; height: auto; border-radius: 8px; border: 1px solid var(--border); }
196
+
197
+ .jump-link {
198
+ display: inline-block;
199
+ margin-top: 8px;
200
+ padding: 10px 16px;
201
+ background-color: var(--accent);
202
+ color: #fff;
203
+ text-decoration: none;
204
+ border-radius: 6px;
205
+ font-weight: 600;
206
+ }
207
+ .jump-link:hover { background-color: var(--accent-dark); }
208
+
209
+ .context-block {
210
+ background: var(--bg-soft);
211
+ border-left: 4px solid var(--accent);
212
+ padding: 12px;
213
+ margin-top: 14px;
214
+ white-space: pre-wrap;
215
+ font-size: 15px;
216
+ line-height: 1.5;
217
+ word-wrap: break-word;
218
+ }
219
+
220
+ .summary { font-style: italic; margin-top: 10px; color: var(--text-muted); word-wrap: break-word; }
221
+
222
+ mark { background-color: #fff59e; padding: 0 2px; border-radius: 2px; }
223
+
224
+ /* Pagination */
225
+ .show-more-form { text-align: center; margin-top: 30px; }
226
+ .show-more-form button {
227
+ padding: 12px 20px;
228
+ font-size: 16px;
229
+ background-color: var(--good);
230
+ color: white;
231
+ border: none;
232
+ border-radius: 6px;
233
+ cursor: pointer;
234
+ font-weight: 600;
235
+ }
236
+ .show-more-form button:hover { background-color: var(--good-dark); }
237
+
238
+ /* 💡 Did you mean? suggestion */
239
+ .suggestion-box {
240
+ background: var(--warn-bg);
241
+ border-left: 4px solid var(--warn-accent);
242
+ padding: 12px 20px;
243
+ margin-bottom: 20px;
244
+ font-size: 15px;
245
+ color: var(--warn-text);
246
+ }
247
+ .suggestion-box form { display: inline; }
248
+ .suggestion-box button {
249
+ background: none;
250
+ border: none;
251
+ color: var(--accent);
252
+ font-weight: 700;
253
+ text-decoration: underline;
254
+ cursor: pointer;
255
+ padding: 0;
256
+ font-size: 15px;
257
+ }
258
+ .suggestion-box button:hover { color: var(--accent-dark); }
259
+
260
+ /* Theme toggle button */
261
+ .theme-toggle {
262
+ position: fixed; top: 16px; right: 16px;
263
+ background: var(--bg-soft); color: var(--text);
264
+ border: 1px solid #00000022; padding: 8px 12px; border-radius: 8px;
265
+ font-weight: 600; cursor: pointer; box-shadow: 0 4px 12px var(--shadow);
266
+ }
267
+ .theme-toggle:hover { box-shadow: 0 6px 18px var(--shadow-strong); }
268
+
269
+ /* ✅Semantic toggle button */
270
+ .semantic-btn {
271
+ padding: 6px 12px;
272
+ border-radius: 8px;
273
+ border: 1px solid var(--border);
274
+ background: var(--bg-soft);
275
+ color: var(--text);
276
+ cursor: pointer;
277
+ font-size: 14px;
278
+ font-weight: 600;
279
+ transition: background-color 0.2s, color 0.2s, border-color 0.2s;
280
+ }
281
+ .semantic-btn.active {
282
+ background: var(--accent);
283
+ color: #fff;
284
+ border-color: var(--accent);
285
+ }
286
+ html[data-theme="dark"] .semantic-btn {
287
+ background: var(--bg-soft);
288
+ color: var(--text);
289
+ border-color: var(--border);
290
+ }
291
+ html[data-theme="dark"] .semantic-btn.active {
292
+ background: var(--accent);
293
+ color: #fff;
294
+ border-color: var(--accent);
295
+ }
296
+ </style>
297
+
298
+ <!-- Theme toggle logic -->
299
+ <script>
300
+ (function() {
301
+ const saved = localStorage.getItem('theme');
302
+ if (saved) document.documentElement.dataset.theme = saved;
303
+ function setTheme(t) {
304
+ document.documentElement.dataset.theme = t;
305
+ localStorage.setItem('theme', t);
306
+ const btn = document.getElementById('themeToggle');
307
+ if (btn) btn.textContent = t === 'dark' ? ' Light' : ' Dark';
308
+ }
309
+ window.__toggleTheme = function() {
310
+ const next = (document.documentElement.dataset.theme === 'dark') ? 'light' : 'dark';
311
+ setTheme(next);
312
+ };
313
+ document.addEventListener('DOMContentLoaded', () => {
314
+ const cur = document.documentElement.dataset.theme || 'light';
315
+ const btn = document.getElementById('themeToggle');
316
+ if (btn) btn.textContent = cur === 'dark' ? ' Light' : ' Dark';
317
+ });
318
+ })();
319
+ </script>
320
+ </head>
321
+ <body>
322
+ <button id="themeToggle" class="theme-toggle" onclick="__toggleTheme()">🌙 Dark</button>
323
+
324
+ <!-- 🔎 Search bar at top -->
325
+ <form action="/search" method="POST" autocomplete="off" role="search" class="search-wrap">
326
+ <div class="search-row">
327
+ <input
328
+ type="text"
329
+ name="query"
330
+ id="queryInput"
331
+ placeholder="Enter your query here (e.g., neural networks)"
332
+ size="50"
333
+ value="{{ query }}"
334
+ aria-label="Search input"
335
+ aria-autocomplete="list"
336
+ aria-controls="suggestions"
337
+ aria-expanded="false"
338
+ aria-haspopup="listbox">
339
+ <button type="submit" class="search-btn">Search</button>
340
+ </div>
341
+ <div id="loading" aria-live="polite">Loading suggestions…</div>
342
+ <ul id="suggestions" role="listbox" aria-labelledby="queryInput"></ul>
343
+ </form>
344
+
345
+ <h1>🔍 Search Results for: “{{ query }}”</h1>
346
+
347
+ <p class="meta-count">
348
+ <strong>Showing {{ shown }} of {{ total_matches }} results.</strong>
349
+ <!-- Semantic toggle button -->
350
+ <form method="POST" action="/search" style="display:inline; margin-left:10px;">
351
+ <input type="hidden" name="query" value="{{ query }}">
352
+ <input type="hidden" name="start" value="0">
353
+ <input type="hidden" name="shown" value="0">
354
+ <input type="hidden" name="previous_results" value="[]">
355
+ <button type="submit"
356
+ name="semantic"
357
+ value="{% if semantic %}false{% else %}true{% endif %}"
358
+ class="semantic-btn {% if semantic %}active{% endif %}">
359
+ Semantic: {% if semantic %}ON{% else %}OFF{% endif %}
360
+ </button>
361
+ </form>
362
+ {% if semantic %}
363
+ <span class="semantic-note">
364
+ Semantic mode may return results without the exact words but with similar meaning.
365
+ </span>
366
+ {% endif %}
367
+ </p>
368
+
369
+ <!-- Did you mean? -->
370
+ {% if suggestion_term %}
371
+ <div class="suggestion-box" role="note" aria-live="polite">
372
+ 💡 Looking for
373
+ <form method="POST" action="/search">
374
+ <input type="hidden" name="query" value="{{ suggestion_term }}">
375
+ <input type="hidden" name="shown" value="0">
376
+ <input type="hidden" name="start" value="0">
377
+ <input type="hidden" name="previous_results" value="[]">
378
+ {% if semantic %}
379
+ <input type="hidden" name="semantic" value="true">
380
+ {% endif %}
381
+ <button type="submit">“{{ suggestion_term }}”</button>
382
+ </form>?
383
+ Try searching that too!
384
+ </div>
385
+ {% endif %}
386
+
387
+ <!-- Results loop -->
388
+ {% for result in results %}
389
+ <div class="result-card">
390
+ <div class="video-title">{{ result.video_title }}</div>
391
+ {% if result.video_id and result.video_id != 'unknown' %}
392
+ <div class="thumb-wrap">
393
+ <img
394
+ class="thumbnail"
395
+ loading="lazy"
396
+ src="https://img.youtube.com/vi/{{ result.video_id }}/hqdefault.jpg"
397
+ alt="Thumbnail for {{ result.video_title }}"
398
+ onerror="this.onerror=null;this.style.display='none';"> <!-- external placeholder hosts would be blocked by the app's CSP img-src, so hide the broken image instead -->
399
+ </div>
400
+ <div class="timestamp">
401
+ ⏱️ <time datetime="{{ result.timestamp }}">{{ result.timestamp }}</time>
402
+ </div>
403
+ <a
404
+ class="jump-link"
405
+ href="https://www.youtube.com/watch?v={{ result.video_id }}&t={{ result.timestamp|jump_time }}s"
406
+ target="_blank"
407
+ rel="noopener">▶ Jump to Video</a>
408
+ {% else %}
409
+ <div style="color:#b00020;">⚠️ No video ID found. Cannot jump to video.</div>
410
+ {% endif %}
411
+ <div class="context-block">{{ result.highlighted_block }}</div>
412
+ <div class="summary"> Summary: {{ result.summary }}</div>
413
+ </div>
414
+ {% endfor %}
415
+
416
+ <!-- Pagination -->
417
+ {% if start < total_matches %}
418
+ <div class="show-more-form">
419
+ <form method="POST" action="/search">
420
+ <input type="hidden" name="query" value="{{ query }}">
421
+ <input type="hidden" name="start" value="{{ start }}">
422
+ <input type="hidden" name="shown" value="{{ shown }}">
423
+ <input type="hidden" name="previous_results" value='{{ previous_results | tojson | safe }}'>
424
+ {% if semantic %}
425
+ <input type="hidden" name="semantic" value="true">
426
+ {% endif %}
427
+ <button type="submit"> Show More Results</button>
428
+ </form>
429
+ </div>
430
+ {% endif %}
431
+
432
+ <br>
433
+ <a href="/">← Back to Search</a>
434
+ </body>
435
+ </html>
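Editor's note: every form on this page posts the same hidden fields (query, start, shown, previous_results as JSON, and optionally semantic). A hedged sketch of how the /search handler — in backend/app.py, not shown in this excerpt — would need to read them; the helper name and parsing details are hypothetical.

```python
# Hypothetical helper mirroring the hidden fields posted by results.html;
# the actual /search handler may parse these differently.
import json
from flask import request

def read_search_form():
    query = request.form.get("query", "").strip()
    start = int(request.form.get("start", 0))
    shown = int(request.form.get("shown", 0))
    previous = json.loads(request.form.get("previous_results", "[]"))
    semantic = request.form.get("semantic", "false") == "true"
    return query, start, shown, previous, semantic
```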