Spaces:

Daksh0505
/

Youtube-Chatbot

Running

App Files Files Community

Daksh0505 committed on Oct 4

Commit

1ab0e96

verified ·

1 Parent(s): ea1c842

Create app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import streamlit as st
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.prompts import PromptTemplate
+import os
+# Hugging Face access token read from the HF_API_KEY environment variable;
+# os.getenv returns None when the variable is not set.
+api_key = os.getenv("HF_API_KEY")
+# 📼 Transcript Language Options
+@st.cache_data
+def get_available_languages(video_id):
+    transcriber = YouTubeTranscriptApi()
+    try:
+        transcript_info = transcriber.list(video_id)
+        return [(t.language_code, t.language) for t in transcript_info]
+    except Exception:
+        return []
+# 📼 Transcript Fetcher
+@st.cache_data
+def get_transcript(video_id, language_code):
+    transcriber = YouTubeTranscriptApi()
+    try:
+        transcript_list = transcriber.fetch(video_id, languages=[language_code])
+        return ' '.join([d.text for d in transcript_list])
+    except (NoTranscriptFound, TranscriptsDisabled):
+        return None
+    except Exception:
+        return None
+# 🧠 Embedding Loader
+@st.cache_resource
+def load_embeddings():
+    return HuggingFaceEmbeddings(
+        model_name="intfloat/multilingual-e5-base",
+        model_kwargs={"device": "cpu"}
+    )
+# 🧱 Vector Store Builder
+@st.cache_data
+def create_vector_store(transcript):
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    docs = splitter.create_documents([transcript])
+    return FAISS.from_documents(docs, load_embeddings())
+# 🤖 Model Builder
+def build_model(model_choice, temperature):
+    repo_id = "deepseek-ai/DeepSeek-V3.2-Exp" if model_choice == "DeepSeek" else "openai/gpt-oss-20b"
+    llm = HuggingFaceEndpoint(
+        repo_id=repo_id,
+        huggingfacehub_api_token=api_key,
+        task="text-generation"
+    )
+    return ChatHuggingFace(llm=llm, temperature=temperature)
+# 🧾 Prompt Template
+prompt_template = PromptTemplate(
+    template=(
+        "You are a helpful assistant.\n\n"
+        "Answer the question using the context provided below.\n"
+        "If the context does not mention the topic, say clearly: 'There is no mention of the topic in the video you provided.'\n"
+        "Then, based on your own knowledge, try to answer the question.\n"
+        "If both the context and your knowledge are insufficient, say: 'I don't know.'\n\n"
+        "Keep the answer format neat, clean, and human-readable.\n\n"
+        "Context:\n{context}\n\n"
+        "Question:\n{question}"
+    ),
+    input_variables=["context", "question"]
+)
+# 🚀 App UI
+st.title("🎥 YouTube Transcript Chatbot")
+video_id = st.text_input("YouTube Video ID", value="lv1_-RER4_I")
+if video_id:
+    langs = get_available_languages(video_id)
+    lang_options = [f"{name} ({code})" for code, name in langs] if langs else ["No transcript available"]
+    selected_lang = st.selectbox("Transcript Language", lang_options)
+    language_code = selected_lang.split("(")[-1].strip(")") if langs else None
+else:
+    language_code = None
+query = st.text_area("Your Query", value="What is RAG?")
+model_choice = st.radio("Model to Use", ["DeepSeek", "OpenAI"])
+temperature = st.slider("Temperature", 0, 100, value=50)
+if st.button("🚀 Run Chatbot"):
+    if not video_id or not query or not language_code:
+        st.warning("Please fill in all fields.")
+    else:
+        with st.spinner("Fetching transcript and generating response..."):
+            transcript = get_transcript(video_id, language_code)
+            if not transcript:
+                st.error("Transcript not available or disabled.")
+            else:
+                retriever = create_vector_store(transcript).as_retriever(search_type="mmr", search_kwargs={"k": 5})
+                relevant_docs = retriever.invoke(query)
+                context_text = "\n\n".join(doc.page_content for doc in relevant_docs)
+                prompt = prompt_template.invoke({"context": context_text, "question": query})
+                model = build_model(model_choice, temperature / 100.0)
+                response = model.invoke(prompt)
+                st.text_area("Model Response", value=response.content, height=400)