Spaces:
Sleeping
Sleeping
| # import nest_asyncio | |
| # from youtube_transcript_api import YouTubeTranscriptApi | |
| # import streamlit as st | |
| # import os | |
| # from groq import Groq | |
| # nest_asyncio.apply() | |
| # # --- CONFIGURATION --- | |
| # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets | |
| # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0 | |
| # # Initialize Groq client once | |
| # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) | |
| # # --- FUNCTION: Fetch recent video IDs from YouTube channel --- | |
| # def get_latest_video_ids(channel_id, max_results=5): | |
| # import requests | |
| # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}" | |
| # response = requests.get(url) | |
| # videos = response.json().get('items', []) | |
| # return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video'] | |
| # # --- FUNCTION: Get video transcripts --- | |
| # def get_video_transcripts(video_ids): | |
| # all_transcripts = [] | |
| # for vid in video_ids: | |
| # try: | |
| # transcript = YouTubeTranscriptApi.get_transcript(vid) | |
| # text = " ".join([t['text'] for t in transcript]) | |
| # all_transcripts.append(text) | |
| # except: | |
| # continue | |
| # return all_transcripts | |
| # # --- FUNCTION: Ask Groq API using official client --- | |
| # def ask_groq(context, question): | |
| # messages = [ | |
| # {"role": "system", "content": "You are a helpful assistant."}, | |
| # {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"} | |
| # ] | |
| # chat_completion = groq_client.chat.completions.create( | |
| # model="llama-3.3-70b-versatile", # Or the model you have access to | |
| # messages=messages, | |
| # ) | |
| # return chat_completion.choices[0].message.content.strip() | |
| # # --- STREAMLIT APP --- | |
| # def main(): | |
| # st.set_page_config(page_title="EduBot - YouTube Channel QA", layout="wide") | |
| # st.title("๐ EduBot for @icodeguru0") | |
| # st.markdown("Ask anything based on the channelโs recent videos.") | |
| # question = st.text_input("๐ฌ Ask your question here:") | |
| # if question: | |
| # with st.spinner("๐ Fetching videos and transcripts..."): | |
| # video_ids = get_latest_video_ids(channel_id) | |
| # transcripts = get_video_transcripts(video_ids) | |
| # full_context = "\n\n".join(transcripts) | |
| # with st.spinner("๐ง Thinking..."): | |
| # answer = ask_groq(full_context, question) | |
| # st.success(answer) | |
| # st.markdown("---") | |
| # st.caption("Powered by YouTube + Groq | Built for @icodeguru0") | |
| # if __name__ == "__main__": | |
| # main() | |
| # # import nest_asyncio | |
| # # from youtube_transcript_api import YouTubeTranscriptApi | |
| # # import streamlit as st | |
| # # import os | |
| # # from groq import Groq | |
| # # import requests | |
| # # from bs4 import BeautifulSoup | |
| # # nest_asyncio.apply() | |
| # # # --- CONFIGURATION --- | |
| # # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") # Set in your HuggingFace Secrets | |
| # # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # @icodeguru0 | |
| # # BASE_URL = "https://icode.guru" | |
| # # # Initialize Groq client once | |
| # # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) | |
| # # # --- FUNCTION: Fetch recent video IDs from YouTube channel --- | |
| # # def get_latest_video_ids(channel_id, max_results=5): | |
| # # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}" | |
| # # response = requests.get(url) | |
| # # videos = response.json().get('items', []) | |
| # # return [v['id']['videoId'] for v in videos if v['id']['kind'] == 'youtube#video'] | |
| # # # --- FUNCTION: Get video transcripts --- | |
| # # def get_video_transcripts(video_ids): | |
| # # all_transcripts = [] | |
| # # for vid in video_ids: | |
| # # try: | |
| # # transcript = YouTubeTranscriptApi.get_transcript(vid) | |
| # # text = " ".join([t['text'] for t in transcript]) | |
| # # all_transcripts.append(text) | |
| # # except: | |
| # # continue | |
| # # return all_transcripts | |
| # # # --- NEW FUNCTION: Scrape icode.guru --- | |
| # # def scrape_icodeguru(base_url="https://icode.guru", max_pages=5): | |
| # # visited = set() | |
| # # blocks = [] | |
| # # def crawl(url): | |
| # # if url in visited or len(visited) >= max_pages: | |
| # # return | |
| # # visited.add(url) | |
| # # try: | |
| # # res = requests.get(url, timeout=10) | |
| # # soup = BeautifulSoup(res.content, "html.parser") | |
| # # page_text = soup.get_text(separator=" ", strip=True) | |
| # # if len(page_text) > 100: | |
| # # blocks.append(f"[Source]({url}):\n{page_text[:2000]}") | |
| # # for link in soup.find_all("a", href=True): | |
| # # href = link['href'] | |
| # # if href.startswith("/"): | |
| # # href = base_url + href | |
| # # if href.startswith(base_url): | |
| # # crawl(href) | |
| # # except: | |
| # # pass | |
| # # crawl(base_url) | |
| # # return blocks | |
| # # # --- FUNCTION: Ask Groq API using official client --- | |
| # # def ask_groq(context, question): | |
| # # messages = [ | |
| # # {"role": "system", "content": "You are a helpful assistant. Only answer using the given context (YouTube + icode.guru). Provide links if possible."}, | |
| # # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"} | |
| # # ] | |
| # # chat_completion = groq_client.chat.completions.create( | |
| # # model="llama-3.3-70b-versatile", # Or the model you have access to | |
| # # messages=messages, | |
| # # ) | |
| # # return chat_completion.choices[0].message.content.strip() | |
| # # # --- STREAMLIT APP --- | |
| # # def main(): | |
| # # st.set_page_config(page_title="EduBot - YouTube + iCodeGuru QA", layout="wide") | |
| # # st.title("๐ EduBot for @icodeguru0") | |
| # # st.markdown("Ask anything based on the channelโs recent videos and website content from [icode.guru](https://icode.guru).") | |
| # # question = st.text_input("๐ฌ Ask your question here:") | |
| # # if question: | |
| # # with st.spinner("๐ Fetching videos and transcripts..."): | |
| # # video_ids = get_latest_video_ids(channel_id) | |
| # # transcripts = get_video_transcripts(video_ids) | |
| # # yt_context = "\n\n".join(transcripts) | |
| # # with st.spinner("๐ Scraping icode.guru..."): | |
| # # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5) | |
| # # site_context = "\n\n".join(site_blocks) | |
| # # full_context = yt_context + "\n\n" + site_context | |
| # # with st.spinner("๐ง Thinking..."): | |
| # # answer = ask_groq(full_context, question) | |
| # # st.success(answer) | |
| # # st.markdown("---") | |
| # # st.caption("Powered by YouTube + iCodeGuru + Groq | Built for @icodeguru0") | |
| # # if __name__ == "__main__": | |
| # # main() | |
| #(youtube+web) | |
| # import nest_asyncio | |
| # import streamlit as st | |
| # import os | |
| # import requests | |
| # from youtube_transcript_api import YouTubeTranscriptApi | |
| # from groq import Groq | |
| # from bs4 import BeautifulSoup | |
| # nest_asyncio.apply() | |
| # # --- CONFIGURATION --- | |
| # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") | |
| # GROQ_API_KEY = os.environ.get("GROQ_API_KEY") | |
| # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru | |
| # BASE_URL = "https://icode.guru" | |
| # groq_client = Groq(api_key=GROQ_API_KEY) | |
| # # --- Fetch recent video IDs from YouTube channel --- | |
| # def get_latest_video_ids(channel_id, max_results=5): | |
| # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}" | |
| # response = requests.get(url) | |
| # videos = response.json().get('items', []) | |
| # valid_videos = [] | |
| # for v in videos: | |
| # if v['id']['kind'] == 'youtube#video': | |
| # title = v['snippet']['title'] | |
| # channel_title = v['snippet']['channelTitle'] | |
| # video_id = v['id']['videoId'] | |
| # if "icodeguru" in channel_title.lower(): # โ Extra validation | |
| # valid_videos.append((video_id, title)) | |
| # return valid_videos | |
| # # --- Get video transcripts --- | |
| # def get_video_transcripts(video_info): | |
| # results = [] | |
| # for vid, title in video_info: | |
| # try: | |
| # transcript = YouTubeTranscriptApi.get_transcript(vid) | |
| # text = " ".join([t['text'] for t in transcript]) | |
| # video_link = f"https://www.youtube.com/watch?v={vid}" | |
| # results.append({ | |
| # "video_id": vid, | |
| # "title": title, | |
| # "link": video_link, | |
| # "transcript": text | |
| # }) | |
| # except Exception as e: | |
| # continue | |
| # return results | |
| # # --- Scrape icode.guru --- | |
| # def scrape_icodeguru(base_url=BASE_URL, max_pages=5): | |
| # visited = set() | |
| # blocks = [] | |
| # def crawl(url): | |
| # if url in visited or len(visited) >= max_pages: | |
| # return | |
| # visited.add(url) | |
| # try: | |
| # res = requests.get(url, timeout=10) | |
| # soup = BeautifulSoup(res.content, "html.parser") | |
| # page_text = soup.get_text(separator=" ", strip=True) | |
| # if len(page_text) > 100: | |
| # blocks.append(f"[{url}]({url}):\n{page_text[:1500]}") | |
| # for link in soup.find_all("a", href=True): | |
| # href = link['href'] | |
| # if href.startswith("/"): | |
| # href = base_url + href | |
| # if href.startswith(base_url): | |
| # crawl(href) | |
| # except: | |
| # pass | |
| # crawl(base_url) | |
| # return blocks | |
| # # --- Ask Groq --- | |
| # def ask_groq(context, question): | |
| # messages = [ | |
| # {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."}, | |
| # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"} | |
| # ] | |
| # chat_completion = groq_client.chat.completions.create( | |
| # model="llama-3.3-70b-versatile", | |
| # messages=messages, | |
| # ) | |
| # return chat_completion.choices[0].message.content.strip() | |
| # #--- STREAMLIT APP --- | |
| # def main(): | |
| # st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide") | |
| # st.title("๐ EduBot for @icodeguru0") | |
| # st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).") | |
| # question = st.text_input("๐ฌ Ask your question:") | |
| # if question: | |
| # with st.spinner("๐บ Fetching YouTube videos..."): | |
| # video_info = get_latest_video_ids(channel_id, max_results=5) | |
| # transcripts = get_video_transcripts(video_info) | |
| # yt_context = "" | |
| # relevant_links = [] | |
| # for vid in transcripts: | |
| # yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}" | |
| # if question.lower() in vid['transcript'].lower(): | |
| # relevant_links.append(vid['link']) | |
| # with st.spinner("๐ Scraping icode.guru..."): | |
| # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5) | |
| # site_context = "\n\n".join(site_blocks) | |
| # full_context = yt_context + "\n\n" + site_context | |
| # with st.spinner("๐ง Thinking..."): | |
| # answer = ask_groq(full_context, question) | |
| # st.success(answer) | |
| # if relevant_links: | |
| # st.markdown("### ๐ Related YouTube Links") | |
| # for link in relevant_links: | |
| # st.markdown(f"- [Watch Video]({link})") | |
| # st.markdown("---") | |
| # st.caption("Powered by YouTube, iCodeGuru, and Groq") | |
| # if __name__ == "__main__": | |
| # main() | |
| # (vectordb) | |
| # import nest_asyncio | |
| # import streamlit as st | |
| # import os | |
| # import requests | |
| # from youtube_transcript_api import YouTubeTranscriptApi | |
| # from groq import Groq | |
| # from bs4 import BeautifulSoup | |
| # from sentence_transformers import SentenceTransformer | |
| # import chromadb | |
| # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction | |
| # import json | |
| # nest_asyncio.apply() | |
| # # --- CONFIGURATION --- | |
| # YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY") | |
| # GROQ_API_KEY = os.environ.get("GROQ_API_KEY") | |
| # channel_id = "UCsv3kmQ5k1eIRG2R9mWN" # iCodeGuru | |
| # BASE_URL = "https://icode.guru" | |
| # groq_client = Groq(api_key=GROQ_API_KEY) | |
| # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction | |
| # embedding_function = SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") | |
| # chroma_client = chromadb.Client() | |
| # collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function) | |
| # # --- Upload + load files as vector DB --- | |
| # def load_uploaded_vectors(uploaded_files): | |
| # data = [] | |
| # for file in uploaded_files: | |
| # if file.name.endswith(".txt"): | |
| # text = file.read().decode() | |
| # data.append({"id": file.name, "content": text}) | |
| # elif file.name.endswith(".json"): | |
| # content = json.load(file) | |
| # for i, chunk in enumerate(content): | |
| # data.append({"id": f"{file.name}-{i}", "content": chunk}) | |
| # return data | |
| # def search_vector_data(query, data): | |
| # if not data: | |
| # return None | |
| # collection = chroma_client.get_or_create_collection("temp_query", embedding_function=embedding_function) | |
| # collection.add(documents=[d["content"] for d in data], ids=[d["id"] for d in data]) | |
| # results = collection.query(query_texts=[query], n_results=3) | |
| # if results and results["documents"]: | |
| # return "\n\n".join([doc for doc in results["documents"][0]]) | |
| # return None | |
| # # --- Fetch recent video IDs from YouTube channel --- | |
| # def get_latest_video_ids(channel_id, max_results=5): | |
| # url = f"https://www.googleapis.com/youtube/v3/search?key={YOUTUBE_API_KEY}&channelId={channel_id}&part=snippet,id&order=date&maxResults={max_results}" | |
| # response = requests.get(url) | |
| # videos = response.json().get('items', []) | |
| # valid_videos = [] | |
| # for v in videos: | |
| # if v['id']['kind'] == 'youtube#video': | |
| # title = v['snippet']['title'] | |
| # channel_title = v['snippet']['channelTitle'] | |
| # video_id = v['id']['videoId'] | |
| # if "icodeguru" in channel_title.lower(): | |
| # valid_videos.append((video_id, title)) | |
| # return valid_videos | |
| # # --- Get video transcripts --- | |
| # def get_video_transcripts(video_info): | |
| # results = [] | |
| # for vid, title in video_info: | |
| # try: | |
| # transcript = YouTubeTranscriptApi.get_transcript(vid) | |
| # text = " ".join([t['text'] for t in transcript]) | |
| # video_link = f"https://www.youtube.com/watch?v={vid}" | |
| # results.append({ | |
| # "video_id": vid, | |
| # "title": title, | |
| # "link": video_link, | |
| # "transcript": text | |
| # }) | |
| # except: | |
| # continue | |
| # return results | |
| # # --- Scrape icode.guru --- | |
| # def scrape_icodeguru(base_url=BASE_URL, max_pages=5): | |
| # visited = set() | |
| # blocks = [] | |
| # def crawl(url): | |
| # if url in visited or len(visited) >= max_pages: | |
| # return | |
| # visited.add(url) | |
| # try: | |
| # res = requests.get(url, timeout=10) | |
| # soup = BeautifulSoup(res.content, "html.parser") | |
| # page_text = soup.get_text(separator=" ", strip=True) | |
| # if len(page_text) > 100: | |
| # blocks.append(f"[{url}]({url}):\n{page_text[:1500]}") | |
| # for link in soup.find_all("a", href=True): | |
| # href = link['href'] | |
| # if href.startswith("/"): | |
| # href = base_url + href | |
| # if href.startswith(base_url): | |
| # crawl(href) | |
| # except: | |
| # pass | |
| # crawl(base_url) | |
| # return blocks | |
| # # --- Ask Groq --- | |
| # def ask_groq(context, question): | |
| # messages = [ | |
| # {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."}, | |
| # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"} | |
| # ] | |
| # chat_completion = groq_client.chat.completions.create( | |
| # model="llama3-8b-8192", | |
| # messages=messages, | |
| # ) | |
| # return chat_completion.choices[0].message.content.strip() | |
| # #--- STREAMLIT APP --- | |
| # def main(): | |
| # st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide") | |
| # st.title("๐ EduBot for @icodeguru0") | |
| # st.markdown("Ask anything based on the latest YouTube videos and website content of [icode.guru](https://icode.guru).") | |
| # uploaded_files = st.file_uploader("๐ Optionally upload your knowledge files (txt or json)", type=['txt', 'json'], accept_multiple_files=True) | |
| # user_question = st.text_input("๐ฌ Ask your question:") | |
| # if user_question: | |
| # vector_data = load_uploaded_vectors(uploaded_files) if uploaded_files else [] | |
| # # Try vector DB first | |
| # vector_context = search_vector_data(user_question, vector_data) | |
| # if vector_context: | |
| # with st.spinner("๐ง Answering from uploaded knowledge..."): | |
| # answer = ask_groq(vector_context, user_question) | |
| # st.success(answer) | |
| # else: | |
| # # Fallback to real-time data | |
| # with st.spinner("๐บ Fetching YouTube videos..."): | |
| # video_info = get_latest_video_ids(channel_id, max_results=5) | |
| # transcripts = get_video_transcripts(video_info) | |
| # yt_context = "" | |
| # relevant_links = [] | |
| # for vid in transcripts: | |
| # yt_context += f"\n\n[Video: {vid['title']}]({vid['link']}):\n{vid['transcript'][:1500]}" | |
| # if user_question.lower() in vid['transcript'].lower(): | |
| # relevant_links.append(vid['link']) | |
| # with st.spinner("๐ Scraping icode.guru..."): | |
| # site_blocks = scrape_icodeguru(BASE_URL, max_pages=5) | |
| # site_context = "\n\n".join(site_blocks) | |
| # full_context = yt_context + "\n\n" + site_context | |
| # with st.spinner("๐ง Thinking..."): | |
| # answer = ask_groq(full_context, user_question) | |
| # st.success(answer) | |
| # if relevant_links: | |
| # st.markdown("### ๐ Related YouTube Links") | |
| # for link in relevant_links: | |
| # st.markdown(f"- [Watch Video]({link})") | |
| # st.markdown("---") | |
| # st.caption("Powered by YouTube, iCodeGuru, and Groq") | |
| # if __name__ == "__main__": | |
| # main() | |
| # import nest_asyncio | |
| # import streamlit as st | |
| # import os | |
| # from groq import Groq | |
| # from sentence_transformers import SentenceTransformer | |
| # import chromadb | |
| # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction | |
| # nest_asyncio.apply() | |
| # # --- CONFIGURATION --- | |
| # GROQ_API_KEY = os.environ.get("GROQ_API_KEY") | |
| # groq_client = Groq(api_key=GROQ_API_KEY) | |
| # embedding_function = SentenceTransformerEmbeddingFunction( | |
| # model_name="all-MiniLM-L6-v2", | |
| # device="cpu" | |
| # ) | |
| # chroma_client = chromadb.Client() | |
| # collection = chroma_client.get_or_create_collection("icodeguru_knowledge", embedding_function=embedding_function) | |
| # # --- Search persistent vector DB --- | |
| # def search_vector_data(query): | |
| # results = collection.query(query_texts=[query], n_results=3) | |
| # if results and results["documents"]: | |
| # return "\n\n".join([doc for doc in results["documents"][0]]) | |
| # return None | |
| # # --- Ask Groq --- | |
| # def ask_groq(context, question): | |
| # messages = [ | |
| # {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."}, | |
| # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"} | |
| # ] | |
| # chat_completion = groq_client.chat.completions.create( | |
| # model="llama3-8b-8192", | |
| # messages=messages, | |
| # ) | |
| # return chat_completion.choices[0].message.content.strip() | |
| # #--- STREAMLIT APP --- | |
| # def main(): | |
| # st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide") | |
| # st.title("๐ EduBot for @icodeguru0") | |
| # st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.") | |
| # user_question = st.text_input("๐ฌ Ask your question:") | |
| # if user_question: | |
| # # Try vector DB first | |
| # vector_context = search_vector_data(user_question) | |
| # if vector_context: | |
| # with st.spinner("๐ง Answering from knowledge base..."): | |
| # answer = ask_groq(vector_context, user_question) | |
| # st.success(answer) | |
| # else: | |
| # st.warning("โ ๏ธ No relevant answer found in the embedded knowledge.") | |
| # st.markdown("---") | |
| # st.caption("Powered by ChromaDB ๐ง and Groq โก") | |
| # if __name__ == "__main__": | |
| # main() | |
| # import nest_asyncio | |
| # import streamlit as st | |
| # import os | |
| # from groq import Groq | |
| # from sentence_transformers import SentenceTransformer | |
| # import chromadb | |
| # from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction | |
| # from chromadb.config import Settings | |
| # # Apply asyncio patch (required for nested event loops in Streamlit) | |
| # nest_asyncio.apply() | |
| # # --- CONFIGURATION --- | |
| # GROQ_API_KEY = os.environ.get("GROQ_API_KEY") | |
| # GROQ_MODEL = "llama3-8b-8192" | |
| # # Initialize Groq client | |
| # groq_client = Groq(api_key=GROQ_API_KEY) | |
| # # Initialize embedding model | |
| # embedding_function = SentenceTransformerEmbeddingFunction( | |
| # model_name="all-MiniLM-L6-v2", | |
| # device="cpu" | |
| # ) | |
| # # Initialize ChromaDB with persistence | |
| # chroma_client = chromadb.PersistentClient(path="./chroma_db", settings=Settings(anonymized_telemetry=False)) | |
| # collection = chroma_client.get_or_create_collection( | |
| # name="icodeguru_knowledge", | |
| # embedding_function=embedding_function | |
| # ) | |
| # # --- Search embedded knowledge --- | |
| # def search_vector_data(query): | |
| # try: | |
| # results = collection.query(query_texts=[query], n_results=3) | |
| # if results and results["documents"]: | |
| # return "\n\n".join(results["documents"][0]) | |
| # except Exception as e: | |
| # st.error(f"Vector search error: {e}") | |
| # return None | |
| # # --- Ask Groq --- | |
| # def ask_groq(context, question): | |
| # messages = [ | |
| # {"role": "system", "content": "You are a helpful assistant. Always provide relevant video and website links if possible."}, | |
| # {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):"} | |
| # ] | |
| # response = groq_client.chat.completions.create( | |
| # model=GROQ_MODEL, | |
| # messages=messages | |
| # ) | |
| # return response.choices[0].message.content.strip() | |
| # # --- Streamlit UI --- | |
| # def main(): | |
| # st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide") | |
| # st.title("๐ EduBot for @icodeguru0") | |
| # st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.") | |
| # user_question = st.text_input("๐ฌ Ask your question:") | |
| # if user_question: | |
| # vector_context = search_vector_data(user_question) | |
| # if vector_context: | |
| # with st.spinner("๐ง Answering from knowledge base..."): | |
| # answer = ask_groq(vector_context, user_question) | |
| # st.success(answer) | |
| # else: | |
| # st.warning("โ ๏ธ No relevant answer found in the embedded knowledge.") | |
| # st.markdown("---") | |
| # st.caption("Powered by ChromaDB ๐ง and Groq โก") | |
| # # โ This is the correct way to run the app | |
| # if __name__ == "__main__": | |
| # main() | |
| import nest_asyncio | |
| import streamlit as st | |
| import os | |
| import json | |
| from groq import Groq | |
| from sentence_transformers import SentenceTransformer | |
| import chromadb | |
| from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction | |
| from chromadb.config import Settings | |
| from langchain.document_loaders import JSONLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
# Apply asyncio patch (Streamlit fix)
nest_asyncio.apply()

# --- CONFIGURATION ---
# Read from the environment; os.environ.get returns None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Model name passed to every chat-completion request in ask_groq().
# NOTE(review): confirm this model id is still served by Groq before deploying.
GROQ_MODEL = "llama3-8b-8192"

# Initialize Groq client
groq_client = Groq(api_key=GROQ_API_KEY)

# # Explicitly load SentenceTransformer model first to avoid meta tensor bug
# embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# # Pass this model into Chroma's embedding function
# embedding_function = SentenceTransformerEmbeddingFunction(embedding_model=embedding_model)

# Embedding function handed to the Chroma collection below; pinned to CPU.
embedding_function = SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2",
    device="cpu"
)

# Initialize ChromaDB Persistent Client
# Data is stored under ./chroma_db; anonymized telemetry is switched off.
chroma_client = chromadb.PersistentClient(path="./chroma_db", settings=Settings(anonymized_telemetry=False))
# Single collection shared by ingestion (writes) and search (queries).
collection = chroma_client.get_or_create_collection(
    name="icodeguru_knowledge",
    embedding_function=embedding_function
)
| # --- Ingest JSON Files from /docs/ --- | |
def ingest_docs_to_chroma():
    """Load every ``*.json`` file in ``./docs``, chunk it, and store it in Chroma.

    Each JSON file is read with ``JSONLoader`` (one document per top-level
    array element), split into 500-character chunks with a 50-character
    overlap, and written to the module-level ``collection``.

    Chunk IDs are the SHA-256 hex digest of the chunk text. The built-in
    ``hash()`` is salted per interpreter process for strings, so it yields a
    different ID for the same text after every restart and would fill the
    persistent collection with duplicates. A content digest is stable, and
    ``upsert`` makes repeated ingestion idempotent.

    Progress and the final status are reported through Streamlit widgets.

    Raises:
        FileNotFoundError: if ``./docs`` does not exist (from ``os.listdir``).
    """
    # Function-scope import so this block is self-contained.
    import hashlib

    folder_path = "./docs"
    batch_size = 500  # keep each write well below Chroma's per-call batch limit

    all_docs = []
    # Sorted so that ingestion order (and which duplicate wins) is deterministic.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(folder_path, filename)
        loader = JSONLoader(file_path=file_path, jq_schema='.[]')
        docs = loader.load()
        all_docs.extend(docs)
        st.write(f"Loaded {len(docs)} documents from {filename}")

    # Chunk Documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(all_docs)
    st.write(f"Total chunks created: {len(chunks)}")

    # Build the payload keyed by ID: identical chunk text maps to one entry,
    # because a single write call must not contain the same ID twice.
    unique_chunks = {}
    for chunk in chunks:
        # Flatten list content if necessary
        if isinstance(chunk.page_content, list):
            content = " ".join(str(item) for item in chunk.page_content).strip()
        else:
            content = str(chunk.page_content).strip()
        if not content:
            continue  # nothing to embed
        doc_id = hashlib.sha256(content.encode("utf-8")).hexdigest()
        unique_chunks.setdefault(doc_id, (content, chunk.metadata))

    # Add Chunks to ChromaDB in batches instead of one round-trip per chunk.
    doc_ids = list(unique_chunks)
    for start in range(0, len(doc_ids), batch_size):
        batch_ids = doc_ids[start:start + batch_size]
        collection.upsert(
            documents=[unique_chunks[doc_id][0] for doc_id in batch_ids],
            metadatas=[unique_chunks[doc_id][1] for doc_id in batch_ids],
            ids=batch_ids,
        )

    st.success("โ Knowledge Base Updated Successfully!")
| # --- Search embedded knowledge --- | |
def search_vector_data(query):
    """Return the top matches for *query* from the knowledge collection.

    Queries the module-level ``collection`` for up to three documents and
    joins the first result list with blank lines. Any exception raised while
    querying or assembling the text is shown via ``st.error``.

    Returns:
        The joined document text, or ``None`` when the query produced no
        result object / no documents or an error occurred.
    """
    try:
        hits = collection.query(query_texts=[query], n_results=3)
        # Short-circuit exactly like `hits and hits["documents"]`.
        documents = hits["documents"] if hits else None
        if documents:
            # One result list per query text; only one query was sent.
            return "\n\n".join(documents[0])
    except Exception as err:
        st.error(f"Vector search error: {err}")
    return None
| # --- Ask Groq LLM --- | |
def ask_groq(context, question):
    """Ask the Groq chat model to answer *question* using *context*.

    Sends a system instruction plus one user message that embeds the context
    and the question, using the module-level ``groq_client`` and
    ``GROQ_MODEL``.

    Returns:
        The content of the first completion choice with surrounding
        whitespace stripped.
    """
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant. Always provide relevant video and website links if possible.",
    }
    user_message = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer (include links):",
    }
    completion = groq_client.chat.completions.create(
        model=GROQ_MODEL,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
| # --- Streamlit UI --- | |
def main():
    """Render the EduBot page: ingest the knowledge base, then answer questions.

    Streamlit re-executes the script on every widget interaction, so the
    ``./docs`` ingestion is gated behind a ``st.session_state`` flag. It runs
    on the first pass of a browser session instead of on every question typed.

    For a non-empty question the vector store is searched; when context is
    found it is sent to the LLM and the answer is displayed, otherwise a
    warning is shown.
    """
    st.set_page_config(page_title="EduBot for iCodeGuru", layout="wide")
    st.title("๐ EduBot for @icodeguru0")
    st.markdown("Ask anything based on pre-loaded iCodeGuru knowledge.")

    # --- Update Knowledge Base once per session, not on every rerun ---
    if not st.session_state.get("kb_ingested", False):
        st.info("๐ Updating Knowledge Base from /docs/...")
        ingest_docs_to_chroma()
        # Set only after ingestion returns, so a failed run is retried.
        st.session_state["kb_ingested"] = True
        st.success("โ Knowledge Base Loaded Successfully!")
    st.markdown("---")

    user_question = st.text_input("๐ฌ Ask your question:")
    if user_question:
        vector_context = search_vector_data(user_question)
        if vector_context:
            with st.spinner("๐ง Answering from knowledge base..."):
                answer = ask_groq(vector_context, user_question)
            st.success(answer)
        else:
            st.warning("โ ๏ธ No relevant answer found in the embedded knowledge.")

    st.markdown("---")
    st.caption("Powered by ChromaDB ๐ง and Groq โก")
# Script entry point: build the Streamlit page when this file is executed as the main module.
if __name__ == "__main__":
    main()