Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- __init__.py +0 -0
- chatbot.py +92 -0
- faq.py +15 -0
- prompts.py +23 -0
- requirements.txt +15 -0
- sidebar.py +27 -0
__init__.py
ADDED
File without changes
|
chatbot.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.document_loaders import YoutubeLoader
|
2 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
from langchain.vectorstores import FAISS
|
4 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
+
from langchain.llms import HuggingFaceHub
|
6 |
+
from langchain.chains import LLMChain
|
7 |
+
from dotenv import find_dotenv, load_dotenv
|
8 |
+
from prompts import CHAT_PROMPT
|
9 |
+
from youtube_transcript_api import NoTranscriptFound
|
10 |
+
import streamlit as st
|
11 |
+
import os
|
12 |
+
|
13 |
+
|
14 |
+
class YouTubeChatbot:
    """Chatbot that answers questions about a YouTube video via its transcript.

    On construction it loads a Hugging Face embeddings model and the
    Falcon-7B-Instruct LLM from the Hugging Face Hub. Load failures are
    surfaced with ``st.error`` and the corresponding attribute is left as
    ``None`` so later calls can fail gracefully instead of crashing.
    """

    def __init__(self):
        load_dotenv(find_dotenv())

        # Use a membership test: attribute access on st.secrets raises when
        # the key is absent, which would crash the app instead of falling
        # back to a HUGGINGFACEHUB_API_TOKEN already set via .env/environment.
        if "hugging_face_api_key" in st.secrets:
            os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN",
                                  st.secrets.hugging_face_api_key)

        try:
            self.embeddings = HuggingFaceEmbeddings()
        except Exception as e:
            st.error("Failed to load the Hugging Face Embeddings model: " +
                     str(e))
            self.embeddings = None

        try:
            repo_id = "tiiuae/falcon-7b-instruct"
            self.falcon_llm = HuggingFaceHub(
                repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 500}
            )
        except Exception as e:
            st.error("Failed to load the Falcon LLM model: " + str(e))
            self.falcon_llm = None

    @st.cache_data
    def create_db_from_youtube_video_url(_self, video_url):
        """Build a FAISS vector store from the video's transcript.

        Args:
            video_url: Full YouTube video URL.

        Returns:
            The populated FAISS store, or ``None`` when the embeddings model
            is unavailable, no transcript exists, or indexing fails.
        """
        # Guard: FAISS.from_documents would fail obscurely if the embeddings
        # model failed to load in __init__ and was left as None.
        if _self.embeddings is None:
            st.error(
                "Embeddings model is not loaded. Please check the error messages."
            )
            return None

        st.info("Creating FAISS database from YouTube video.")
        loader = YoutubeLoader.from_youtube_url(video_url)
        try:
            transcript = loader.load()
        except NoTranscriptFound:
            st.error("No transcript found for the video.")
            return None

        # Overlapping 1000-char chunks keep sentence context across splits.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                       chunk_overlap=100)
        docs = text_splitter.split_documents(transcript)
        st.info("Number of documents: " + str(len(docs)))

        try:
            db = FAISS.from_documents(docs, _self.embeddings)
            st.text("Created FAISS database from documents.")
            return db
        except Exception as e:
            st.error("Failed to create FAISS database from documents: " +
                     str(e))
            return None

    # NOTE(review): the leading underscore on _db excludes it from the
    # st.cache_data key, so the cache is keyed on (query, k) only — switching
    # to a different video's database may return a stale cached answer.
    # TODO confirm and consider keying on the video URL as well.
    @st.cache_data
    def get_response_from_query(_self, _db, query, k=4):
        """Answer *query* using the *k* most similar transcript chunks.

        Args:
            _db: FAISS store built by ``create_db_from_youtube_video_url``.
            query: The user's question.
            k: Number of similar documents to feed the LLM (default 4).

        Returns:
            The LLM's answer with newlines stripped, or ``None`` on error.
        """
        if _db is None:
            st.error(
                "Database is not initialized. Please check the error messages."
            )
            return None

        if _self.falcon_llm is None:
            st.error(
                "Falcon LLM model is not loaded. Please check the error messages."
            )
            return None

        docs = _db.similarity_search(query, k=k)
        docs_page_content = " ".join([d.page_content for d in docs])

        try:
            chain = LLMChain(llm=_self.falcon_llm, prompt=CHAT_PROMPT)
            response = chain.run(
                question=query,
                docs=docs_page_content
            )
            response = response.replace("\n", "")
            return response
        except Exception as e:
            st.error("Failed to generate a response: " + str(e))
            return None
|
faq.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
def faq():
    """Render the FAQ section of the sidebar as a single markdown blob."""
    faq_markdown = """# FAQ
## How does YouTuberGPT work?
YouTuberGPT is a powerful tool that can help you quickly and easily find answers to your questions about YouTube videos. Simply input the URL of the video you're interested in, and YouTuberGPT will use its advanced semantic search capabilities to analyze the video and generate accurate and helpful answers to your questions.

## Are the answers always accurate?
While YouTuberGPT uses the latest and most advanced language model (GPT-3) to generate answers, there may be some instances where the answers are not entirely accurate. However, for the vast majority of use cases, YouTuberGPT is extremely accurate and can provide valuable insights and information.

So go ahead and try out YouTuberGPT for yourself - we're confident that you'll find it to be an incredibly helpful tool for all your YouTube-related questions and needs!
"""
    st.markdown(faq_markdown)
|
15 |
+
|
prompts.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

# System message: instructs the assistant to ground its answers in the
# transcript text interpolated at {docs}.
template = """
You are a helpful assistant that can answer questions about YouTube videos based on their transcripts.

To provide accurate answers, please refer to the factual information in the video transcript: {docs}

If you don't have enough information to answer the question, please respond with "I don't know".

Your answers should be detailed and provide as much information as possible.
"""
PROMPT_TEMPLATE = SystemMessagePromptTemplate.from_template(template)

# Human message: carries the user's question at {question}.
human_template = "Answer the following question: {question}"
HUMAN_PROMPT = HumanMessagePromptTemplate.from_template(human_template)

# Combined chat prompt consumed by the LLM chain (system first, then human).
CHAT_PROMPT = ChatPromptTemplate.from_messages([PROMPT_TEMPLATE, HUMAN_PROMPT])
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.22.0
|
2 |
+
langchain==0.0.177
|
3 |
+
faiss-cpu==1.7.3
|
4 |
+
openai==0.26.2
|
5 |
+
tiktoken==0.4.0
|
6 |
+
streamlit_elements==0.1
|
7 |
+
youtube-transcript-api==0.6.0
|
8 |
+
python-dotenv==0.21.1
|
9 |
+
pytest==7.2.1
|
10 |
+
isort==5.12.0
|
11 |
+
black==23.1a1
|
12 |
+
flake8==6.0.0
|
13 |
+
streamlit_chat==0.0.2.2
|
14 |
+
huggingface_hub==0.15.1
|
15 |
+
sentence_transformers
|
sidebar.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from components.faq import faq
|
3 |
+
|
4 |
+
def sidebar():
    """Render the app sidebar: usage instructions, about/FAQ, credits, donate link."""
    with st.sidebar:
        st.markdown("# 🤖YoutuberGPT")
        st.markdown("---")
        st.markdown("# How to use?")
        # Fix: the adjacent string literals previously joined with no newline,
        # so both bullets rendered as one merged markdown list item. A "\n"
        # between items restores the two-item list (also: "answer" -> "answers").
        st.markdown(
            "- Input the URL of the video you are interested in\n"
            "- YouTuberGPT will use its advanced semantic search "
            "capabilities to analyze the video and generate accurate "
            "and helpful answers to your questions"
        )

        st.markdown("# About")
        st.markdown(
            "YouTuberGPT allows you to ask questions about YouTube videos. "
        )
        faq()
        st.markdown(
            "This tool is a work in progress. "
            "You can contribute to the project on [GitHub](https://github.com/jorgik1/youtuber_ai_chatbot) "  # noqa: E501
            "with your feedback and suggestions💡"
        )
        st.markdown("Made by [jorgik1](https://github.com/jorgik1)")
        st.markdown("---")
        st.markdown("# Donate")
        st.markdown("[Buy me a coffee](https://www.buymeacoffee.com/youtubergtp)")
|