Spaces:

NewtonKimathi
/

Youtube_Assistant

Runtime error

App Files Files Community

NewtonKimathi committed on Nov 18, 2023

Commit

83c6c56

•

1 Parent(s): 8578e68

Streamlit app

Browse files

Files changed (7) hide show

.env +1 -0
Images/Interface.png +0 -0
Images/Output.png +0 -0
Images/Youtube_Helper.jpg +0 -0
langchain_helper.py +59 -0
main.py +39 -0
requirements.txt +7 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ OPENAI_API_KEY="<your-openai-api-key>"

Images/Interface.png ADDED Viewed

Images/Output.png ADDED Viewed

Images/Youtube_Helper.jpg ADDED Viewed

langchain_helper.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from langchain.document_loaders import YoutubeLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import OpenAI
+from langchain import PromptTemplate
+from langchain.chains import LLMChain
+from dotenv import load_dotenv
+# Load variables from a .env file into the environment first, so they are set before OpenAIEmbeddings() is constructed
+load_dotenv()
+embeddings = OpenAIEmbeddings()  # module-level instance reused by create_db_from_youtube_video_url
+# A function to create a db using FAISS
+def create_db_from_youtube_video_url(video_url: str) -> FAISS:
+    # Loading the video
+    loader = YoutubeLoader.from_youtube_url(video_url)
+    transcript = loader.load()
+    # Splitting the document into chunks
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+    docs = text_splitter.split_documents(transcript)
+    # Saving the chunks into vector store
+    db = FAISS.from_documents(docs, embeddings)
+    return db
+# A function to get the response from the query passed
+def get_response_from_query(db, query, k=4):
+    """
+    text-davinci-003 can handle up to 4097 tokens. Setting the chunksize to 1000 and k to 4 maximizes
+    the number of tokens to analyze.
+    """
+    docs = db.similarity_search(query, k=k)
+    docs_page_content = " ".join([d.page_content for d in docs])
+    llm = OpenAI(model_name="text-davinci-003")
+    prompt = PromptTemplate(
+        input_variables=["question", "docs"],
+        template="""
+        You are a helpful assistant that that can answer questions about youtube videos
+        based on the video's transcript.
+        Answer the following question: {question}
+        By searching the following video transcript: {docs}
+        Only use the factual information from the transcript to answer the question.
+        If you feel like you don't have enough information to answer the question, say "I don't know".
+        Your answers should be verbose and detailed.
+        """,
+    )
+    chain = LLMChain(llm=llm, prompt=prompt)
+    response = chain.run(question=query, docs=docs_page_content)
+    response = response.replace("\n", "")
+    return response, docs

main.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import streamlit as st
+import langchain_helper as lch
+import textwrap
+st.title("YouTube Assistant")
+st.write("""I'm here to assist you in answering questions about the Youtube video you share.
+            Just paste the link to the Youtube video and feel free to ask me anything!""")
+st.image("Images/Youtube_Helper.jpg")
+with st.sidebar:
+    with st.form(key='my_form'):
+        youtube_url = st.sidebar.text_area(
+            label="What is the YouTube video URL?",
+            max_chars=50
+            )
+        query = st.sidebar.text_area(
+            label="Ask me about the video?",
+            max_chars=50,
+            key="query"
+            )
+        openai_api_key = st.sidebar.text_input(
+            label="OpenAI API Key",
+            key="langchain_search_api_key_openai",
+            max_chars=100,
+            type="password"
+            )
+        "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
+        "[View the source code](https://github.com/Newton23-nk/Youtube_Helper_Langchain)"
+        submit_button = st.form_submit_button(label='Submit')
+if query and youtube_url:
+    if not openai_api_key:
+        st.info("Please add your OpenAI API key to continue.")
+        st.stop()
+    else:
+        db = lch.create_db_from_youtube_video_url(youtube_url)
+        response, docs = lch.get_response_from_query(db, query)
+        st.subheader("Answer:")
+        st.text(textwrap.fill(response, width=85))

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+python-dotenv
+langchain
+openai
+youtube-transcript-api
+faiss-cpu
+streamlit
+tiktoken