tinaranathania committed on
Commit
1221a55
1 Parent(s): 5963da1

Upload try.py

Files changed (1)
  1. try.py +101 -0
try.py ADDED
@@ -0,0 +1,101 @@
+ # Q&A Chatbot
+ from langchain.llms import OpenAI
+ from langchain.document_loaders import YoutubeLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from langchain.chains import LLMChain
+ from dotenv import find_dotenv, load_dotenv
+ from langchain.prompts.chat import (
+     ChatPromptTemplate,
+     SystemMessagePromptTemplate,
+     HumanMessagePromptTemplate,
+ )
+ import textwrap
+
+ load_dotenv(find_dotenv())
+ embeddings = OpenAIEmbeddings()
+
+ # load_dotenv()  # take environment variables from .env.
+
+ import streamlit as st
+ import os
+
+ def create_db_from_youtube_video_url(video_url):
+     # Get the video transcript
+     loader = YoutubeLoader.from_youtube_url(video_url)
+     transcript = loader.load()
+
+     # Split the transcript into overlapping chunks that fit the model's context
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
+
+     # List of split-up transcript documents
+     docs = text_splitter.split_documents(transcript)
+
+     # Create a database: embed each chunk as a vector (numerical representation of the docs)
+     db = FAISS.from_documents(docs, embeddings)
+     return db
+
+ # Why k=4? The model can handle up to 16,385 tokens. The chunk size is set to 2,000 and k to 4 to maximize the amount of transcript analyzed.
+ def get_response_from_query(db, query, k=4):
+     # Filter the database for the chunks most similar to the prompt
+     docs = db.similarity_search(query, k=k)
+     docs_page_content = " ".join([d.page_content for d in docs])
+
+     chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)
+
+     # Template to use for the system message prompt
+     template = """
+     You are a helpful assistant that can answer questions about YouTube videos
+     based on the video's transcript: {docs}
+
+     Only use the factual information from the transcript to answer the question.
+
+     If you feel like you don't have enough information to answer the question, say "I don't know".
+     """
+
+     system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+
+     # Human question prompt
+     human_template = "Answer the following question: {question}"
+     human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+     # Combine into a chat prompt
+     chat_prompt = ChatPromptTemplate.from_messages(
+         [system_message_prompt, human_message_prompt]
+     )
+
+     chain = LLMChain(llm=chat, prompt=chat_prompt)
+
+     response = chain.run(question=query, docs=docs_page_content)
+     response = response.replace("\n", "")
+     return response, docs
+
+
+ # Webpage with Streamlit
+
+ st.set_page_config(page_title="YouTube Video Q&A Demo")
+
+ st.header("Langchain Application")
+
+ youtube_input = st.text_input("YouTube Link: ", key="youtube_input")
+ query = st.text_input("Your Question Here: ", key="query")
+
+ submit = st.button("Ask the question")
+
+ ## If the ask button is clicked, build the database and answer the question
+ if submit:
+     if youtube_input != "" and query != "":
+         db = create_db_from_youtube_video_url(youtube_input)
+         response, docs = get_response_from_query(db, query)
+         st.subheader("The Response is")
+         st.write(response)
+     else:
+         st.warning("Please enter both a YouTube link and a question.")
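
For quick testing outside the Streamlit page, the two helpers can also be exercised directly. A minimal sketch, assuming an OpenAI key is available to langchain via the environment or the .env file that load_dotenv(find_dotenv()) reads; the video URL and question below are placeholders, not values from the original file:

    video_url = "https://www.youtube.com/watch?v=VIDEO_ID"  # placeholder URL
    question = "What is the video about?"                    # placeholder question
    db = create_db_from_youtube_video_url(video_url)
    answer, source_docs = get_response_from_query(db, question, k=4)
    print(textwrap.fill(answer, width=80))  # textwrap is imported above but otherwise unused

To launch the web app itself, the usual entry point would be: streamlit run try.py, with the same .env file alongside the script.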