# Q&A Chatbot: answer questions about a YouTube video from its transcript.
from langchain.llms import OpenAI
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from dotenv import find_dotenv, load_dotenv
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
import textwrap
import os

import streamlit as st

# Load OPENAI_API_KEY (and any other settings) from a .env file.
load_dotenv(find_dotenv())

# One embeddings client shared by every request.
embeddings = OpenAIEmbeddings()


def create_db_from_youtube_video_url(video_url):
    """Fetch a YouTube video's transcript and index it in a FAISS vector store.

    Args:
        video_url: URL of the YouTube video whose transcript to index.

    Returns:
        A FAISS vector store built from the embedded transcript chunks.
    """
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    # Split the transcript into overlapping chunks so each one stays well
    # inside the model's context window while preserving local context.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    docs = text_splitter.split_documents(transcript)

    # Embed the chunks and build a searchable vector index over them.
    return FAISS.from_documents(docs, embeddings)


def get_response_from_query(db, query, k=4):
    """Answer `query` using the `k` transcript chunks most similar to it.

    gpt-3.5-turbo-16k can handle up to 16,385 tokens; with chunk_size=2000
    the default k=4 keeps the assembled prompt inside that window while
    maximizing the amount of transcript the model sees.

    Args:
        db: FAISS vector store built from the video transcript.
        query: The user's question.
        k: Number of most-similar chunks to stuff into the prompt.

    Returns:
        Tuple of (answer string, list of the retrieved documents).
    """
    # Retrieve the transcript chunks most relevant to the question.
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)

    # System message carries the transcript context and the grounding rules.
    template = """
        You are a helpful assistant that can answer questions about youtube videos based on the video's transcript: {docs}

        Only use the factual information from the transcript to answer the question.

        If you feel like you don't have enough information to answer the question, say "I don't know".
        """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human message carries the actual question.
    human_template = "Answer the following question: {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    response = chain.run(question=query, docs=docs_page_content)
    # Flatten the answer to a single line for display.
    response = response.replace("\n", "")
    return response, docs


# --- Streamlit UI ---
st.set_page_config(page_title="Youtube Video Q&A Demo")
st.header("Langchain Application")

youtube_input = st.text_input("Youtube Link: ", key="youtube_input")
query = st.text_input("your Question Here: ", key="query")
submit = st.button("Ask the question")

# Only do the expensive work (transcript download, embedding, LLM call) when
# the button is actually pressed. Previously the query ran on every Streamlit
# rerun as soon as a link was typed — even with an empty question — and
# pressing the button with no link raised a NameError on `response`.
if submit:
    if youtube_input and query:
        db = create_db_from_youtube_video_url(youtube_input)
        response, docs = get_response_from_query(db, query)
        st.subheader("The Response is")
        st.write(response)
    else:
        st.warning("Please provide both a YouTube link and a question.")