tinaranathania's picture
Update app.py
6e9136f
raw
history blame
3.22 kB
# Q&A Chatbot
from langchain.llms import OpenAI
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from dotenv import find_dotenv, load_dotenv
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
import textwrap
load_dotenv(find_dotenv())
embeddings = OpenAIEmbeddings()
#load_dotenv() # take environment variables from .env.
import streamlit as st
import os
def create_db_from_youtube_video_url(video_url):
    """Build a FAISS vector store from a YouTube video's transcript.

    Args:
        video_url: URL of the YouTube video to load the transcript from.

    Returns:
        A FAISS vector store whose entries are ~2000-character transcript
        chunks (100-character overlap), embedded with the module-level
        OpenAI ``embeddings`` object.
    """
    # Fetch the transcript as langchain Document(s).
    transcript_docs = YoutubeLoader.from_youtube_url(video_url).load()

    # Split the transcript into overlapping chunks so each piece fits
    # comfortably within the model's context window.
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    chunks = splitter.split_documents(transcript_docs)

    # Embed every chunk and index the vectors for similarity search.
    return FAISS.from_documents(chunks, embeddings)
# Why k=4? The model can handle up to 16,385 tokens. The chunk size is set to
# 2000 and k is 4 to maximize the number of tokens to analyze.
def get_response_from_query(db, query, k=4):
    """Answer *query* using the transcript chunks stored in *db*.

    Args:
        db: FAISS vector store built from a video transcript
            (see ``create_db_from_youtube_video_url``).
        query: The user's natural-language question.
        k: Number of most-similar transcript chunks to feed the model.

    Returns:
        Tuple ``(response, docs)`` — the model's answer as a single-line
        string, and the list of transcript Documents it was shown.
    """
    # Filter: retrieve the k transcript chunks most similar to the prompt.
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)

    # System message: ground the assistant in the retrieved transcript.
    # (Fixed duplicated word "that that" from the original prompt.)
    template = """
        You are a helpful assistant that can answer questions about youtube videos
        based on the video's transcript: {docs}

        Only use the factual information from the transcript to answer the question.

        If you feel like you don't have enough information to answer the question, say "I don't know".
        """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human question prompt.
    human_template = "Answer the following question: {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    # Combine both into a single chat prompt.
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    response = chain.run(question=query, docs=docs_page_content)

    # Collapse the answer onto one line. The original used replace("\n", ""),
    # which glued words across line breaks together; normalizing whitespace
    # with split/join keeps words separated.
    response = " ".join(response.split())
    return response, docs
# Webpage with Streamlit.
st.set_page_config(page_title="Youtube Video Q&A Demo")
st.header("Langchain Application")

youtube_input = st.text_input("Youtube Link: ", key="youtube_input")
query = st.text_input("Your Question Here: ", key="query")
submit = st.button("Ask the question")

# Only do the expensive work (transcript download, embedding, LLM call) when
# the button is clicked. The original version built the database on every
# Streamlit rerun as soon as the link field was non-empty — even with an
# empty question — and raised a NameError ("response" undefined) if the
# button was clicked while the link field was still blank.
if submit:
    if youtube_input and query:
        db = create_db_from_youtube_video_url(youtube_input)
        response, docs = get_response_from_query(db, query)
        st.subheader("The Response is")
        st.write(response)
    else:
        # Guard against missing input instead of crashing.
        st.warning("Please provide both a YouTube link and a question.")