# (page-scrape residue, kept as comments: "Spaces:" / "Runtime error" / "Runtime error")
# Q&A Chatbot
# Answers questions about a YouTube video using its transcript as context.
import os
import textwrap

import streamlit as st
from dotenv import find_dotenv, load_dotenv
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Load OPENAI_API_KEY (and any other settings) from the nearest .env file
# BEFORE constructing any OpenAI-backed objects.
load_dotenv(find_dotenv())

# Shared embedding model used to index and search transcripts.
# NOTE: instantiated at import time — requires OPENAI_API_KEY to be set.
embeddings = OpenAIEmbeddings()
def create_db_from_youtube_video_url(video_url, chunk_size=2000, chunk_overlap=100):
    """Build a FAISS vector store from a YouTube video's transcript.

    Args:
        video_url: URL of the YouTube video whose transcript to index.
        chunk_size: Maximum characters per transcript chunk (default 2000,
            chosen so k=4 chunks fit the gpt-3.5-turbo-16k context window).
        chunk_overlap: Characters of overlap between consecutive chunks,
            so sentences cut at a boundary stay searchable.

    Returns:
        A FAISS index of embedded transcript chunks, ready for
        similarity search.
    """
    # Fetch the transcript as a list of LangChain Documents.
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    # Split the (potentially very long) transcript into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(transcript)

    # Embed each chunk (module-level `embeddings`) and index the vectors.
    return FAISS.from_documents(docs, embeddings)
def get_response_from_query(db, query, k=4):
    """Answer *query* using the k transcript chunks most similar to it.

    Args:
        db: FAISS vector store built from the video transcript.
        query: The user's question.
        k: Number of most-similar chunks fed to the model. With 2000-char
           chunks, k=4 keeps the prompt within gpt-3.5-turbo-16k's
           16,385-token context window while maximizing transcript coverage.

    Returns:
        Tuple of (answer string, list of the k source Documents).
    """
    # Retrieve the transcript chunks most relevant to the question.
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)

    # System prompt: ground the model in the retrieved transcript only.
    # (Fixed the duplicated word "that that" in the original prompt.)
    template = """
    You are a helpful assistant that can answer questions about youtube videos
    based on the video's transcript: {docs}
    Only use the factual information from the transcript to answer the question.
    If you feel like you don't have enough information to answer the question, say "I don't know".
    """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human question prompt.
    human_message_prompt = HumanMessagePromptTemplate.from_template(
        "Answer the following question: {question}"
    )

    # Combine system + human messages into one chat prompt.
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    response = chain.run(question=query, docs=docs_page_content)

    # Collapse newlines to spaces. The original replaced them with "",
    # which glued words together across line breaks in the model output.
    response = response.replace("\n", " ")
    return response, docs
# --- Streamlit web UI ---
st.set_page_config(page_title="Youtube Video Q&A Demo")
st.header("Langchain Application")

youtube_input = st.text_input("Youtube Link: ", key="youtube_input")
query = st.text_input("Your Question Here: ", key="query")
submit = st.button("Ask the question")

# Only build the index and call OpenAI when the button is clicked AND both
# inputs are present. The original ran the pipeline on every rerun and
# raised NameError on submit when the link field was empty, because
# `response` was only assigned inside `if youtube_input != "":`.
if submit:
    if youtube_input and query:
        db = create_db_from_youtube_video_url(youtube_input)
        response, docs = get_response_from_query(db, query)
        st.subheader("The Response is")
        st.write(response)
    else:
        st.warning("Please provide both a YouTube link and a question.")