# (page-scrape residue, kept as comments: "Spaces:" / "Runtime error" / "Runtime error")
# Q&A Chatbot
# Answers questions about a YouTube video using its transcript as context.
import os
import textwrap

import streamlit as st
from dotenv import find_dotenv, load_dotenv
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Load OPENAI_API_KEY (and any other settings) from the nearest .env file
# BEFORE constructing any OpenAI-backed objects.
load_dotenv(find_dotenv())

# Shared embedding model used to index and search transcripts.
# NOTE: instantiated at import time — requires OPENAI_API_KEY to be set.
embeddings = OpenAIEmbeddings()
def create_db_from_youtube_video_url(video_url, chunk_size=2000, chunk_overlap=100):
    """Build a FAISS vector store from a YouTube video's transcript.

    Args:
        video_url: URL of the YouTube video whose transcript to index.
        chunk_size: Maximum characters per transcript chunk (default 2000,
            chosen so k=4 chunks fit the gpt-3.5-turbo-16k context window).
        chunk_overlap: Characters of overlap between consecutive chunks,
            so sentences cut at a boundary stay searchable.

    Returns:
        A FAISS index of embedded transcript chunks, ready for
        similarity search.
    """
    # Fetch the transcript as a list of LangChain Documents.
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    # Split the (potentially very long) transcript into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(transcript)

    # Embed each chunk (module-level `embeddings`) and index the vectors.
    return FAISS.from_documents(docs, embeddings)
def get_response_from_query(db, query, k=4):
    """Answer *query* using the k transcript chunks most similar to it.

    Args:
        db: FAISS vector store built from the video transcript.
        query: The user's question.
        k: Number of most-similar chunks fed to the model. With 2000-char
           chunks, k=4 keeps the prompt within gpt-3.5-turbo-16k's
           16,385-token context window while maximizing transcript coverage.

    Returns:
        Tuple of (answer string, list of the k source Documents).
    """
    # Retrieve the transcript chunks most relevant to the question.
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)

    # System prompt: ground the model in the retrieved transcript only.
    # (Fixed the duplicated word "that that" in the original prompt.)
    template = """
    You are a helpful assistant that can answer questions about youtube videos
    based on the video's transcript: {docs}
    Only use the factual information from the transcript to answer the question.
    If you feel like you don't have enough information to answer the question, say "I don't know".
    """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human question prompt.
    human_message_prompt = HumanMessagePromptTemplate.from_template(
        "Answer the following question: {question}"
    )

    # Combine system + human messages into one chat prompt.
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    response = chain.run(question=query, docs=docs_page_content)

    # Collapse newlines to spaces. The original replaced them with "",
    # which glued words together across line breaks in the model output.
    response = response.replace("\n", " ")
    return response, docs
# --- Streamlit web UI ---
st.set_page_config(page_title="Youtube Video Q&A Demo")
st.header("Langchain Application")

youtube_input = st.text_input("Youtube Link: ", key="youtube_input")
query = st.text_input("Your Question Here: ", key="query")
submit = st.button("Ask the question")

# Only build the index and call OpenAI when the button is clicked AND both
# inputs are present. The original ran the pipeline on every rerun and
# raised NameError on submit when the link field was empty, because
# `response` was only assigned inside `if youtube_input != "":`.
if submit:
    if youtube_input and query:
        db = create_db_from_youtube_video_url(youtube_input)
        response, docs = get_response_from_query(db, query)
        st.subheader("The Response is")
        st.write(response)
    else:
        st.warning("Please provide both a YouTube link and a question.")