# youtube_assistant / langchain_helper.py
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.vectorstores import FAISS # for similarity search
from langchain.embeddings.openai import OpenAIEmbeddings
from dotenv import load_dotenv

# Load OPENAI_API_KEY (and any other secrets) from a local .env file.
load_dotenv()

embeddings = OpenAIEmbeddings()
# video_url = "https://www.youtube.com/watch?v=OyFJWRnt_AY"

def create_vector_db_from_ytUrl(video_url: str) -> FAISS:
    '''Load the YouTube video with YoutubeLoader (loader.load() fetches the
    transcript), split the transcript into chunks with
    RecursiveCharacterTextSplitter, then embed the chunks and return a FAISS
    vector store.'''
    loader = YoutubeLoader.from_youtube_url(video_url)
    video_transcript = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(video_transcript)

    db = FAISS.from_documents(docs, embeddings)
    return db

def get_response_from_query(db, query, k=4):
    '''Run a semantic (similarity) search over the vector store, then ask the
    LLM to answer the query using the top-k transcript chunks.

    text-davinci-003 has a context window of 4097 tokens, so k=4 chunks of
    1000 characters each fit comfortably in a single prompt.'''
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join([d.page_content for d in docs])

    llm = OpenAI(model_name="text-davinci-003")

    prompt = PromptTemplate(
        input_variables=["question", "docs"],
        template="""You are a helpful YouTube assistant that can answer questions about videos from video transcripts.

Answer the following question: {question}
By searching the following video transcript: {docs}

If you do not have enough information to answer, simply say "I do not have enough information to answer this question!"

Your answers should be detailed.
"""
    )

    chain = LLMChain(llm=llm, prompt=prompt)

    response = chain.run(question=query, docs=docs_page_content)
    response = response.replace("\n", "")
    return response, docs
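

# Minimal usage sketch (illustrative, not part of the original module). It
# reuses the sample URL from the comment near the top of this file and assumes
# OPENAI_API_KEY is available via the environment or the .env file.
if __name__ == "__main__":
    sample_url = "https://www.youtube.com/watch?v=OyFJWRnt_AY"

    # Build the FAISS index from the video transcript, then ask a question.
    vector_db = create_vector_db_from_ytUrl(sample_url)
    answer, source_docs = get_response_from_query(
        vector_db, "What is the video about?", k=4)

    print(answer)
    print(f"Answer drawn from {len(source_docs)} transcript chunks.")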