Spaces:

NewtonKimathi
/

Youtube_Assistant

Runtime error

App Files Files Community

Youtube_Assistant / langchain_helper.py

NewtonKimathi

Streamlit app

83c6c56 10 months ago

raw

history blame

No virus

2.16 kB

	from langchain.document_loaders import YoutubeLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.llms import OpenAI
	from langchain import PromptTemplate
	from langchain.chains import LLMChain
	from dotenv import load_dotenv

	# Load variables from a local .env file (if one exists) into the process
	# environment.
	# NOTE(review): this presumably has to run before OpenAIEmbeddings() is
	# constructed so the OpenAI API key is already in the environment -- confirm.
	load_dotenv()
	# Module-level embeddings object; create_db_from_youtube_video_url passes it
	# to FAISS.from_documents when building the vector store.
	embeddings = OpenAIEmbeddings()

	# A function to create a db using FAISS
	def create_db_from_youtube_video_url(
	    video_url: str,
	    chunk_size: int = 1000,
	    chunk_overlap: int = 100,
	) -> FAISS:
	    """Build a FAISS vector store from the transcript of a YouTube video.

	    The transcript is loaded with ``YoutubeLoader``, split into overlapping
	    chunks, and the chunks are indexed with the module-level ``embeddings``.

	    Args:
	        video_url: URL of the video, handed to
	            ``YoutubeLoader.from_youtube_url``.
	        chunk_size: Maximum chunk size given to
	            ``RecursiveCharacterTextSplitter``.
	        chunk_overlap: Overlap between consecutive chunks given to
	            ``RecursiveCharacterTextSplitter``.

	    Returns:
	        The FAISS store created from the transcript chunks.
	    """
	    # Load the transcript documents for the video.
	    loader = YoutubeLoader.from_youtube_url(video_url)
	    transcript = loader.load()

	    # Split the transcript into overlapping chunks so each one can be
	    # embedded and retrieved independently.
	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	    )
	    docs = text_splitter.split_documents(transcript)

	    # Embed the chunks and store them in the vector index.
	    return FAISS.from_documents(docs, embeddings)

	# A function to get the response from the query passed
	def get_response_from_query(
	    db,
	    query: str,
	    k: int = 4,
	    *,
	    model_name: str = "gpt-3.5-turbo-instruct",
	):
	    """Answer a question about a video using its indexed transcript.

	    The ``k`` transcript chunks most similar to ``query`` are fetched from
	    ``db``, joined into a single context string, and sent to an OpenAI
	    completion model together with the question.

	    With the chunking used when the store is built (about 1000 characters per
	    chunk), the default ``k=4`` keeps the prompt well inside a ~4k-token
	    context window.

	    Args:
	        db: Vector store exposing ``similarity_search(query, k=...)`` whose
	            results have a ``page_content`` attribute.
	        query: The question to answer; must not be blank.
	        k: Number of transcript chunks to retrieve; must be at least 1.
	        model_name: OpenAI completion model to use. The previous hard-coded
	            ``text-davinci-003`` has been retired by OpenAI, so the default
	            is its documented replacement.

	    Returns:
	        A ``(response, docs)`` tuple: the answer flattened onto a single
	        line, and the retrieved documents it was based on.

	    Raises:
	        ValueError: If ``query`` is blank or ``k`` is smaller than 1.
	    """
	    # Fail fast on unusable input instead of spending an API call on it.
	    if not query or not query.strip():
	        raise ValueError("query must be a non-empty string")
	    if k < 1:
	        raise ValueError("k must be at least 1")

	    docs = db.similarity_search(query, k=k)
	    docs_page_content = " ".join(d.page_content for d in docs)

	    llm = OpenAI(model_name=model_name)

	    prompt = PromptTemplate(
	        input_variables=["question", "docs"],
	        template="""
	You are a helpful assistant that that can answer questions about youtube videos
	based on the video's transcript.

	Answer the following question: {question}
	By searching the following video transcript: {docs}

	Only use the factual information from the transcript to answer the question.

	If you feel like you don't have enough information to answer the question, say "I don't know".

	Your answers should be verbose and detailed.
	""",
	    )

	    chain = LLMChain(llm=llm, prompt=prompt)
	    response = chain.run(question=query, docs=docs_page_content)

	    # Flatten the answer onto one line. Join the lines with a space: simply
	    # deleting the newlines would fuse the last word of one line with the
	    # first word of the next.
	    response = " ".join(
	        line.strip() for line in response.splitlines() if line.strip()
	    )
	    return response, docs