Spaces:

mutea
/

youtube_assistant

Sleeping

App Files Files Community

youtube_assistant / langchain_helper.py

mutea

rename

188d97c 12 months ago

raw

history blame

2.04 kB

	from langchain.document_loaders import YoutubeLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.llms import OpenAI
	from langchain.prompts import PromptTemplate
	from langchain.chains import LLMChain
	from langchain.vectorstores import FAISS # for similarity search
	from langchain.embeddings.openai import OpenAIEmbeddings


	from dotenv import load_dotenv

	load_dotenv()

	embeddings = OpenAIEmbeddings()

	# video_url = "https://www.youtube.com/watch?v=OyFJWRnt_AY"


	def create_vector_db_from_ytUrl(video_url: str) -> FAISS:
	'''First load the youtube video. calling loader.load() will automatically transcribe the youtube video. Next we split the text with RecursiveCharacterTextSplitter. Create and return a vector store object '''

	loader = YoutubeLoader.from_youtube_url(video_url)
	video_transcript = loader.load()

	text_splitter = RecursiveCharacterTextSplitter(
	chunk_size=1000, chunk_overlap=100)

	docs = text_splitter.split_documents(video_transcript)

	db = FAISS.from_documents(docs, embeddings)
	return db


	def get_response_from_query(db, query, k=4):
	''' text_davinci max tokens = 4097
	Semantic search '''

	docs = db.similarity_search(query, k=k)

	docs_page_content = " ".join([d.page_content for d in docs])

	llm = OpenAI(model="text-davinci-003")

	prompt = PromptTemplate(
	input_variables=["question", "docs"],
	template="""You are a helpful YouTube assistant that can answer questions about videos from video transcripts.
	Answer the following question: {question}
	By searching the following video transcript: {docs}

	If you feel like you do not have enough information to give the answer, simply say "I have not much information to answer the question!"

	Your answers should be detailed.
	"""
	)

	chain = LLMChain(llm=llm, prompt=prompt)

	response = chain.run(question=query, docs=docs_page_content)
	response = response.replace("\n", "")
	return response, docs