Spaces:
Sleeping
Sleeping
from langchain.document_loaders import YoutubeLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.llms import OpenAI | |
from langchain.prompts import PromptTemplate | |
from langchain.chains import LLMChain | |
from langchain.vectorstores import FAISS # for similarity search | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from dotenv import load_dotenv | |
# Load environment settings before the OpenAI client below is constructed
# (presumably the OpenAI API key from a local .env file — confirm).
load_dotenv()

# Shared module-level embeddings object; create_vector_db_from_ytUrl hands it
# to FAISS.from_documents when indexing the transcript chunks.
embeddings = OpenAIEmbeddings()

# Example input for manual testing:
# video_url = "https://www.youtube.com/watch?v=OyFJWRnt_AY"
def create_vector_db_from_ytUrl(video_url: str) -> FAISS:
    """Build a FAISS vector store from a YouTube video's transcript.

    The transcript documents are loaded with ``YoutubeLoader``, split by a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=100``), and indexed with the module-level ``embeddings``
    object.

    Args:
        video_url: URL of the YouTube video whose transcript is indexed.

    Returns:
        The FAISS vector store built from the transcript chunks.
    """
    transcript_docs = YoutubeLoader.from_youtube_url(video_url).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=100,
        chunk_size=1000,
    )
    chunks = splitter.split_documents(transcript_docs)

    return FAISS.from_documents(chunks, embeddings)
def get_response_from_query(db, query, k=4):
    """Answer ``query`` using the ``k`` transcript chunks most similar to it.

    Runs a similarity search on ``db``, joins the text of the matching chunks
    into one context string, and asks the OpenAI completion model to answer
    the question from that context.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)``, such as
            the FAISS store returned by ``create_vector_db_from_ytUrl``.
        query: The question to answer.
        k: Number of chunks to retrieve. The original note on this function
            gives the text-davinci limit as 4097 tokens, so the retrieved
            chunks have to fit in the prompt alongside the instructions.

    Returns:
        A ``(response, docs)`` tuple: the model's answer flattened to a single
        line, and the documents returned by the similarity search.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    # NOTE(review): text-davinci-003 appears on OpenAI's deprecation list —
    # confirm the model is still served before relying on this call.
    llm = OpenAI(model="text-davinci-003")

    prompt = PromptTemplate(
        input_variables=["question", "docs"],
        template=(
            "You are a helpful YouTube assistant that can answer questions about videos from video transcripts.\n"
            "Answer the following question: {question}\n"
            "By searching the following video transcript: {docs}\n"
            'If you feel like you do not have enough information to give the answer, simply say "I have not much information to answer the question!"\n'
            "Your answers should be detailed.\n"
        ),
    )

    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(question=query, docs=docs_page_content)

    # Flatten the answer to one line. Newlines are replaced by a single space
    # rather than deleted, because deleting them fused the last word of one
    # line with the first word of the next; blank lines are dropped entirely.
    response = " ".join(
        segment.strip() for segment in response.split("\n") if segment.strip()
    )
    return response, docs