mutea committed on
Commit
f7183a0
1 Parent(s): 37c9f83

Update langchain_helper.py

Browse files
Files changed (1) hide show
  1. langchain_helper.py +60 -0
langchain_helper.py CHANGED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import YoutubeLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.llms import OpenAI
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain.chains import LLMChain
6
+ from langchain.vectorstores import FAISS # for similarity search
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+
9
+
10
+ from dotenv import load_dotenv
11
+
12
# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key is supplied;
# verify against the deployment setup.
load_dotenv()

# Module-level embedding client, created once at import time and reused by
# create_vector_db_from_ytUrl for every vector store it builds.
embeddings = OpenAIEmbeddings()
15
+
16
+ # video_url = "https://www.youtube.com/watch?v=OyFJWRnt_AY"
17
+
18
+
19
def create_vector_db_from_ytUrl(
    video_url: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 100,
) -> FAISS:
    """Build a FAISS vector store from a YouTube video's transcript.

    The transcript is loaded with ``YoutubeLoader``, split into overlapping
    chunks with ``RecursiveCharacterTextSplitter``, embedded with the
    module-level ``embeddings`` client and indexed in FAISS.

    NOTE(review): ``YoutubeLoader`` retrieves the video's existing
    captions/transcript; it does not transcribe audio itself. Confirm
    against the installed langchain version.

    Args:
        video_url: URL of the YouTube video to index.
        chunk_size: Maximum size of each transcript chunk, as measured by
            the splitter (characters by default).
        chunk_overlap: Amount of text shared between neighbouring chunks so
            that content cut at a chunk boundary remains searchable.

    Returns:
        A FAISS vector store with one entry per transcript chunk.

    Raises:
        ValueError: If no transcript text could be loaded for the video.
    """
    loader = YoutubeLoader.from_youtube_url(video_url)
    video_transcript = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = text_splitter.split_documents(video_transcript)

    # An empty document list cannot be indexed; fail here with a clear
    # message instead of an opaque error from inside the vector store.
    if not docs:
        raise ValueError(
            f"No transcript content could be loaded for video: {video_url}"
        )

    return FAISS.from_documents(docs, embeddings)
32
+
33
+
34
def get_response_from_query(db, query: str, k: int = 4,
                            model: str = "text-davinci-003"):
    """Answer a question about a video using its indexed transcript.

    Runs a semantic (similarity) search for ``query`` against ``db``, joins
    the text of the ``k`` best-matching chunks into one context string, and
    asks an OpenAI completion model to answer the question from that
    context.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)``, e.g.
            the FAISS store built from a video transcript.
        query: The user's question.
        k: Number of transcript chunks to retrieve. The original author
            notes that text-davinci has a 4097-token limit, so ``k`` bounds
            how much transcript text is placed in the prompt.
        model: Name of the OpenAI completion model to use.
            NOTE(review): ``text-davinci-003`` is a legacy model; confirm it
            is still served or pass a current model name.

    Returns:
        A ``(response, docs)`` tuple: the model's answer flattened onto a
        single line, and the list of retrieved documents.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    llm = OpenAI(model=model)

    prompt = PromptTemplate(
        input_variables=["question", "docs"],
        template=(
            "You are a helpful YouTube assistant that can answer questions "
            "about videos from video transcripts.\n"
            "Answer the following question: {question}\n"
            "By searching the following video transcript: {docs}\n"
            "\n"
            "If you feel like you do not have enough information to give "
            'the answer, simply say "I have not much information to answer '
            'the question!"\n'
            "\n"
            "Your answers should be detailed.\n"
        ),
    )

    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(question=query, docs=docs_page_content)

    # Flatten the answer onto one line. Line breaks become a single space so
    # that the words on either side of a break are not glued together;
    # leading/trailing blank lines are dropped.
    response = " ".join(piece for piece in response.split("\n") if piece)
    return response, docs