tinaranathania committed on
Commit
1221a55
1 Parent(s): 5963da1

Upload try.py

Files changed (1)
  1. try.py +101 -0
try.py ADDED
@@ -0,0 +1,101 @@
+ # Q&A Chatbot
+ from langchain.llms import OpenAI
+ from langchain.document_loaders import YoutubeLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from langchain.chains import LLMChain
+ from dotenv import find_dotenv, load_dotenv
+ from langchain.prompts.chat import (
+     ChatPromptTemplate,
+     SystemMessagePromptTemplate,
+     HumanMessagePromptTemplate,
+ )
+ import textwrap
+
+ load_dotenv(find_dotenv())
+ embeddings = OpenAIEmbeddings()
+
+ # load_dotenv()  # take environment variables from .env.
+
+ import streamlit as st
+ import os
+
+ def create_db_from_youtube_video_url(video_url):
+     # Get the video transcript
+     loader = YoutubeLoader.from_youtube_url(video_url)
+     transcript = loader.load()
+
+     # Split the transcript into overlapping chunks that fit the model's context
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
+
+     # List of split-up transcript documents
+     docs = text_splitter.split_documents(transcript)
+
+     # Create a database: embed each chunk as a vector (numerical representation of the docs)
+     db = FAISS.from_documents(docs, embeddings)
+     return db
+
+ # Why k=4? The model can handle up to 16,385 tokens. The chunk size is set to 2,000 and k to 4 to maximize the amount of transcript analyzed.
+ def get_response_from_query(db, query, k=4):
+     # Filter the database for the chunks most similar to the prompt
+     docs = db.similarity_search(query, k=k)
+     docs_page_content = " ".join([d.page_content for d in docs])
+
+     chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)
+
+     # Template to use for the system message prompt
+     template = """
+     You are a helpful assistant that can answer questions about YouTube videos
+     based on the video's transcript: {docs}
+
+     Only use the factual information from the transcript to answer the question.
+
+     If you feel like you don't have enough information to answer the question, say "I don't know".
+     """
+
+     system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+
+     # Human question prompt
+     human_template = "Answer the following question: {question}"
+     human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+     # Combine into a chat prompt
+     chat_prompt = ChatPromptTemplate.from_messages(
+         [system_message_prompt, human_message_prompt]
+     )
+
+     chain = LLMChain(llm=chat, prompt=chat_prompt)
+
+     response = chain.run(question=query, docs=docs_page_content)
+     response = response.replace("\n", "")
+     return response, docs
+
+
+ # Webpage with Streamlit
+
+ st.set_page_config(page_title="YouTube Video Q&A Demo")
+
+ st.header("Langchain Application")
+
+ youtube_input = st.text_input("YouTube Link: ", key="youtube_input")
+ query = st.text_input("Your Question Here: ", key="query")
+
+ submit = st.button("Ask the question")
+
+ ## If the ask button is clicked, build the database and answer the question
+ if submit:
+     if youtube_input != "" and query != "":
+         db = create_db_from_youtube_video_url(youtube_input)
+         response, docs = get_response_from_query(db, query)
+         st.subheader("The Response is")
+         st.write(response)
+     else:
+         st.warning("Please enter both a YouTube link and a question.")
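
For quick testing outside the Streamlit page, the two helpers can also be exercised directly. A minimal sketch, assuming an OpenAI key is available to langchain via the environment or the .env file that load_dotenv(find_dotenv()) reads; the video URL and question below are placeholders, not values from the original file:

    video_url = "https://www.youtube.com/watch?v=VIDEO_ID"  # placeholder URL
    question = "What is the video about?"                    # placeholder question
    db = create_db_from_youtube_video_url(video_url)
    answer, source_docs = get_response_from_query(db, question, k=4)
    print(textwrap.fill(answer, width=80))  # textwrap is imported above but otherwise unused

To launch the web app itself, the usual entry point would be: streamlit run try.py, with the same .env file alongside the script.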