heliosbrahma committed on
Commit
54af26b
1 Parent(s): efaf399

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +141 -0
  2. prompt_template.txt +14 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ warnings.filterwarnings("ignore")
4
+ import os, requests, openai, cohere
5
+ import gradio as gr
6
+ from pathlib import Path
7
+ from langchain.document_loaders import YoutubeLoader
8
+ from langchain.docstore.document import Document
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.embeddings import CohereEmbeddings
11
+ from langchain.vectorstores import Qdrant
12
+ from langchain.chat_models import ChatOpenAI
13
+ from langchain.prompts import PromptTemplate
14
+ from langchain.chains import RetrievalQA
15
+ from langchain.chains.summarize import load_summarize_chain
16
+
17
# Credentials and endpoints are read from the environment at import time;
# a missing variable raises KeyError before the app starts.
COHERE_API_KEY = os.environ["COHERE_API_KEY"]  # passed to CohereEmbeddings
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]  # passed to Qdrant.from_documents
QDRANT_CLUSTER_URL = os.environ["QDRANT_CLUSTER_URL"]
QDRANT_COLLECTION_NAME = os.environ["QDRANT_COLLECTION_NAME"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]  # passed to ChatOpenAI
# Path of the prompt template file that yt_loader reads for the QA prompt.
prompt_file = "prompt_template.txt"
23
+
24
+
25
def yt_loader(yt_url):
    """Index a Youtube video's transcript and stream progress messages.

    This is a generator used as a Gradio event handler: every yielded string
    is sent to the output component. It validates the URL against Youtube's
    oEmbed endpoint, loads the transcript, splits it into chunks, uploads the
    chunks to Qdrant, builds the question-answering chain and finally
    summarizes the transcript.

    Side effect: rebinds the module-level ``qa`` to a ``RetrievalQA`` chain
    backed by the uploaded chunks; ``chat`` reads that global.

    Args:
        yt_url: URL of the Youtube video, as typed by the user.

    Yields:
        Progress messages, followed by either an error message or the final
        text containing the video id, title, channel name and summary.
    """
    global qa

    # Pass the user-supplied URL through ``params`` so requests URL-encodes it
    # (characters such as "&" or "#" would otherwise be parsed as part of the
    # oEmbed query itself), and bound the wait with a timeout.
    try:
        res = requests.get(
            "https://www.youtube.com/oembed",
            params={"url": yt_url},
            timeout=10,
        )
    except requests.RequestException:
        yield "Could not reach Youtube to validate the URL. Please try again."
        return
    if res.status_code != 200:
        yield "Invalid Youtube URL. Kindly, paste here a valid Youtube URL."
        return

    yield "Extracting transcript from youtube url..."
    loader = YoutubeLoader.from_youtube_url(yt_url, add_video_info=True)
    transcript = loader.load()
    if not transcript:
        # Without this guard the metadata lookup below raises IndexError.
        yield "No transcript could be extracted from this Youtube video."
        return

    metadata = transcript[0].metadata
    video_id = metadata["source"]
    title = metadata["title"]
    author = metadata["author"]

    # Keep only the text; the loader metadata is not stored with the chunks.
    docs = [Document(page_content=part.page_content) for part in transcript]

    yield "Splitting transcript into chunks of text..."
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-3.5-turbo",
        chunk_size=1024,
        chunk_overlap=64,
        separators=["\n\n", "\n", " "],
    )

    docs_splitter = text_splitter.split_documents(docs)
    cohere_embeddings = CohereEmbeddings(model="large", cohere_api_key=COHERE_API_KEY)

    yield "Uploading chunks of text into Qdrant..."
    qdrant = Qdrant.from_documents(
        docs_splitter,
        cohere_embeddings,
        url=QDRANT_CLUSTER_URL,
        prefer_grpc=True,
        api_key=QDRANT_API_KEY,
        collection_name=QDRANT_COLLECTION_NAME,
    )

    # Explicit encoding so the prompt reads the same on every platform.
    with open(prompt_file, "r", encoding="utf-8") as file:
        prompt_template = file.read()

    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["question", "context"]
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo", temperature=0, openai_api_key=OPENAI_API_KEY
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=qdrant.as_retriever(),
        chain_type_kwargs={"prompt": PROMPT},
    )

    yield "Generating summarized text from transcript..."
    chain = load_summarize_chain(llm=llm, chain_type="map_reduce")
    summarized_text = chain.run(docs_splitter)
    yield (
        f"Video ID: {video_id}\n"
        f"Video Title: {title}\n"
        f"Channel Name: {author}\n"
        f"Summarized Text: {summarized_text}"
    )
100
+
101
+
102
def chat(chat_history, query):
    """Answer ``query`` with the QA chain and stream the reply into the chat.

    This is a generator used as a Gradio event handler. Each yielded value is
    the full chat history plus one ``(query, partial_answer)`` pair, with the
    answer growing by one character per yield.

    Args:
        chat_history: Existing list of ``(user, bot)`` message pairs; ``None``
            is treated as an empty history. The list is not mutated.
        query: The question typed by the user.

    Yields:
        A new list: the previous history followed by the current exchange.
    """
    history = list(chat_history or [])

    # The global ``qa`` is only created by ``yt_loader``. Look it up
    # defensively so that asking a question before building the bot shows a
    # message instead of raising NameError.
    chain = globals().get("qa")
    if chain is None:
        yield history + [
            (
                query,
                "AI Assistant is not ready yet. Please load a Youtube video "
                "and click 'Build AI Bot!' before asking questions.",
            )
        ]
        return

    answer = chain.run(query)

    # Emit progressively longer prefixes of the answer.
    for end in range(1, len(answer) + 1):
        yield history + [(query, answer[:end])]
109
+
110
+
111
# Gradio UI: one tab builds the bot from a Youtube URL, the other chats with it.
with gr.Blocks() as demo:
    gr.HTML("""<h1>Welcome to AI Youtube Assistant</h1>""")
    gr.Markdown(
        "Generate transcript from youtube url. Get a summarized text of the video transcript and also ask questions to AI Youtube Assistant.<br>"
        "Click on 'Build AI Bot' to extract transcript from youtube url and get a summarized text.<br>"
        "After summarized text is generated, click on 'AI Assistant' tab and ask queries to the AI Assistant regarding information in the youtube video."
    )

    with gr.Tab("Load/Summarize Youtube Video"):
        text_input = gr.Textbox(
            label="Paste a valid youtube url",
            placeholder="https://www.youtube.com/watch?v=AeJ9q45PfD0",
        )
        text_output = gr.Textbox(label="Summarized transcript of the youtube video")
        text_button = gr.Button(value="Build AI Bot!")
        # yt_loader is a generator; each yielded message goes to text_output.
        text_button.click(yt_loader, text_input, text_output)

    with gr.Tab("AI Assistant"):
        chatbot = gr.Chatbot()
        query = gr.Textbox(
            label="Type your query here, then press 'enter' and scroll up for response"
        )
        chat_button = gr.Button(value="Submit Query!")
        # Size is passed to the constructor: Component.style() was removed in
        # newer Gradio releases, and requirements.txt does not pin a version.
        clear = gr.Button(value="Clear Chat History!", size="sm")
        # Pressing enter and clicking the button both run the same handler.
        query.submit(chat, [chatbot, query], chatbot)
        chat_button.click(chat, [chatbot, query], chatbot)
        # Returning None resets the chatbot component.
        clear.click(lambda: None, None, chatbot, queue=False)


# Queuing is enabled before launch; both handlers are generators.
demo.queue().launch()
prompt_template.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """You are an AI assistant chatting with a user.
2
+ Given the following extracted parts of a video transcript and a question, answer the question truthfully at the end.
3
+ If you don't know the answer, reply that 'Answer is not available in the transcript provided'. Don't try to make up an answer.
4
+
5
+ Example Format:
6
+ CONTEXT: context here
7
+ QUESTION: question here
8
+ ANSWER: answer here
9
+
10
+ Answer should be detailed and be based explicitly on information in the context provided. Begin!
11
+
12
+ CONTEXT: {context}
13
+ QUESTION: {question}
14
+ ANSWER:"""
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ tiktoken
4
+ cohere
5
+ gradio
6
+ youtube-transcript-api
7
+ pytube
8
+ qdrant-client