Spaces:
Runtime error
Runtime error
ighoshsubho
committed on
Commit
•
acb0418
1
Parent(s):
cea395c
Space environment setup done
Browse files- app.py +138 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# importing all the necessary files
|
2 |
+
|
3 |
+
from IPython.display import YouTubeVideo
|
4 |
+
|
5 |
+
from langchain.document_loaders import YoutubeLoader
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from langchain.chains import LLMChain
|
8 |
+
from langchain.chains.summarize import load_summarize_chain
|
9 |
+
from langchain.llms import HuggingFacePipeline
|
10 |
+
from langchain import PromptTemplate
|
11 |
+
import locale
|
12 |
+
import gradio as gr
|
13 |
+
|
14 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
15 |
+
|
16 |
+
import torch
|
17 |
+
|
18 |
+
import langchain
|
19 |
+
print(langchain.__version__)
|
20 |
+
|
21 |
+
#Loading a sample video into transcript
|
22 |
+
|
23 |
+
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=tAuRQs_d9F8&t=52s")
|
24 |
+
transcript = loader.load()
|
25 |
+
|
26 |
+
# Recursive splitting of text and storing it into texts
|
27 |
+
|
28 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
|
29 |
+
texts = text_splitter.split_documents(transcript)
|
30 |
+
|
31 |
+
# Loading the model
|
32 |
+
|
33 |
+
model_repo = 'tiiuae/falcon-rw-1b'
|
34 |
+
|
35 |
+
tokenizer = AutoTokenizer.from_pretrained(model_repo)
|
36 |
+
|
37 |
+
model = AutoModelForCausalLM.from_pretrained(model_repo,
|
38 |
+
load_in_8bit=True,
|
39 |
+
device_map='auto',
|
40 |
+
torch_dtype=torch.float16,
|
41 |
+
low_cpu_mem_usage=True,
|
42 |
+
trust_remote_code=True
|
43 |
+
)
|
44 |
+
max_len = 2048 # 1024
|
45 |
+
task = "text-generation"
|
46 |
+
T = 0
|
47 |
+
|
48 |
+
# Building the pipeline
|
49 |
+
|
50 |
+
pipe = pipeline(
|
51 |
+
task=task,
|
52 |
+
model=model,
|
53 |
+
tokenizer=tokenizer,
|
54 |
+
max_length=max_len,
|
55 |
+
temperature=T,
|
56 |
+
top_p=0.95,
|
57 |
+
repetition_penalty=1.15,
|
58 |
+
pad_token_id = 11
|
59 |
+
)
|
60 |
+
|
61 |
+
llm = HuggingFacePipeline(pipeline=pipe, model_kwargs = {'temperature':0})
|
62 |
+
|
63 |
+
# Initializing the LLM chain
|
64 |
+
|
65 |
+
template = """
|
66 |
+
Write a concise summary of the following text delimited by triple backquotes.
|
67 |
+
Return your response in bullet points which covers the key points of the text.
|
68 |
+
```{text}```
|
69 |
+
BULLET POINT SUMMARY:
|
70 |
+
"""
|
71 |
+
|
72 |
+
prompt = PromptTemplate(template=template, input_variables=["text"])
|
73 |
+
|
74 |
+
llm_chain = LLMChain(prompt=prompt, llm=llm)
|
75 |
+
|
76 |
+
# Replace locale.getpreferredencoding with a stub that always reports "UTF-8".
# NOTE(review): presumably a workaround for hosts whose default locale is not
# UTF-8 — confirm it is still required before keeping this monkeypatch.
locale.getpreferredencoding = lambda: "UTF-8"
|
77 |
+
|
78 |
+
# Import and initialize the question-answering pipeline
|
79 |
+
|
80 |
+
model_checkpoint = "IProject-10/bert-base-uncased-finetuned-squad2"
|
81 |
+
question_answerer = pipeline("question-answering", model=model_checkpoint)
|
82 |
+
|
83 |
+
# Default Q&A context: the text of the first document loaded for the sample
# video. Read the text attribute directly; slicing the first 14 characters off
# the document's string form depends on that string's exact layout.
text1 = transcript[0].page_content
context = text1
|
86 |
+
|
87 |
+
# Get the context of the video
|
88 |
+
|
89 |
+
def get_context(input_text):
    """Return the transcript text of the YouTube video at ``input_text``.

    Args:
        input_text: URL of the YouTube video whose transcript should be loaded.

    Returns:
        The text of the first document produced by ``YoutubeLoader``, or an
        empty string when the loader returns no documents.
    """
    loader = YoutubeLoader.from_youtube_url(str(input_text))
    documents = loader.load()
    if not documents:
        return ""
    # Read the text attribute directly; slicing the first 14 characters off
    # the document's string form depends on that string's exact layout.
    return documents[0].page_content
|
96 |
+
|
97 |
+
# Building the bot function
|
98 |
+
|
99 |
+
def build_the_bot(text1):
    """Return the status message shown once a transcript has been loaded.

    Args:
        text1: Transcript text. Accepted so the function can be used as an
            event handler, but not stored or inspected here.

    Returns:
        The fixed status string displayed in the UI.
    """
    return 'Bot Build Successfull!!!'
|
102 |
+
|
103 |
+
# Building the bot summarizer function
|
104 |
+
|
105 |
+
def build_the_bot_summarizer(text1):
    """Summarize ``text1`` by running it through the module-level ``llm_chain``.

    Args:
        text1: The text to summarize.

    Returns:
        Whatever ``llm_chain.run`` returns for that text.
    """
    summary = llm_chain.run(text1)
    return summary
|
108 |
+
|
109 |
+
# The chat handler for the gradio app is defined here
|
110 |
+
|
111 |
+
def chat(chat_history, user_input, context):
    """Answer ``user_input`` from ``context`` and stream the reply into the chat.

    Args:
        chat_history: List of ``(user, bot)`` message pairs already shown;
            ``None`` is treated as an empty history.
        user_input: The question typed by the user.
        context: Text handed to the question-answering pipeline as context.

    Yields:
        ``chat_history`` extended with ``(user_input, partial_answer)``, where
        the partial answer grows by one character per step. If the answer is
        empty, a single update with an empty reply is yielded so the question
        still appears in the chat.
    """
    history = chat_history or []
    output = question_answerer(question=user_input, context=context)
    bot_response = output["answer"]

    if not bot_response:
        # Nothing to stream: emit one update instead of yielding nothing.
        yield history + [(user_input, "")]
        return

    # Emit growing prefixes of the answer to produce a typing effect.
    for end in range(1, len(bot_response) + 1):
        yield history + [(user_input, bot_response[:end])]
|
120 |
+
|
121 |
+
# Serving the entire gradio app
|
122 |
+
|
123 |
+
def _build_from_url(url):
    """Load the transcript for ``url``; return the status message and the text."""
    loaded_context = get_context(url)
    return build_the_bot(loaded_context), loaded_context


def _summarize_url(url):
    """Load the transcript for ``url`` and return its summary."""
    return build_the_bot_summarizer(get_context(url))


with gr.Blocks() as demo:
    gr.Markdown('# YouTube Q&A and Summarizer Bot')
    # Per-session transcript used by the Q&A tab. It starts as the module-level
    # default context and is replaced when the user builds the bot for a URL.
    context_state = gr.State(context)
    with gr.Tab("Input URL of video you wanna load -"):
        text_input = gr.Textbox()
        text_output = gr.Textbox()
        text_button1 = gr.Button("Build the Bot!!!")
        # Event listeners take (fn, inputs, outputs). The URL has to be read
        # from the Textbox when the button is clicked, so the transcript is
        # loaded inside the handler rather than while the layout is built.
        text_button1.click(_build_from_url, text_input, [text_output, context_state])
        text_button2 = gr.Button("Summarize...")
        text_button2.click(_summarize_url, text_input, text_output)
    with gr.Tab("Knowledge Base -"):
        chatbot = gr.Chatbot()
        message = gr.Textbox ("What is this Youtube Video about?")
        # chat() takes three arguments, so the stored context is passed as the
        # third input alongside the chat history and the new message.
        message.submit(chat, [chatbot, message, context_state], chatbot)

demo.queue().launch()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
youtube_transcript_api
|
3 |
+
einops
|
4 |
+
accelerate
|
5 |
+
bitsandbytes
|
6 |
+
xformers
|
7 |
+
gradio
|
8 |
+
transformers
|