Spaces:

ighoshsubho
/

youtube-summarize-QA

Runtime error

App Files Files Community

ighoshsubho committed on Aug 30, 2023

Commit

acb0418

•

1 Parent(s): cea395c

Space environment setup done

Browse files

Files changed (2) hide show

app.py +138 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# Import all the necessary modules
+from IPython.display import YouTubeVideo
+from langchain.document_loaders import YoutubeLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import LLMChain
+from langchain.chains.summarize import load_summarize_chain
+from langchain.llms import HuggingFacePipeline
+from langchain import PromptTemplate
+import locale
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+import langchain
+# Print the installed LangChain version at startup.
+print(langchain.__version__)
+# Load the transcript of a sample video.
+loader = YoutubeLoader.from_youtube_url(
+    "https://www.youtube.com/watch?v=tAuRQs_d9F8&t=52s"
+)
+transcript = loader.load()
+# Recursively split the transcript into chunks (chunk_size=2000,
+# chunk_overlap=50) and keep the resulting pieces in `texts`.
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_overlap=50,
+    chunk_size=2000,
+)
+texts = text_splitter.split_documents(transcript)
+# Settings for the summarization model and its generation pipeline.
+model_repo = 'tiiuae/falcon-rw-1b'
+task = "text-generation"
+max_len = 2048  # 1024
+T = 0
+# Load the tokenizer and the causal language model from the same repository.
+tokenizer = AutoTokenizer.from_pretrained(model_repo)
+# NOTE(review): 8-bit loading presumably needs bitsandbytes and suitable
+# hardware; confirm against the hardware this app is deployed on.
+_model_load_options = {
+    'load_in_8bit': True,
+    'device_map': 'auto',
+    'torch_dtype': torch.float16,
+    'low_cpu_mem_usage': True,
+    'trust_remote_code': True,
+}
+model = AutoModelForCausalLM.from_pretrained(model_repo, **_model_load_options)
+# Build the text-generation pipeline.
+# NOTE(review): pad_token_id is hard-coded to 11; confirm that this id is the
+# intended padding token for this tokenizer.
+_generation_options = {
+    'max_length': max_len,
+    'temperature': T,
+    'top_p': 0.95,
+    'repetition_penalty': 1.15,
+    'pad_token_id': 11,
+}
+pipe = pipeline(
+    task=task,
+    model=model,
+    tokenizer=tokenizer,
+    **_generation_options,
+)
+# Wrap the pipeline so it can be used as a LangChain LLM.
+llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
+# Initializing the LLM chain used for summarization.
+# The template has a single placeholder, {text}, which is wrapped in triple
+# backquotes; the prompt asks for a bullet-point summary of that text.
+template = """
+              Write a concise summary of the following text delimited by triple backquotes.
+              Return your response in bullet points which covers the key points of the text.
+              ```{text}```
+              BULLET POINT SUMMARY:
+           """
+# Bind the template to its only input variable, "text".
+prompt = PromptTemplate(template=template, input_variables=["text"])
+# Chain the prompt with the LLM built above; build_the_bot_summarizer runs it.
+llm_chain = LLMChain(prompt=prompt, llm=llm)
+# Override locale.getpreferredencoding so that it always reports UTF-8.
+locale.getpreferredencoding = lambda: "UTF-8"
+# Import and initialize the question-answering pipeline.
+model_checkpoint = "IProject-10/bert-base-uncased-finetuned-squad2"
+question_answerer = pipeline("question-answering", model=model_checkpoint)
+# Default QA context: the raw text of the sample transcript. Read it from the
+# document's `page_content` attribute instead of formatting the whole Document
+# to a string and slicing off its first 14 characters, which left whatever
+# follows the text in that string representation inside the context.
+text1 = transcript[0].page_content
+context = text1
+# Get the context of the video
+def get_context(input_text):
+    """Return the transcript text of the YouTube video at ``input_text``.
+
+    Args:
+        input_text: URL of the YouTube video. It is converted to a string
+            before being passed to ``YoutubeLoader.from_youtube_url``.
+
+    Returns:
+        The ``page_content`` of the first document produced by the loader.
+
+    Raises:
+        ValueError: If the loader returns no documents for the video.
+    """
+    loader = YoutubeLoader.from_youtube_url(str(input_text))
+    documents = loader.load()
+    if not documents:
+        # Fail with an explicit message instead of an IndexError on [0].
+        raise ValueError(
+            "No transcript could be loaded for: {}".format(input_text)
+        )
+    # Use the document's text directly rather than slicing its string
+    # representation, so only the transcript itself ends up in the context.
+    return documents[0].page_content
+# Building the bot function
+def build_the_bot(text1):
+    """Return the fixed confirmation message shown once the bot is built.
+
+    ``text1`` is accepted for the caller's convenience; it does not influence
+    the returned message.
+    """
+    return 'Bot Build Successfull!!!'
+# Building the bot summarizer function
+def build_the_bot_summarizer(text1):
+    """Run ``text1`` through the module-level ``llm_chain`` and return its output."""
+    summary = llm_chain.run(text1)
+    return summary
+# The chat space for gradio is served here
+def chat(chat_history, user_input, context):
+    """Answer ``user_input`` from ``context`` and stream the reply.
+
+    The module-level ``question_answerer`` is called with the question and
+    the context, and its "answer" field is revealed one character at a time.
+
+    Args:
+        chat_history: List of (user message, bot reply) pairs shown so far.
+        user_input: The question typed by the user.
+        context: Text the answer is extracted from.
+
+    Yields:
+        ``chat_history`` extended with a (user_input, partial reply) pair,
+        once per character of the answer. Nothing is yielded for an empty
+        answer.
+    """
+    result = question_answerer(question=user_input, context=context)
+    answer = ''.join(result["answer"])
+    # Each step exposes one more character of the answer.
+    for shown in range(1, len(answer) + 1):
+        yield chat_history + [(user_input, answer[:shown])]
+# Serving the entire gradio app
+with gr.Blocks() as demo:
+    gr.Markdown('# YouTube Q&A and Summarizer Bot')
+    # Holds the transcript used as the Q&A context. It starts as the sample
+    # video's context and is replaced when the user builds the bot for a URL.
+    context_state = gr.State(context)
+
+    def _build_from_url(url):
+        """Load the transcript for ``url``; return (status message, new context)."""
+        video_context = get_context(url)
+        return build_the_bot(video_context), video_context
+
+    def _summarize_from_url(url):
+        """Load the transcript for ``url`` and return its summary."""
+        return build_the_bot_summarizer(get_context(url))
+
+    with gr.Tab("Input URL of video you wanna load -"):
+        text_input = gr.Textbox()
+        text_output = gr.Textbox()
+        text_button1 = gr.Button("Build the Bot!!!")
+        # Pass the Textbox component as the input so that its value is read
+        # when the button is clicked. Calling get_context(text_input) here
+        # would run once, while the UI is being built, on the component
+        # object itself rather than on the URL typed by the user.
+        text_button1.click(_build_from_url, text_input, [text_output, context_state])
+        text_button2 = gr.Button("Summarize...")
+        text_button2.click(_summarize_from_url, text_input, text_output)
+    with gr.Tab("Knowledge Base -"):
+        chatbot = gr.Chatbot()
+        message = gr.Textbox("What is this Youtube Video about?")
+        # chat() takes (chat_history, user_input, context); the context is
+        # supplied from the state as the third input.
+        message.submit(chat, [chatbot, message, context_state], chatbot)
+demo.queue().launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+langchain
+youtube_transcript_api
+einops
+accelerate
+bitsandbytes
+xformers
+gradio
+transformers