sidmanale643 committed on
Commit
0b0e355
β€’
1 Parent(s): fc6c4eb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain.callbacks.manager import CallbackManager
5
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
6
+ from langchain.llms import LlamaCpp
7
+ from llama_cpp import Llama
8
+ from huggingface_hub import hf_hub_download
9
+ from langchain_community.vectorstores import Chroma
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from huggingface_hub import hf_hub_download
12
+ from langchain.chains.summarize import load_summarize_chain
13
+ from langchain.document_loaders import YoutubeLoader
14
+ import streamlit as st
15
+ from langchain.chains import RetrievalQA
16
+ from langchain import PromptTemplate
17
+ import os
18
+ import re
19
+ from langchain_community.llms import Ollama
20
+
21
+ template = f"""Use the following pieces of information to answer the user's question.
22
+ If you don't know the answer or if the data might be outdated, just say that you don't know or acknowledge the potential time lapse, don't try to make up an answer.
23
+
24
+ Context: {{context}}
25
+ Question: {{question}}
26
+
27
+ Only return the helpful answer below and nothing else.
28
+ Helpful answer:
29
+ """
30
def set_custom_prompt():
    """Build the QA PromptTemplate from the module-level ``template`` string."""
    return PromptTemplate(
        template=template,
        input_variables=['context', 'question'],
    )
33
+
34
def load_data(url):
    """Fetch the transcript (plus video metadata) for a YouTube URL.

    Returns the list of LangChain Documents produced by YoutubeLoader.
    """
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    return loader.load()
38
+
39
def split_data(data):
    """Chunk documents into 2000-character pieces with a 200-character overlap."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
    )
    return text_splitter.split_documents(data)
43
+
44
def init_llm():
    """Download the Gemma 2B-it GGUF weights and wrap them in a LlamaCpp LLM.

    Requires an ``HF_TOKEN`` entry in Streamlit secrets (the model repo is
    gated on the Hugging Face Hub).

    Returns:
        LlamaCpp: an LLM instance that streams generated tokens to stdout.
    """
    model_name = "google/gemma-2b-it"
    model_file = "gemma-2b-it.gguf"
    HF_TOKEN = st.secrets["HF_TOKEN"]
    model_pth = hf_hub_download(model_name,
                                filename=model_file,
                                token=HF_TOKEN)
    # Stream tokens to stdout as they are generated (server-side visibility).
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path=model_pth,
        max_tokens=2000,
        n_gpu_layers=-1,  # offload all layers to GPU when one is available
        callback_manager=callback_manager,
        verbose=True,
    )
    # BUG FIX: the original left `return llm` commented out, so every
    # caller (chat_bot, the Summarise handler) received None and the
    # downstream chains would fail. Return the constructed LLM.
    return llm
55
+
56
def init_db(splits):
    """Embed the document chunks and persist them into a local Chroma store.

    Args:
        splits: list of LangChain Documents to index.

    Returns:
        Chroma: the populated, persisted vector store.
    """
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': 'cpu'})
    # BUG FIX: the original passed the undefined name `embedding_func`
    # (NameError at runtime — the local is `embeddings`) and discarded the
    # store even though the caller does `db = init_db(splits)`.
    return Chroma.from_documents(splits, embeddings, persist_directory="./chroma_db5")
60
+
61
def init_chain(llm, db_chroma, prompt):
    """Assemble a RetrievalQA 'stuff' chain over the Chroma retriever.

    The retriever returns the top-4 matching chunks, which are stuffed
    into the prompt alongside the user's question.
    """
    retriever = db_chroma.as_retriever(search_kwargs={"k": 4})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        chain_type_kwargs={'prompt': prompt},
    )
68
+
69
def chat_bot():
    """Load the persisted Chroma index and wire up the full QA pipeline.

    Returns the RetrievalQA chain ready to answer queries about the
    previously analyzed video.
    """
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': 'cpu'})
    store = Chroma(persist_directory="./chroma_db5", embedding_function=embeddings)
    return init_chain(init_llm(), store, set_custom_prompt())
77
+
78
# --- Page chrome -----------------------------------------------------------
# set_page_config must be the first Streamlit call on the page.
st.set_page_config(page_title="InsightBOT : Your YouTube Companion" ,page_icon = "🤖")
st.title("InsightBOT 🤖")

st.sidebar.subheader("Youtube URL 🔗")

# Video URL entered by the user; consumed by the Analyze/Summarise handlers
# and the chat flow below.
url = st.sidebar.text_input('Enter Youtube Video URL:')
84
+
85
+ if st.sidebar.button("Analyze Video"):
86
+ st.video(url, format='video/mp4')
87
+
88
+ with st.spinner("Extracting insights... πŸ§ πŸ’‘"):
89
+ data = load_data(url)
90
+ splits = split_data(data)
91
+ db = init_db(splits)
92
+
93
# Produce a summary of the video transcript on demand.
if st.sidebar.button('Summarise Video'):
    with st.spinner('Writing video synopsis... 🖊️'):
        data = load_data(url)
        splits = split_data(data)
        llm = init_llm()
        # map_reduce: summarise each chunk independently, then combine
        # the partial summaries into one synopsis.
        sum_chain = load_summarize_chain(llm=llm, chain_type="map_reduce")
        summary = sum_chain.run(splits)
        st.write(summary)
101
+
102
+ st.markdown("Summarise and Engage with Your Video Content! πŸ’‘")
103
+
104
+ st.sidebar.markdown("---")
105
+ st.sidebar.caption("Created By: Sidhant Manale ")
106
+
107
+ if "messages" not in st.session_state:
108
+ st.session_state["messages"] = [{"role": "assistant", "content": f"Hey! How can I help you?"}]
109
+
110
+ for msg in st.session_state.messages:
111
+ st.chat_message(msg["role"]).write(msg["content"])
112
+
113
+ st.sidebar.markdown("#")
114
+ st.sidebar.markdown("#")
115
+ st.sidebar.subheader("Clear Chat")
116
+ if st.sidebar.button("Reset"):
117
+ st.session_state.messages = []
118
+
119
# Main chat flow: record the user turn, run the QA chain, and reply with
# a source attribution appended.
if user_prompt := st.chat_input():

    st.session_state.messages.append({"role": "user", "content": user_prompt})
    st.chat_message("user").write(user_prompt)
    with st.spinner("Thinking..."):
        qa_result = chat_bot()
        response = qa_result({'query': user_prompt})
        bot_answer = response['result']

        # BUG FIX: the original indexed the misspelled key
        # 'source_documentsz ', raising KeyError whenever source documents
        # were present. A single .get() covers both presence and truthiness.
        source_docs = response.get('source_documents')
        if source_docs:
            metadata = source_docs[0].metadata
            # Derive a display name from the source path (strip directories
            # and the file extension).
            file = metadata['source'].split("\\")[-1]
            source = os.path.splitext(file)[0]
            assistant_answer = f"{bot_answer} \n\n Source : {source} Video"
        else:
            # BUG FIX: label the fallback with the model actually loaded
            # (google/gemma-2b-it), not "Llama".
            source = "Gemma"
            assistant_answer = f"{bot_answer} \n\n Source : {source} Model"

        # BUG FIX: the original built assistant_answer (with attribution)
        # but then stored and displayed the bare bot_answer, so the source
        # line never reached the user.
        st.session_state.messages.append({"role": "assistant", "content": assistant_answer})
        st.chat_message("assistant").write(assistant_answer)
140
+