Kushwanth Chowday Kandala commited on
Commit
a1f90e6
1 Parent(s): 7a0f54c

prompt_engineer using BART text generation integrated with retrieval-context text.

Browse files
Files changed (1) hide show
  1. app.py +28 -10
app.py CHANGED
@@ -7,6 +7,7 @@ from io import StringIO
7
  import PyPDF2
8
  from tqdm import tqdm
9
  import math
 
10
  # import json
11
 
12
  # st.config(PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")
@@ -77,6 +78,27 @@ def get_pinecone_semantic_index(pinecone):
77
  # st.text(f"Succesfully connected to the pinecone index")
78
  return index
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def chat_actions():
81
 
82
  pinecone = connect_pinecone()
@@ -90,14 +112,16 @@ def chat_actions():
90
  # create the query vector
91
  query_vector = query_embedding.tolist()
92
  # now query vector database
93
- result = index.query(query_vector, top_k=5, include_metadata=True) # xc is a list of tuples
94
 
95
  # Create a list of lists
96
  data = []
 
97
  i = 0
98
  for res in result['matches']:
99
  i = i + 1
100
  data.append([f"{i}⭐", res['score'], res['metadata']['text']])
 
101
 
102
  # Create a DataFrame from the list of lists
103
  resdf = pd.DataFrame(data, columns=['TopRank', 'Score', 'Text'])
@@ -105,6 +129,7 @@ def chat_actions():
105
  with st.sidebar:
106
  st.markdown("*:red[semantic search results]* with **:green[Retrieval Augmented Generation]** ***(RAG)***.")
107
  st.dataframe(resdf)
 
108
 
109
  for res in result['matches']:
110
  st.session_state["chat_history"].append(
@@ -169,15 +194,6 @@ def create_embeddings():
169
  # Display the contents of the file
170
  # st.write(file_contents)
171
 
172
- # def promt_engineer(text):
173
- # promt_template = """
174
- # write a concise summary of the following text delimited by triple backquotes.
175
- # return your response in bullet points which convers the key points of the text.
176
-
177
- # ```{text}```
178
-
179
- # BULLET POINT SUMMARY:
180
- # """
181
 
182
  with st.sidebar:
183
  st.markdown("""
@@ -187,6 +203,7 @@ with st.sidebar:
187
  - It Takes couple of mins after upload the pdf
188
  - Now Chat with model to get the summarized info
189
  - Generate Promted reponses on the upload pdf
 
190
  """)
191
  uploaded_files = st.file_uploader('Choose your .pdf file', type="pdf", accept_multiple_files=True, key="uploaded_files", on_change=create_embeddings)
192
  # for uploaded_file in uploaded_files:
@@ -211,3 +228,4 @@ with st.sidebar:
211
  # print_out(pages)
212
  # combine_text(pages)
213
  # promt_engineer(text)
 
 
7
  import PyPDF2
8
  from tqdm import tqdm
9
  import math
10
+ from transformers import pipeline
11
  # import json
12
 
13
  # st.config(PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")
 
78
  # st.text(f"Succesfully connected to the pinecone index")
79
  return index
80
 
81
+ def promt_engineer(text):
82
+ promt_template = """
83
+ write a concise summary of the following text delimited by triple backquotes.
84
+ return your response in bullet points which convers the key points of the text.
85
+
86
+ ```{text}```
87
+
88
+ BULLET POINT SUMMARY:
89
+ """
90
+ # Load the summarization pipeline with the specified model
91
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
92
+
93
+ # Generate the prompt
94
+ prompt = prompt_template.format(text=text)
95
+
96
+ # Generate the summary
97
+ summary = summarizer(prompt, max_length=100, min_length=50)[0]["summary_text"]
98
+
99
+ with st.sidebar:
100
+ st.write(summary)
101
+
102
  def chat_actions():
103
 
104
  pinecone = connect_pinecone()
 
112
  # create the query vector
113
  query_vector = query_embedding.tolist()
114
  # now query vector database
115
+ result = index.query(query_vector, top_k=5, include_metadata=True) # result is a list of tuples
116
 
117
  # Create a list of lists
118
  data = []
119
+ consolidated_text = ""
120
  i = 0
121
  for res in result['matches']:
122
  i = i + 1
123
  data.append([f"{i}⭐", res['score'], res['metadata']['text']])
124
+ consolidated_text.append(f"{res['metadata']['text']}\n\n")
125
 
126
  # Create a DataFrame from the list of lists
127
  resdf = pd.DataFrame(data, columns=['TopRank', 'Score', 'Text'])
 
129
  with st.sidebar:
130
  st.markdown("*:red[semantic search results]* with **:green[Retrieval Augmented Generation]** ***(RAG)***.")
131
  st.dataframe(resdf)
132
+ promt_engineer(consolidated_text)
133
 
134
  for res in result['matches']:
135
  st.session_state["chat_history"].append(
 
194
  # Display the contents of the file
195
  # st.write(file_contents)
196
 
 
 
 
 
 
 
 
 
 
197
 
198
  with st.sidebar:
199
  st.markdown("""
 
203
  - It Takes couple of mins after upload the pdf
204
  - Now Chat with model to get the summarized info
205
  - Generate Promted reponses on the upload pdf
206
+ - Provides summarized results and QA's using GPT models
207
  """)
208
  uploaded_files = st.file_uploader('Choose your .pdf file', type="pdf", accept_multiple_files=True, key="uploaded_files", on_change=create_embeddings)
209
  # for uploaded_file in uploaded_files:
 
228
  # print_out(pages)
229
  # combine_text(pages)
230
  # promt_engineer(text)
231
+