Spaces:
Runtime error
Runtime error
ashhadahsan
committed on
Commit
•
9e0003c
1
Parent(s):
7857b5a
Update pages/1_π_predict.py
Browse files- pages/1_π_predict.py +49 -26
pages/1_π_predict.py
CHANGED
@@ -15,7 +15,8 @@ from utils.openllmapi.exceptions import *
|
|
15 |
import time
|
16 |
from typing import List
|
17 |
from collections import OrderedDict
|
18 |
-
|
|
|
19 |
|
20 |
tokenizer_kwargs = dict(
|
21 |
max_length=128,
|
@@ -65,6 +66,18 @@ def assignHF(bot, what: str, to: str, old: List):
|
|
65 |
return ""
|
66 |
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
@st.cache_resource
|
69 |
def loadZeroShotClassification():
|
70 |
classifierzero = pipeline(
|
@@ -73,6 +86,17 @@ def loadZeroShotClassification():
|
|
73 |
return classifierzero
|
74 |
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
def assignZeroShot(zero, to: str, old: List):
|
77 |
assigned = zero(to, old)
|
78 |
assigneddict = dict(zip(assigned["labels"], assigned["scores"]))
|
@@ -139,20 +163,18 @@ def classify_sub_theme() -> TextClassificationPipeline:
|
|
139 |
|
140 |
st.set_page_config(layout="wide", page_title="Amazon Review | Summarizer")
|
141 |
st.title("Amazon Review Summarizer")
|
142 |
-
st.write('secrets are')
|
143 |
|
144 |
-
st.write(st.secrets["hf-chat"])
|
145 |
uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "xls", "csv"])
|
146 |
|
147 |
-
try:
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
except ChatBotInitError as e:
|
155 |
-
|
156 |
|
157 |
summarizer_option = st.selectbox(
|
158 |
"Select Summarizer",
|
@@ -188,6 +210,7 @@ if st.button("Process", type="primary"):
|
|
188 |
oneline = SimpleT5()
|
189 |
load_one_line_summarizer(model=oneline)
|
190 |
zeroline = loadZeroShotClassification()
|
|
|
191 |
|
192 |
cancel_button = st.empty()
|
193 |
cancel_button2 = st.empty()
|
@@ -203,7 +226,7 @@ if st.button("Process", type="primary"):
|
|
203 |
print(summarizer_option)
|
204 |
outputdf = pd.DataFrame()
|
205 |
try:
|
206 |
-
text = df["text"].values.tolist()
|
207 |
outputdf["text"] = text
|
208 |
if summarizer_option == "Custom trained on the dataset":
|
209 |
if summary_yes:
|
@@ -243,17 +266,17 @@ if st.button("Process", type="primary"):
|
|
243 |
classes.append(output)
|
244 |
score = round(themePipe(x)[0][0]["score"], 2)
|
245 |
if score <= treshold:
|
246 |
-
onelineoutput=oneline.predict(x)[0]
|
247 |
time.sleep(SLEEP)
|
248 |
print("hit")
|
249 |
classesUnlabel.append(
|
250 |
-
|
251 |
bot=bot,
|
252 |
what="theme",
|
253 |
to=onelineoutput,
|
254 |
old=themes,
|
255 |
)
|
256 |
-
)
|
257 |
classesUnlabelZero.append(
|
258 |
assignZeroShot(
|
259 |
zero=zeroline, to=onelineoutput, old=themes
|
@@ -283,13 +306,13 @@ if st.button("Process", type="primary"):
|
|
283 |
classes.append(output)
|
284 |
score = round(subThemePipe(x)[0][0]["score"], 2)
|
285 |
if score <= treshold:
|
286 |
-
onelineoutput=oneline.predict(x)[0]
|
287 |
|
288 |
time.sleep(SLEEP)
|
289 |
|
290 |
print("hit")
|
291 |
classesUnlabel.append(
|
292 |
-
|
293 |
bot=bot,
|
294 |
what="subtheme",
|
295 |
to=onelineoutput,
|
@@ -362,13 +385,13 @@ if st.button("Process", type="primary"):
|
|
362 |
classes.append(output)
|
363 |
score = round(themePipe(x)[0][0]["score"], 2)
|
364 |
if score <= treshold:
|
365 |
-
onelineoutput=oneline.predict(x)[0]
|
366 |
|
367 |
print("hit")
|
368 |
time.sleep(SLEEP)
|
369 |
|
370 |
classesUnlabel.append(
|
371 |
-
|
372 |
bot=bot,
|
373 |
what="theme",
|
374 |
to=onelineoutput,
|
@@ -404,12 +427,12 @@ if st.button("Process", type="primary"):
|
|
404 |
classes.append(output)
|
405 |
score = round(subThemePipe(x)[0][0]["score"], 2)
|
406 |
if score <= treshold:
|
407 |
-
onelineoutput=oneline.predict(x)[0]
|
408 |
|
409 |
time.sleep(SLEEP)
|
410 |
print("hit")
|
411 |
classesUnlabel.append(
|
412 |
-
|
413 |
bot=bot,
|
414 |
what="subtheme",
|
415 |
to=onelineoutput,
|
@@ -472,13 +495,13 @@ if st.button("Process", type="primary"):
|
|
472 |
classes.append(output)
|
473 |
score = round(themePipe(x)[0][0]["score"], 2)
|
474 |
if score <= treshold:
|
475 |
-
onelineoutput=oneline.predict(x)[0]
|
476 |
|
477 |
time.sleep(SLEEP)
|
478 |
|
479 |
print("hit")
|
480 |
classesUnlabel.append(
|
481 |
-
|
482 |
bot=bot,
|
483 |
what="theme",
|
484 |
to=onelineoutput,
|
@@ -514,11 +537,11 @@ if st.button("Process", type="primary"):
|
|
514 |
score = round(subThemePipe(x)[0][0]["score"], 2)
|
515 |
if score <= treshold:
|
516 |
print("hit")
|
517 |
-
onelineoutput=oneline.predict(x)[0]
|
518 |
|
519 |
time.sleep(SLEEP)
|
520 |
classesUnlabel.append(
|
521 |
-
|
522 |
bot=bot,
|
523 |
what="subtheme",
|
524 |
to=onelineoutput,
|
|
|
15 |
import time
|
16 |
from typing import List
|
17 |
from collections import OrderedDict
|
18 |
+
from langchain.llms import OpenLLM
|
19 |
+
from langchain import PromptTemplate, LLMChain
|
20 |
|
21 |
tokenizer_kwargs = dict(
|
22 |
max_length=128,
|
|
|
66 |
return ""
|
67 |
|
68 |
|
69 |
+
def assignOpen(bot, what: str, to: str, old: List):
    """Ask an LLM for a one-line label for a review summary.

    Fills the prompt template below and runs it through a LangChain
    ``LLMChain`` that uses ``bot`` as its LLM.

    Args:
        bot: LLM object handed to ``LLMChain`` as ``llm``.
        what: Kind of label requested (callers pass "theme" or "subtheme").
        to: Summary text that should receive the label.
        old: Labels already assigned; joined with ", " before being
            placed in the prompt.

    Returns:
        Whatever ``llm_chain.run`` returns for the filled-in prompt.
    """
    existing = ", ".join(old)
    # NOTE(review): the prompt always ends with the literal word "theme" and
    # says "assigned themes", even when ``what`` is "subtheme" -- confirm
    # whether that is intended.
    # NOTE(review): continuation lines of the template are flush-left as
    # shown in the diff view; confirm against the repository file.
    template = """'Assign a one-line {what} to this summary of the text of a review
{to}
already assigned themes are , {old}
theme"""
    prompt = PromptTemplate(template=template, input_variables=["what", "to", "old"])
    llm_chain = LLMChain(prompt=prompt, llm=bot)
    # Pass the ``to`` parameter: the previous code referenced ``summary``,
    # a name that is not defined in this function.
    generated = llm_chain.run(what=what, to=to, old=existing)
    return generated
|
79 |
+
|
80 |
+
|
81 |
@st.cache_resource
|
82 |
def loadZeroShotClassification():
|
83 |
classifierzero = pipeline(
|
|
|
86 |
return classifierzero
|
87 |
|
88 |
|
89 |
+
@st.cache_resource
def loadopenModel():
    """Build the OpenLLM wrapper for the Dolly v2 3B model.

    Decorated with ``st.cache_resource``, so Streamlit is expected to reuse
    the returned object rather than rebuild it on every script rerun.

    Returns:
        The ``OpenLLM`` instance configured with the settings below.
    """
    # Model selection and sampling settings, gathered in one place.
    dolly_settings = {
        "model_name": "dolly-v2",
        "model_id": "databricks/dolly-v2-3b",
        "temperature": 0.94,
        "repetition_penalty": 1.2,
    }
    return OpenLLM(**dolly_settings)
|
98 |
+
|
99 |
+
|
100 |
def assignZeroShot(zero, to: str, old: List):
|
101 |
assigned = zero(to, old)
|
102 |
assigneddict = dict(zip(assigned["labels"], assigned["scores"]))
|
|
|
163 |
|
164 |
st.set_page_config(layout="wide", page_title="Amazon Review | Summarizer")
|
165 |
st.title("Amazon Review Summarizer")
|
|
|
166 |
|
|
|
167 |
uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "xls", "csv"])
|
168 |
|
169 |
+
# try:
|
170 |
+
# bot = ChatBot(
|
171 |
+
# cookies={
|
172 |
+
# "hf-chat": st.secrets["hf-chat"],
|
173 |
+
# "token": st.secrets["token"],
|
174 |
+
# }
|
175 |
+
# )
|
176 |
+
# except ChatBotInitError as e:
|
177 |
+
# print(e)
|
178 |
|
179 |
summarizer_option = st.selectbox(
|
180 |
"Select Summarizer",
|
|
|
210 |
oneline = SimpleT5()
|
211 |
load_one_line_summarizer(model=oneline)
|
212 |
zeroline = loadZeroShotClassification()
|
213 |
+
bot = loadopenModel()
|
214 |
|
215 |
cancel_button = st.empty()
|
216 |
cancel_button2 = st.empty()
|
|
|
226 |
print(summarizer_option)
|
227 |
outputdf = pd.DataFrame()
|
228 |
try:
|
229 |
+
text = df["text"].values.tolist()
|
230 |
outputdf["text"] = text
|
231 |
if summarizer_option == "Custom trained on the dataset":
|
232 |
if summary_yes:
|
|
|
266 |
classes.append(output)
|
267 |
score = round(themePipe(x)[0][0]["score"], 2)
|
268 |
if score <= treshold:
|
269 |
+
onelineoutput = oneline.predict(x)[0]
|
270 |
time.sleep(SLEEP)
|
271 |
print("hit")
|
272 |
classesUnlabel.append(
|
273 |
+
assignOpen(
|
274 |
bot=bot,
|
275 |
what="theme",
|
276 |
to=onelineoutput,
|
277 |
old=themes,
|
278 |
)
|
279 |
+
)
|
280 |
classesUnlabelZero.append(
|
281 |
assignZeroShot(
|
282 |
zero=zeroline, to=onelineoutput, old=themes
|
|
|
306 |
classes.append(output)
|
307 |
score = round(subThemePipe(x)[0][0]["score"], 2)
|
308 |
if score <= treshold:
|
309 |
+
onelineoutput = oneline.predict(x)[0]
|
310 |
|
311 |
time.sleep(SLEEP)
|
312 |
|
313 |
print("hit")
|
314 |
classesUnlabel.append(
|
315 |
+
assignOpen(
|
316 |
bot=bot,
|
317 |
what="subtheme",
|
318 |
to=onelineoutput,
|
|
|
385 |
classes.append(output)
|
386 |
score = round(themePipe(x)[0][0]["score"], 2)
|
387 |
if score <= treshold:
|
388 |
+
onelineoutput = oneline.predict(x)[0]
|
389 |
|
390 |
print("hit")
|
391 |
time.sleep(SLEEP)
|
392 |
|
393 |
classesUnlabel.append(
|
394 |
+
assignOpen(
|
395 |
bot=bot,
|
396 |
what="theme",
|
397 |
to=onelineoutput,
|
|
|
427 |
classes.append(output)
|
428 |
score = round(subThemePipe(x)[0][0]["score"], 2)
|
429 |
if score <= treshold:
|
430 |
+
onelineoutput = oneline.predict(x)[0]
|
431 |
|
432 |
time.sleep(SLEEP)
|
433 |
print("hit")
|
434 |
classesUnlabel.append(
|
435 |
+
assignOpen(
|
436 |
bot=bot,
|
437 |
what="subtheme",
|
438 |
to=onelineoutput,
|
|
|
495 |
classes.append(output)
|
496 |
score = round(themePipe(x)[0][0]["score"], 2)
|
497 |
if score <= treshold:
|
498 |
+
onelineoutput = oneline.predict(x)[0]
|
499 |
|
500 |
time.sleep(SLEEP)
|
501 |
|
502 |
print("hit")
|
503 |
classesUnlabel.append(
|
504 |
+
assignOpen(
|
505 |
bot=bot,
|
506 |
what="theme",
|
507 |
to=onelineoutput,
|
|
|
537 |
score = round(subThemePipe(x)[0][0]["score"], 2)
|
538 |
if score <= treshold:
|
539 |
print("hit")
|
540 |
+
onelineoutput = oneline.predict(x)[0]
|
541 |
|
542 |
time.sleep(SLEEP)
|
543 |
classesUnlabel.append(
|
544 |
+
assignOpen(
|
545 |
bot=bot,
|
546 |
what="subtheme",
|
547 |
to=onelineoutput,
|