svjack commited on
Commit
33ab722
1 Parent(s): b05a5e4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +511 -0
app.py ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client
2
+ client = Client("https://svjack-entity-property-extractor-zh.hf.space")
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ import os
7
+ import re
8
+
9
+ from langchain.vectorstores import FAISS
10
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
11
+ from langchain import chains
12
+ from rapidfuzz import fuzz
13
+
14
+ import pandas as pd
15
+
16
+ import chatglm_cpp
17
+ from pathlib import Path
18
+ import pandas as pd
19
+
20
+ from huggingface_hub import snapshot_download
21
+
22
# Fetch every Hugging Face repo the app needs, skipping those whose local
# directory already exists.  Each entry is (repo_id, repo_type, local_dir).
for _repo_id, _repo_type, _local_dir in (
    ("svjack/genshin_book_chunks_with_qa_sp", "dataset", "genshin_book_chunks_with_qa_sp"),
    ("svjack/bge_small_book_chunks_prebuld", "dataset", "bge_small_book_chunks_prebuld"),
    ("svjack/chatglm3-6b-bin", "model", "chatglm3-6b-bin"),
):
    if os.path.exists(_local_dir):
        continue
    path = snapshot_download(
        repo_id=_repo_id,
        repo_type=_repo_type,
        local_dir=_local_dir,
        local_dir_use_symlinks=False,
    )

# Quantised ChatGLM3 weights live inside the downloaded model directory.
model_file_path = "chatglm3-6b-bin/chatglm3-6b-int4.bin"
print("load {}".format(model_file_path))
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))
51
+
52
def chat_messages(message, history, chatglm_llm = chatglm_llm,
                  max_length = 128, show_process = False
                  ):
    """Stream a reply to ``message`` from the chatglm_cpp pipeline.

    This is a generator: after every streamed chunk it yields the reply
    accumulated so far, so the last yielded value is the full answer.

    Args:
        message: the new user message.
        history: iterable of ``(user_text, assistant_text)`` pairs that are
            replayed as alternating user/assistant messages before ``message``.
        chatglm_llm: pipeline object exposing ``chat(...)``; defaults to the
            module-level pipeline.
        max_length: streaming stops once the accumulated reply holds at least
            this many characters.  (The generation call itself always uses a
            fixed ``max_length`` of 5120.)
        show_process: when true, print the partial reply after each chunk and,
            if IPython is importable, clear the notebook output in between.

    Yields:
        str: the reply accumulated so far.
    """
    flatten_history = []
    for user_text, assistant_text in history:
        flatten_history.append(
            chatglm_cpp.ChatMessage(role="user", content=user_text)
        )
        flatten_history.append(
            chatglm_cpp.ChatMessage(role="assistant", content=assistant_text)
        )

    streamer = chatglm_llm.chat(
        flatten_history + [
            chatglm_cpp.ChatMessage(role="user", content=message)
        ], do_sample=False,
        stream = True,
        max_length = 5120
    )

    # Resolve the optional notebook helper once instead of on every chunk, and
    # do not crash when the app runs outside IPython/Jupyter.
    clear_output = None
    if show_process:
        try:
            from IPython.display import clear_output
        except ImportError:
            clear_output = None

    response = ""
    for new_text in streamer:
        response += new_text.content
        if show_process:
            print(response)
            if clear_output is not None:
                clear_output(wait=True)
        yield response
        if len(response) >= max_length:
            break
83
+
84
+ '''
85
+ query = "警察是如何破获邪恶计划的?" ## 警 执律 盗
86
+ k = 10
87
+ uniform_recall_docs_to_pairwise_cos(
88
+ query,
89
+ docsearch_bge_loaded.similarity_search_with_score(query, k = k, ),
90
+ bge_book_embeddings
91
+ )
92
+ '''
93
def uniform_recall_docs_to_pairwise_cos(query ,doc_list, embeddings):
    """Re-score recalled documents by cosine embedding distance to ``query``.

    Args:
        query: the query string.
        doc_list: a ``list`` whose items expose the document at index 0 (the
            document must have a ``page_content`` attribute).
        embeddings: embedding object handed to langchain's
            ``pairwise_embedding_distance`` evaluator.

    Returns:
        list of ``(page_content, score)`` tuples sorted by ascending score.
    """
    assert type(doc_list) is list
    from langchain.evaluation import EmbeddingDistance
    from langchain.evaluation import load_evaluator

    evaluator = load_evaluator(
        "pairwise_embedding_distance",
        embeddings=embeddings,
        distance_metric=EmbeddingDistance.COSINE,
    )
    # Extract all texts first, then score them, mirroring the two-pass order
    # of the original pipeline.
    contents = [item[0].page_content for item in doc_list]
    scored = [
        (text, evaluator.evaluate_string_pairs(prediction=query, prediction_b=text)["score"])
        for text in contents
    ]
    scored.sort(key=lambda pair: pair[1])
    return scored
102
+
103
+ '''
104
+ sort_by_kw("深渊使徒", book_df)["content_chunks_formatted"].head(5).values.tolist() ### 深渊
105
+ '''
106
def sort_by_kw(kw, book_df):
    """Rank book chunks by fuzzy similarity between ``kw`` and their entities.

    For every row, the ``person`` and ``locate`` cells (strings holding a
    Python list literal) are parsed and each name is compared with ``kw``
    using ``fuzz.ratio``; the best ratio becomes the row's ``sim_score``
    (0 when the row has no names).

    Args:
        kw: keyword / query string to compare against.
        book_df: DataFrame with at least ``person`` and ``locate`` columns.

    Returns:
        A copy of ``book_df`` with a ``sim_score`` column, sorted by that
        column in descending order.
    """
    # literal_eval only accepts Python literals, so a crafted CSV cell cannot
    # execute code the way eval() would.
    import ast

    def _best_ratio(row):
        # The original expression concatenated "locate" twice; duplicates
        # cannot change a maximum, so each column is parsed exactly once.
        names = ast.literal_eval(row["person"]) + ast.literal_eval(row["locate"])
        return max((fuzz.ratio(name, kw) for name in names), default=0)

    req = book_df.copy()
    req["sim_score"] = req.apply(_best_ratio, axis = 1)
    return req.sort_values(by = "sim_score", ascending = False)
116
+
117
def recall_chuncks(query, docsearch, embedding, book_df,
                   sparse_threshold = 30,
                   dense_top_k = 10,
                   rerank_by = "emb",
                   ):
    """Recall candidate chunks with keyword and vector search, then rerank.

    Keyword hits are rows of ``sort_by_kw`` whose ``sim_score`` reaches
    ``sparse_threshold``; vector hits are the ``dense_top_k`` results of
    ``docsearch.similarity_search_with_score``.  Vector hits not already
    present are appended after the keyword hits.

    Args:
        query: the query string.
        docsearch: vector store exposing ``similarity_search_with_score``.
        embedding: embedding object used for distance scoring.
        book_df: chunk DataFrame passed to ``sort_by_kw``.
        sparse_threshold: minimum ``sim_score`` for a keyword hit.
        dense_top_k: number of vector-search results requested.
        rerank_by: ``"emb"`` sorts by ascending cosine embedding distance;
            any other value sorts by descending ``fuzz.ratio`` with the query.

    Returns:
        list of chunk strings in reranked order.
    """
    keyword_scored = sort_by_kw(query, book_df)[["content_chunks_formatted", "sim_score"]]
    passes_threshold = keyword_scored["sim_score"] >= sparse_threshold
    candidates = keyword_scored[passes_threshold]["content_chunks_formatted"].values.tolist()

    dense_hits = uniform_recall_docs_to_pairwise_cos(
        query,
        docsearch.similarity_search_with_score(query, k = dense_top_k,),
        embedding
    )
    for chunk, _score in dense_hits:
        if chunk not in candidates:
            candidates.append(chunk)

    if rerank_by != "emb":
        return sorted(candidates, key = lambda text: fuzz.ratio(text, query), reverse = True)

    from langchain.evaluation import load_evaluator
    from langchain.evaluation import EmbeddingDistance
    evaluator = load_evaluator("pairwise_embedding_distance", embeddings=embedding,
                               distance_metric = EmbeddingDistance.COSINE)
    scored = [
        (text, evaluator.evaluate_string_pairs(prediction=query, prediction_b=text)["score"])
        for text in candidates
    ]
    scored.sort(key = lambda pair: pair[1])
    return [text for text, _distance in scored]
145
+
146
def reduce_list_by_order(text_list, as_text = False):
    """Merge chunks that share the same first two lines, keeping input order.

    Each text is split on newlines: line 0 and line 1 form the grouping key
    and the remaining lines form the body.  Bodies with the same key are
    sorted longest-first and joined with a blank line.  Output rows follow
    the order in which first lines appear in ``text_list`` and, within one
    first line, the order of the second lines; duplicate rows are dropped.

    Args:
        text_list: list of multi-line strings (each needs at least two lines).
        as_text: when true, return one string instead of a DataFrame.

    Returns:
        ``None`` for an empty ``text_list``; otherwise a DataFrame with
        columns 0, 1, 2 (first line, second line, merged body), or the rows
        rendered as ``"{0}\\n{1}\\n{2}"`` and joined by blank lines.
    """
    if not text_list:
        return

    bodies_by_key = {}       # (line0, line1) -> bodies in input order
    seconds_by_first = {}    # line0 -> every line1 seen with it, in input order
    for text in text_list:
        lines = text.split("\n")
        first, second = lines[0], lines[1]
        bodies_by_key.setdefault((first, second), []).append("\n".join(lines[2:]))
        seconds_by_first.setdefault(first, []).append(second)

    merged = {
        key: "\n\n".join(sorted(bodies, key = len, reverse = True))
        for key, bodies in bodies_by_key.items()
    }

    rows = [
        (first, second, merged[(first, second)])
        for first, seconds in seconds_by_first.items()
        for second in seconds
    ]
    table = pd.DataFrame(rows).drop_duplicates()

    if not as_text:
        return table
    return "\n\n".join(
        "{}\n{}\n{}".format(first, second, body)
        for first, second, body in table.values.tolist()
    )
173
+
174
def build_gpt_prompt(query, docsearch, embedding, book_df, max_context_length = 4090):
    """Build the retrieval-augmented prompt for ``query``.

    Recalled chunks are merged into one context string, which is then
    truncated line by line: lines are added until the characters already
    kept (newlines not counted) reach ``max_context_length``.

    Args:
        query: the user question.
        docsearch: vector store forwarded to ``recall_chuncks``.
        embedding: embedding object forwarded to ``recall_chuncks``.
        book_df: chunk DataFrame forwarded to ``recall_chuncks``.
        max_context_length: character budget for the context section.

    Returns:
        str: the filled-in prompt template.
    """
    chunks = recall_chuncks(query, docsearch, embedding, book_df)
    # reduce_list_by_order returns None when nothing was recalled; fall back
    # to an empty context instead of failing on None.split below.
    context = reduce_list_by_order(chunks, as_text = True) or ""

    kept_lines = []
    kept_chars = 0  # running total instead of re-summing every iteration
    for line in context.split("\n"):
        if kept_chars >= max_context_length:
            break
        kept_lines.append(line)
        kept_chars += len(line)
    context = "\n".join(kept_lines).strip()

    template = """使用以下上下文来回答最后的问题。如果你不知道答案,就说你不知道,不要试图编造答案。尽量使答案简明扼要。总是在回答的最后说“谢谢你的提问!”。

{context}

问题: {question}
有用的回答:"""
    return template.format(context = context, question = query)
195
+
196
def collect_prompt_to_hist_list(prompt, add_assistant = False):
    """Split a prompt into segments that each start at an article or question.

    The prompt is cut on blank lines and every piece is stripped.  A piece
    opens a new segment when it is the first one or starts with
    ``文章标题:`` or ``问题:``; any other piece is glued onto the previous
    segment with a blank line.

    Args:
        prompt: the prompt text.
        add_assistant: when true, pair every segment with a canned assistant
            reply (first segment, middle segments, and an empty reply for the
            last segment).

    Returns:
        list of segment strings, or list of ``(segment, reply)`` tuples.
    """
    segments = []
    for piece in (raw.strip() for raw in prompt.split("\n\n")):
        opens_segment = piece.startswith(("文章标题:", "问题:"))
        if segments and not opens_segment:
            segments[-1] += ("\n\n" + piece)
        else:
            segments.append(piece)

    if not add_assistant:
        return segments

    last_index = len(segments) - 1
    paired = []
    for index, segment in enumerate(segments):
        if index == 0:
            reply = "好的。"
        elif index < last_index:
            reply = "我读懂了。"
        else:
            reply = ""
        paired.append((segment, reply))
    return paired
219
+
220
def row_to_content_ask(r):
    """Turn a question/content record into one extraction prompt per content.

    Args:
        r: mapping with ``"question"`` (str) and ``"content_list"`` (a
            ``list`` of strings; any other type fails the assertion).

    Returns:
        list of prompts: each content followed by an instruction line that
        quotes the question, with surrounding whitespace stripped.
    """
    question = r["question"]
    content_list = r["content_list"]
    assert type(content_list) is list
    instruction = '\n从上面的相关的叙述中抽取包含"{}"中词汇的相关语段。'.format(question)
    # strip() mirrors the original, which stripped the fully formatted text
    # (so leading whitespace of the content itself is removed as well).
    return [(content + instruction).strip() for content in content_list]
230
+
231
def entity_extractor_by_glm(query,
                            show_process = False, max_length = 512,
                            return_out_text = False,
                            ):
    """Extract the entity words of ``query`` with a few-shot ChatGLM prompt.

    Args:
        query: the question to analyse.
        show_process: forwarded to ``chat_messages`` (print partial replies).
        max_length: forwarded to ``chat_messages`` (reply length at which
            streaming stops).
        return_out_text: when true, return the raw model reply instead of
            parsing it.

    Returns:
        The raw reply (``return_out_text``), otherwise a list of the runs of
        CJK characters found between ``实体`` and ``属性`` in the reply, or
        ``None`` when the reply contains no such span.
    """
    import re
    hist = [
        ['请从下面的句子中提取实体和属性。不需要进行进一步解释。', '好的。'],
        ['问题:宁波在哪个省份?', '实体:宁波 属性:省份'],
        ['问题:中国的货币是什么?', '实体:中国 属性:货币'],
        ['问题:百慕大三角在什么地方?', '实体:百慕大三角 属性:地方'],
        ['问题:谁是最可爱的人?', "实体:人 属性:可爱"],
        ['问题:黄河的拐点在哪里?', "实体:黄河 属性:拐点"],
    ]

    # chat_messages is a generator of growing partial replies; the last value
    # is the complete reply.  show_process/max_length used to be accepted by
    # this function but silently ignored; they are now passed through.
    out_text = ""
    for partial in chat_messages("问题:{}".format(query),
                                 hist,
                                 max_length = max_length,
                                 show_process = show_process,
                                 ):
        out_text = partial

    if return_out_text:
        return out_text
    e_list = re.findall(r"实体(.*?)属性", out_text.replace("\n", " "))
    if e_list:
        return re.findall(u"[\u4e00-\u9fa5]+" ,e_list[0])
    return None
261
+
262
def unzip_string(x, size = 2):
    """Return every contiguous window of ``size`` characters from ``x``.

    A string no longer than ``size`` is returned unchanged as the only
    element of the list.
    """
    if len(x) <= size:
        return [x]
    return [x[start: start + size] for start in range(len(x) - size + 1)]
269
+
270
def entity_extractor_by_adapter(x):
    """Extract entity fragments for question ``x`` via the remote Gradio app.

    The remote ``/predict`` endpoint is called with ``x``; its result is
    treated as the path of a JSON file whose ``"E-TAG"`` entry lists entity
    strings.  Every entity is broken into 2-character windows with
    ``unzip_string``.

    Args:
        x: the question text.

    Returns:
        list of distinct fragments in first-seen order (empty when the JSON
        has no ``"E-TAG"`` entry).
    """
    import json
    result = client.predict(
        x, # str in 'question' Textbox component
        api_name="/predict"
    )
    # Read explicitly as UTF-8 so decoding does not depend on the locale.
    with open(result, "r", encoding="utf-8") as f:
        req = json.load(f)

    fragments = []
    for entity in req.get("E-TAG", []):
        # Separate loop name: the original reused ``x`` and shadowed the argument.
        for fragment in unzip_string(entity, 2):
            if fragment not in fragments:
                fragments.append(fragment)
    return fragments
285
+
286
+ ##### maybe 0.1
287
def query_content_ask_func(question, content_list,
                           setfit_model, show_process = False, max_length = 1024):
    """Ask the LLM to quote question-related passages and score the replies.

    One extraction prompt is built per content (``row_to_content_ask``), each
    is answered through ``chat_messages``, and the answers are scored with
    ``setfit_model.predict_proba`` (column 1 of its output is kept).

    Args:
        question: question (or entity) text quoted in each prompt.
        content_list: list of content strings.
        setfit_model: classifier exposing ``predict_proba``.
        show_process: forwarded to ``chat_messages``.
        max_length: forwarded to ``chat_messages``.

    Returns:
        dict with keys ``question``, ``content_list``,
        ``question_content_relate_list`` (LLM answers) and
        ``relate_prob_list`` (one score per answer).
    """
    ask_list = row_to_content_ask(
        {
            "question": question,
            "content_list": content_list
        }
    )
    suffix = "如果没有提到相关内容,请回答不知道。使用中文进行回答,不要包含任何英文。"

    answers = []
    for prompt in ask_list:
        final_reply = ""
        # Drain the generator; its last value is the complete reply.
        for partial in chat_messages(prompt + suffix,
                                     [], show_process = show_process, max_length = max_length
                                     ):
            final_reply = partial
        answers.append(final_reply)

    assert len(answers) == len(ask_list)
    scores = setfit_model.predict_proba(
        answers
    ).numpy()[:, 1].tolist()
    return {
        "question": question,
        "content_list": content_list,
        "question_content_relate_list": answers,
        "relate_prob_list": scores,
    }
318
+
319
def build_relate_ask_list(query, docsearch_bge_loaded, bge_book_embeddings, book_df,
                          setfit_model, as_content_score_df = True,
                          show_process = False, add_relate_entities = False,
                          max_length = 1024):
    """Recall contents for ``query`` and score how related each one is.

    The retrieval prompt is split into segments; the first segment (the
    instruction) and the last one (the question) are excluded, and the
    segments in between are the contents.  Each content gets an LLM
    extraction answer, a relatedness score, and the list of query entity
    fragments that occur in it verbatim.

    Args:
        query: the user question.
        docsearch_bge_loaded: vector store forwarded to ``build_gpt_prompt``.
        bge_book_embeddings: embedding object forwarded to ``build_gpt_prompt``.
        book_df: chunk DataFrame forwarded to ``build_gpt_prompt``.
        setfit_model: classifier forwarded to ``query_content_ask_func``.
        as_content_score_df: return a DataFrame (default) instead of the raw dict.
        show_process: forwarded to ``query_content_ask_func``.
        add_relate_entities: additionally score every entity fragment against
            every content and record those scoring at least 0.5.
        max_length: forwarded to ``query_content_ask_func``.

    Returns:
        When ``as_content_score_df`` is true, a DataFrame with columns
        ``content``, ``entities``, optionally ``relate_entities``,
        ``relate_eval_str`` and ``score``, ordered by number of entities
        (descending) and then by score (descending).  Otherwise the raw dict.
    """
    prompt = build_gpt_prompt(query, docsearch_bge_loaded, bge_book_embeddings, book_df)
    prompt_list = collect_prompt_to_hist_list(prompt)
    question = prompt_list[-1].split("\n")[0]
    content_list = prompt_list[1:-1]

    # max_length used to be accepted but never used; its default equals the
    # callee's default, so forwarding it keeps default behaviour unchanged.
    d = query_content_ask_func(question, content_list,
                               setfit_model, show_process = show_process,
                               max_length = max_length)

    entity_list = entity_extractor_by_adapter(query)
    if not isinstance(entity_list, list):
        entity_list = []

    d["in_content_entity_list"] = [
        [entity for entity in entity_list if entity in content]
        for content in d["content_list"]
    ]

    if add_relate_entities:
        # One independent list per content (no shared-list aliasing).
        relate_content_entity_list = [[] for _ in content_list]
        for entity in entity_list:
            entity_scores = query_content_ask_func(entity, d["content_list"],
                                                   setfit_model, show_process = show_process,
                                                   max_length = max_length)
            for idx, prob in enumerate(entity_scores["relate_prob_list"]):
                if prob >= 0.5 and entity not in relate_content_entity_list[idx]:
                    relate_content_entity_list[idx].append(entity)
        d["relate_content_entity_list"] = relate_content_entity_list

    if not as_content_score_df:
        return d

    series_list = [
        pd.Series(d["content_list"]).map(lambda x: x.strip()),
        pd.Series(d["in_content_entity_list"]),
    ]
    columns = ["content", "entities"]
    if add_relate_entities:
        series_list.append(pd.Series(d["relate_content_entity_list"]))
        columns.append("relate_entities")
    series_list.append(pd.Series(d["question_content_relate_list"]).map(lambda x: x.strip()))
    series_list.append(pd.Series(d["relate_prob_list"]))
    columns += ["relate_eval_str", "score"]

    df = pd.concat(series_list, axis = 1)
    df.columns = columns

    # Rows with more matched entities come first; ties are ordered by score.
    entity_counts = df["entities"].map(len)
    groups = [
        df[entity_counts == count].sort_values(by = "score", ascending = False)
        for count in sorted(entity_counts.dropna().unique(), reverse = True)
    ]
    if not groups:
        # pd.concat raises on an empty list; an empty frame is the honest result.
        return df
    return pd.concat(groups, axis = 0)
395
+
396
def run_all(query, docsearch_bge_loaded, bge_book_embeddings, book_df,
            setfit_model, only_return_prompt = False):
    """Answer ``query`` from the book corpus.

    Contents are recalled and scored with ``build_relate_ask_list``; rows
    with ``score >= 0.1`` and at least one matched entity are kept as
    evidence, and the LLM is asked to answer from that evidence.

    Args:
        query: the user question.
        docsearch_bge_loaded: vector store forwarded to ``build_relate_ask_list``.
        bge_book_embeddings: embedding object forwarded likewise.
        book_df: chunk DataFrame forwarded likewise.
        setfit_model: classifier forwarded likewise.
        only_return_prompt: return the assembled prompt instead of calling
            the LLM.

    Returns:
        tuple ``(df, info_list, text)``: the scored DataFrame, the kept rows
        as lists, and either the answer, the prompt, or a fixed
        "no related content" message when no row qualifies.
    """
    df = build_relate_ask_list(query, docsearch_bge_loaded, bge_book_embeddings, book_df,
                               setfit_model, show_process=False)
    # Column-wise mask; unlike a row-wise apply it also works on an empty frame.
    keep = (df["score"] >= 0.1) & df["entities"].map(bool).astype(bool)
    info_list = df[keep].values.tolist()
    if not info_list:
        # The original literal ended in mojibake (U+FFFD characters); it now
        # ends like the thank-you phrase used in the retrieval prompt.
        return df, info_list, "没有相关内容,谢谢你的提问!"
    prompt = '''
问题: {}
根据下面的内容回答上面的问题,如果无法根据内容确定答案,请回答不知道。
{}
'''.format(query, "\n\n".join(row[0] for row in info_list))
    if only_return_prompt:
        return df, info_list, prompt

    prompt_lines = prompt.split("\n")
    # NOTE(review): the prompt starts with a newline, so q_head is only the
    # blank line plus the question line; the instruction line ends up in
    # c_tail and the replace() below never matches.  Behaviour is kept as-is
    # because changing it alters what the model is asked -- confirm intent.
    q_head = "\n".join(prompt_lines[:2])
    c_tail = "\n".join(prompt_lines[2:])[:4000]
    out_text = ""
    for partial in chat_messages(
        c_tail + "\n" + q_head.replace("下面的内容回答上面的问题", "上面的内容回答问题") + "用中文回答问题。",
        [], show_process = False, max_length = 512
    ):
        out_text = partial
    out_text = out_text.strip()

    return df, info_list, out_text
427
+
428
+ import gradio as gr
429
+
430
# Book chunks with their extracted entities, downloaded into this directory.
book_df = pd.read_csv("genshin_book_chunks_with_qa_sp/genshin_book_chunks_with_qa_sp.csv")

# Render every chunk as "title / sub title / content" text; the blank-line
# splitting in collect_prompt_to_hist_list relies on the "文章标题:" prefix.
book_df["content_chunks_formatted"] = book_df.apply(
    lambda x: "文章标题:{}\n子标题:{}\n内容:{}".format(x["title"], x["sub_title"], x["content_chunks"]),
    axis = 1
)

texts = book_df["content_chunks_formatted"].dropna().drop_duplicates().values.tolist()

# Embedding model plus the prebuilt FAISS index stored in the download directory.
embedding_path = "svjack/bge-small-book-qa"
bge_book_embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
docsearch_bge_loaded = FAISS.load_local("bge_small_book_chunks_prebuld/", bge_book_embeddings)

# SetFit classifier whose predict_proba scores the LLM extraction answers.
from setfit import SetFitModel
setfit_model = SetFitModel.from_pretrained("svjack/setfit_info_cls")
448
+
449
# Example questions whose answers are contained in the book corpus.
_COVERED_EXAMPLES = [
    '丘丘人有哪些生活习惯?',
    #'岩王帝君和归终是什么关系?',
    '盐之魔神的下场是什么样的?',
    #'归终是谁?',
    '岩王帝君是一个什么样的人?',
    #'铳枪手的故事内容是什么样的?',
    '白夜国的子民遭遇了什么?',
    '大蛇居住在哪里?',
    '珊瑚宫有哪些传说?',
    '灵光颂的内容是什么样的?',
    '连心珠讲了一件什么事情?',
    #'梓心是谁?',
    #'枫丹有哪些故事?',
    '璃月有哪些故事?',
    #'轻策庄有哪些故事?',
    '瑶光滩有哪些故事?',
    '稻妻有哪些故事?',
    '海祇岛有哪些故事?',
    '须弥有哪些故事?',
    '蒙德有哪些故事?',
    '璃月有哪些奇珍异宝?',
    #'狸猫和天狗是什么关系?',
]

# Example questions the books do not explicitly answer.
_UNCOVERED_EXAMPLES = [
    '爱丽丝女士是可莉的妈妈吗?',
    '摘星崖是什么样的?',
    '丘丘人使用的是什么文字?',
    '深渊使徒哪里来的?',
    '发条机关可以用来做什么?',
    '那先朱那做了什么?',
]


def _answer_query(x):
    """Run the QA pipeline and return ``(recalled rows, answer text)``."""
    return run_all(x, docsearch_bge_loaded, bge_book_embeddings, book_df,
                   setfit_model = setfit_model)[1:]


with gr.Blocks() as demo:
    title = gr.HTML(
        """<h1 align="center"> <font size="+3"> Genshin Impact Book QA ChatGLM3 Demo 🏃 </font> </h1>""",
        elem_id="title",
    )

    with gr.Column():
        with gr.Row():
            query = gr.Text(label = "输入问题:", lines = 1, interactive = True, scale = 5.0)
            run_button = gr.Button("得到答案")
        output = gr.Text(label = "回答:", lines = 5, interactive = True)
        recall_items = gr.JSON(label = "召回相关内容", interactive = False)

    with gr.Row():
        gr.Examples(
            _COVERED_EXAMPLES,
            inputs = query,
            label = "被书目内容包含的问题"
        )
    with gr.Row():
        gr.Examples(
            _UNCOVERED_EXAMPLES,
            inputs = query,
            label = "没有被书目明确提到的问题"
        )

    # The first pipeline result (the DataFrame) is dropped; the remaining
    # pair maps onto [recall_items, output].
    run_button.click(_answer_query, query, [recall_items, output])

demo.queue(max_size=4, concurrency_count=1).launch(debug=True, show_api=False, share = True)