ubuntu committed on
Commit
7a5da00
1 Parent(s): 45f1d1e

Add application file

Browse files
Files changed (2) hide show
  1. app.py +84 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from scipy.spatial.distance import cosine
3
+ from transformers import AutoModel, AutoTokenizer
4
+ from argparse import Namespace
5
+ import torch
6
+ from LuotuoEmbedding.lib.tsne import TSNE_Plot
7
+
8
# Extra arguments handed to the checkpoint's model class through the
# ``model_args`` keyword of ``from_pretrained`` below.
model_args = Namespace(
    do_mlm=None,
    pooler_type="cls",
    temp=0.05,
    mlp_only_train=False,
    init_embeddings_model=None,
)

# Tokenizer and model are loaded once at import time and shared by every
# request. ``trust_remote_code=True`` allows Transformers to execute the
# modelling code shipped inside the "silk-road/luotuo-bert" repository.
tokenizer = AutoTokenizer.from_pretrained("silk-road/luotuo-bert")
model = AutoModel.from_pretrained(
    "silk-road/luotuo-bert",
    model_args=model_args,
    trust_remote_code=True,
)
17
+
18
def divide_str(s, sep=('\n', '.', '。')):
    """Split ``s`` in two at the separator nearest to, and left of, its midpoint.

    Only separators that lie entirely inside the first half of ``s`` are
    considered, and a separator sitting at index 0 is ignored because it
    would leave nothing but the separator on the left.

    Args:
        s: The string to split.
        sep: Iterable of separator strings to look for. Empty separators
            are skipped.

    Returns:
        A ``(left, right)`` tuple. ``left`` ends with the chosen separator
        and ``left + right == s``. When no usable separator exists the
        result is ``(s, '')``.
    """
    mid_len = len(s) // 2  # midpoint of the string
    best_cut = None  # index at which the string will be cut
    best_pos = 0  # start of the best separator so far; must stay > 0

    for curr_sep in sep:
        if not curr_sep:
            # An empty separator "matches" everywhere and carries no
            # information about where a sentence ends.
            continue
        # Search leftwards from the midpoint.
        sep_pos = s.rfind(curr_sep, 0, mid_len)
        # Every hit is left of the midpoint, so the closest one is simply
        # the one with the largest index; ties keep the earlier separator.
        if sep_pos > best_pos:
            best_pos = sep_pos
            # Cut after the whole separator, not just its first character.
            best_cut = sep_pos + len(curr_sep)

    if best_cut is None:  # no separator found
        return s, ''
    return s[:best_cut], s[best_cut:]
30
+
31
# Fallback separators tried when no sentence-ending separator is found.
# Each entry is written as its own plain literal: the previous spelling used
# triple-quoted literals for the apostrophe and the hash sign, which made the
# parser swallow everything from the apostrophe to the hash into one junk
# string and drop the entries in between.
# NOTE(review): ',' and ';' appear twice; the second occurrences may have been
# full-width forms in the author's file -- confirm against the original.
_FALLBACK_SEPARATORS = (
    '\n', '.', ',', '、', ';', ',', ';',
    ':', '!', '?', '(', ')', '”', '“',
    '’', '‘', '[', ']', '{', '}', '<', '>',
    '/', "'", '|', '-', '=', '+', '*', '%',
    '$', '#', '@', '&', '^', '_', '`', '~',
    '·', '…',
)


def strong_divide(s):
    """Split ``s`` into two parts, falling back to cruder cuts as needed.

    Three strategies are tried in order, and the first one that produces a
    non-empty right part wins:

    1. ``divide_str`` with its default separators.
    2. ``divide_str`` with the wider punctuation set above.
    3. A plain cut at the midpoint of the string.

    Args:
        s: The string to split.

    Returns:
        A ``(left, right)`` tuple with ``left + right == s``.
    """
    left, right = divide_str(s)
    if right != '':
        return left, right

    left, right = divide_str(s, sep=_FALLBACK_SEPARATORS)
    if right != '':
        return left, right

    # No punctuation available at all: cut the string in the middle.
    mid_len = len(s) // 2
    return s[:mid_len], s[mid_len:]
50
+
51
def _embed_sentences(sentences):
    """Tokenise ``sentences`` and return the model's ``pooler_output`` for them."""
    inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():  # inference only, no gradients needed
        return model(**inputs, output_hidden_states=True, return_dict=True, sent_emb=True).pooler_output


def generate_image(text_input):
    """Build the t-SNE figure for the lyrics typed into the text box.

    Each non-blank line of ``text_input`` is one entry. Text after the first
    ``#`` on a line is used as that entry's label; entries without ``#`` are
    labelled ``No.<line index>``. Every entry is split in two with
    ``strong_divide``, both halves are embedded, and all halves are passed to
    ``TSNE_Plot`` with the entry's label attached to both of its halves.

    Args:
        text_input: Raw multi-line text from the UI.

    Returns:
        Whatever ``TSNE_Plot.tsne_plot(..., return_fig=True)`` returns, or
        ``None`` when the input contains no non-blank line.
    """
    texts = []
    label = []
    # Split the input into lines and collect text/label pairs.
    for idx, line in enumerate(text_input.split('\n')):
        if not line.strip():
            # Blank lines (for example a trailing newline) would otherwise be
            # embedded and plotted as empty strings.
            continue
        text, has_label, title = line.partition('#')
        texts.append(text)
        label.append(title if has_label else 'No.{}'.format(idx))

    if not texts:
        # Nothing to embed; hand back no figure instead of calling the model.
        return None

    divided_text = [strong_divide(text) for text in texts]
    text_left = [pair[0] for pair in divided_text]
    text_right = [pair[1] for pair in divided_text]

    embeddings_left = _embed_sentences(text_left)
    embeddings_right = _embed_sentences(text_right)

    # Left halves first, then right halves; labels are repeated to match.
    merged_list = text_left + text_right
    merged_embed = torch.cat((embeddings_left, embeddings_right), dim=0)
    tsne_plot = TSNE_Plot(merged_list, merged_embed, label=label * 2, n_annotation_positions=len(merged_list))
    fig = tsne_plot.tsne_plot(n_sentence=len(merged_list), return_fig=True)
    return fig
76
+
77
# UI: one multi-line text box, a plot area and a button that runs
# generate_image on the text box contents.
with gr.Blocks() as demo:
    # gr.Textbox replaces the deprecated gr.inputs.Textbox, which is no longer
    # available in current Gradio releases (requirements.txt does not pin one).
    name = gr.Textbox(lines=20,
                      placeholder='在此输入歌词,每一行为一个输入,如果需要输入歌词对应的歌名,请用#隔开\n例如:听雷声 滚滚 他默默 闭紧嘴唇 停止吟唱暮色与想念 他此刻沉痛而危险 听雷声 滚滚 他渐渐 感到胸闷 乌云阻拦明月涌河湾 他起身独立向荒原#河北墨麒麟')
    output = gr.Plot()
    btn = gr.Button("Generate")
    btn.click(fn=generate_image, inputs=name, outputs=output, api_name="generate-image")

demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ kaleido
2
+ gradio
3
+ transformers
4
+ openTSNE