ZHANG789456 committed on
Commit 2079a32
Parent: c3fad63

Upload 3 files

Files changed (3)
  1. app.py +147 -0
  2. model_big.pth +3 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,147 @@
+ import torch
+ from torch import nn
+ import torch.nn.functional as F
+ from transformers import BertModel
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
+ import json
+ import gradio as gr
+
+
+ eg_text = ' 酒店的地理位置实在不错,所以从大堂开始就令人惊艳。城景房不但在房间可以看到上海的美景'
+ model_name = 'bert-base-chinese'
+
+ max_len = 128
+ n_class = 2
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ tokenizer_cn = AutoTokenizer.from_pretrained(model_name)
+ voc_size = len(tokenizer_cn.vocab)
+ name_list = ['Negative review', 'Positive review']
+
+ class bertBlock(nn.Module):
+     def __init__(self,):
+         super().__init__()
+         self.model_block = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=100).to(device)
+
+     def forward(self, text_b):
+         x = self.model_block(text_b)
+         return x.logits
+
+
+ class textCNNblock(nn.Module):
+     def __init__(self,):
+         super().__init__()
+         emb_dim = 100
+         # n_class = 4
+         kernels = [3, 4, 5]
+         kernel_number = [150, 150, 150]
+         self.embd = nn.Embedding(voc_size, emb_dim)
+         self.convs = nn.ModuleList([nn.Conv1d(max_len, number, size, padding=size) for (size, number) in zip(kernels, kernel_number)])
+         self.dropout = nn.Dropout(0.1)
+         self.out = nn.Linear(sum(kernel_number), 100)
+
+     def forward(self, x):
+         x = self.embd(x)
+
+         x = [F.relu(conv(x)) for conv in self.convs]
+         x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
+         x = torch.cat(x, 1)
+         x = self.dropout(x)
+         x = self.out(x)
+         return x
+
+
+ class LSTMModelblock(nn.Module):
+     def __init__(self,):
+         super().__init__()
+         emb_dim = 100
+         # n_class = 2
+         self.embd = nn.Embedding(voc_size, emb_dim)
+         self.lstm = nn.LSTM(emb_dim, 50)
+         self.out = nn.Linear(6400, 100)
+         self.flatten = nn.Flatten()
+
+     def forward(self, x):
+         x = self.embd(x)
+
+         x, _ = self.lstm(x)
+         x = self.flatten(x)
+         x = self.out(x)
+         return x
+
+
+ class BERT_CNN_LSTM(nn.Module):
+     def __init__(self, ):
+         super(BERT_CNN_LSTM, self).__init__()
+
+         self.bert = bert_block
+         self.lstm = lstm_model_block
+         self.cnn = text_cnn_block
+         self.fc1 = nn.Linear(300, 100)
+         self.fc2 = nn.Linear(100, n_class)
+         self.dropout1 = nn.Dropout(0.2)
+         self.dropout2 = nn.Dropout(0.2)
+         self.att = nn.TransformerEncoderLayer(d_model=100, nhead=2)
+         self.flatten = nn.Flatten()
+
+     def forward(self, input_ids):
+         bert_out = self.bert(input_ids)
+         lstm_out = self.lstm(input_ids)
+         cnn_out = self.cnn(input_ids)
+
+         x = torch.stack((bert_out, lstm_out, cnn_out), dim=1)
+         x = self.att(x)
+         x = self.flatten(x)
+         x = self.fc1(x)
+         x = self.dropout1(x)
+         x = self.fc2(x)
+         return x
+
+
+ bert_block = bertBlock().to(device)
+ text_cnn_block = textCNNblock().to(device)
+ lstm_model_block = LSTMModelblock().to(device)
+ # build the combined model
+ model_big_load = BERT_CNN_LSTM()
+ model_big_load.to(device)
+
+ model_big_load.load_state_dict(torch.load("model_big.pth", map_location=torch.device('cpu')))
+ model_big_load.eval()
+
+
+
+ def predict(one_text):
+     one_result = tokenizer_cn(one_text, padding='max_length', max_length=max_len, truncation=True, return_tensors="pt")
+     # print(one_result)
+     one_ids = one_result.input_ids[0]
+     one_ids = one_ids.unsqueeze(0).to(device)
+
+     # run the model for prediction
+     with torch.no_grad():
+         output = model_big_load(one_ids)
+     # print(output)
+     # compute the prediction probability
+     pred_score = nn.functional.softmax(output[0], dim=0)
+     pred_score = torch.max(pred_score).cpu().numpy()
+
+     # get the predicted class
+     pred_index = torch.argmax(output, dim=1).item()
+     pred_label = name_list[pred_index]
+
+     print(f"predict class name : {pred_label} \npredict score : {pred_score}")
+     print(pred_index)
+     # convert the result to a JSON string
+     result_dict = {'pred_score': str(pred_score), 'pred_index': str(pred_index), 'pred_label': pred_label}
+     result_json = json.dumps(result_dict)
+
+     return result_json
+
+
+ demo = gr.Interface(fn=predict,
+                     inputs="text",
+
+                     outputs="text",
+                     examples=['酒店的地理位置实在不错,所以从大堂开始就令人惊艳。城景房不但在房间可以看到上海的美景', '住了一次,感觉很差。灯光太暗,房间比较旧!'],
+                     )
+
+ # demo.launch(debug=True)
+ demo.launch()
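
For reference, a minimal client sketch (not part of this commit; it assumes the app is running at Gradio's default local URL and that the gradio_client package is installed) showing how the predict endpoint exposed by this app.py could be called programmatically. Since predict() returns a JSON string, the client parses it back into a dict.

import json
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")                # assumed local URL of the running app
result_json = client.predict(
    "住了一次,感觉很差。灯光太暗,房间比较旧!",          # the negative-review example from app.py
    api_name="/predict",                                 # Gradio's default endpoint name for gr.Interface
)
result = json.loads(result_json)                         # predict() returns a JSON string
print(result["pred_label"], result["pred_score"], result["pred_index"])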
model_big.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6bbe4e4fc7c2a54cdaca6d1329cd6ffc298b1c961e6ee7cef0be53fef851fa66
+ size 432097082
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ transformers
+ torch
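
Only package names are listed, so the build resolves to whatever transformers and torch versions are current (gradio is not listed and is presumably supplied by the hosting environment). A minimal sanity-check sketch, assuming those two packages are installed and bert-base-chinese can be downloaded, confirming that the backbone and tokenizer app.py depends on load correctly:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tok = AutoTokenizer.from_pretrained("bert-base-chinese")
# num_labels=100 mirrors the bertBlock head size in app.py; the new classifier
# head is randomly initialized here, which transformers will warn about.
mdl = AutoModelForSequenceClassification.from_pretrained("bert-base-chinese", num_labels=100)

ids = tok("酒店的地理位置实在不错", padding="max_length", max_length=128,
          truncation=True, return_tensors="pt").input_ids
with torch.no_grad():
    logits = mdl(ids).logits
print(logits.shape)   # torch.Size([1, 100]) -- the per-branch feature width fused in BERT_CNN_LSTM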